diff options
author | Alexei Starovoitov | 2024-08-22 09:27:59 -0700 |
---|---|---|
committer | Alexei Starovoitov | 2024-08-22 09:48:44 -0700 |
commit | 50c374c6d1a43db9444cb74cc09552c817db2a9b (patch) | |
tree | b80c7befe92ef07799cf2dca3135afe56a6a646f | |
parent | d352eca2662734cdd5ef90df1f8bc28b9505e36f (diff) | |
parent | 872cf28b8df9c5c3a1e71a88ee750df7c2513971 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Cross-merge bpf fixes after downstream PR including
important fixes (from bpf-next point of view):
commit 41c24102af7b ("selftests/bpf: Filter out _GNU_SOURCE when compiling test_cpp")
commit fdad456cbcca ("bpf: Fix updating attached freplace prog in prog_array map")
No conflicts.
Adjacent changes in:
include/linux/bpf_verifier.h
kernel/bpf/verifier.c
tools/testing/selftests/bpf/Makefile
Link: https://lore.kernel.org/bpf/20240813234307.82773-1-alexei.starovoitov@gmail.com/
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1334 files changed, 19542 insertions, 9273 deletions
@@ -166,6 +166,7 @@ Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com> Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com> David Brownell <david-b@pacbell.net> David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org> +David Heidelberg <david@ixit.cz> <d.okias@gmail.com> David Rheinsberg <david@readahead.eu> <dh.herrmann@gmail.com> David Rheinsberg <david@readahead.eu> <dh.herrmann@googlemail.com> David Rheinsberg <david@readahead.eu> <david.rheinsberg@gmail.com> @@ -260,6 +261,7 @@ Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@motorola.com> Jakub Kicinski <kuba@kernel.org> <jakub.kicinski@netronome.com> James Bottomley <jejb@mulgrave.(none)> James Bottomley <jejb@titanic.il.steeleye.com> +James Clark <james.clark@linaro.org> <james.clark@arm.com> James E Wilson <wilson@specifix.com> James Hogan <jhogan@kernel.org> <james@albanarts.com> James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index c61f9b813973..12488c14be64 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -14,9 +14,10 @@ Description: event to its internal Informational Event log, updates the Event Status register, and if configured, interrupts the host. It is not an error to inject poison into an address that - already has poison present and no error is returned. The - inject_poison attribute is only visible for devices supporting - the capability. + already has poison present and no error is returned. If the + device returns 'Inject Poison Limit Reached' an -EBUSY error + is returned to the user. The inject_poison attribute is only + visible for devices supporting the capability. What: /sys/kernel/debug/memX/clear_poison diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu index 307a55f599cb..35a8f6dae5bf 100644 --- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu @@ -32,9 +32,9 @@ Description: (RW) The front button on the Turris Omnia router can be interrupt. This file switches between these two modes: - - "mcu" makes the button press event be handled by the MCU to - change the LEDs panel intensity. - - "cpu" makes the button press event be handled by the CPU. + - ``mcu`` makes the button press event be handled by the MCU to + change the LEDs panel intensity. + - ``cpu`` makes the button press event be handled by the CPU. Format: %s. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 325873385b71..de725ca3be82 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -562,7 +562,8 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= If control status is "forceoff" or "notsupported" writes - are rejected. + are rejected. Note that enabling SMT on PowerPC skips + offline cores. What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias Date: March 2019 diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index fd4b56c0996f..c09674a75a9e 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -742,7 +742,7 @@ SecurityFlags Flags which control security negotiation and may use NTLMSSP 0x00080 must use NTLMSSP 0x80080 seal (packet encryption) 0x00040 - must seal (not implemented yet) 0x40040 + must seal 0x40040 cifsFYI If set to non-zero value, additional debug information will be logged to the system error log. This field diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index e625830d335e..552c9155165d 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -162,13 +162,14 @@ iv_large_sectors Module parameters:: -max_read_size -max_write_size - Maximum size of read or write requests. When a request larger than this size - is received, dm-crypt will split the request. The splitting improves - concurrency (the split requests could be encrypted in parallel by multiple - cores), but it also causes overhead. The user should tune these parameters to - fit the actual workload. + + max_read_size + max_write_size + Maximum size of read or write requests. When a request larger than this size + is received, dm-crypt will split the request. The splitting improves + concurrency (the split requests could be encrypted in parallel by multiple + cores), but it also causes overhead. The user should tune these parameters to + fit the actual workload. Example scripts diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c1134ad5f06d..09126bb8cc9f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3830,9 +3830,6 @@ noalign [KNL,ARM] - noaltinstr [S390,EARLY] Disables alternative instructions - patching (CPU alternatives feature). - noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any IOAPICs that may be present in the system. @@ -4801,11 +4798,9 @@ profile= [KNL] Enable kernel profiling via /proc/profile Format: [<profiletype>,]<number> - Param: <profiletype>: "schedule", "sleep", or "kvm" + Param: <profiletype>: "schedule" or "kvm" [defaults to kernel profiling] Param: "schedule" - profile schedule points. - Param: "sleep" - profile D-state sleeping (millisecs). - Requires CONFIG_SCHEDSTATS Param: "kvm" - profile VM exits. Param: <number> - step/bucket size as a power of 2 for statistical time based profiling. diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index bb83c5d8c675..50327c05be8d 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -122,10 +122,18 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1490853 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1491015 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | @@ -138,8 +146,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X1 | #1502854 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | @@ -160,6 +174,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | @@ -170,6 +186,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V1 | #1619801 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 02eb4d98b7de..85b709257918 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -239,28 +239,38 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance - information about the selected set of processors. +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was + mistakenly classified as a bitmask rather than a value. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned - accesses is unknown. +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing + the performance of misaligned scalar native word accesses on the selected set + of processors. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are - emulated via software, either in or below the kernel. These accesses are - always extremely slow. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of + misaligned scalar accesses is unknown. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower - than equivalent byte accesses. Misaligned accesses may be supported - directly in hardware, or trapped and emulated by software. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar + accesses are emulated via software, either in or below the kernel. These + accesses are always extremely slow. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster - than equivalent byte accesses. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native + word sized accesses are slower than the equivalent quantity of byte + accesses. Misaligned accesses may be supported directly in hardware, or + trapped and emulated by software. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are - not supported at all and will generate a misaligned address fault. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native + word sized accesses are faster than the equivalent quantity of byte + accesses. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar + accesses are not supported at all and will generate a misaligned address + fault. * :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which represents the size of the Zicboz block in bytes. * :c:macro:`RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS`: An unsigned long which represent the highest userspace virtual address usable. + +* :c:macro:`RISCV_HWPROBE_KEY_TIME_CSR_FREQ`: Frequency (in Hz) of `time CSR`. diff --git a/Documentation/arch/riscv/vm-layout.rst b/Documentation/arch/riscv/vm-layout.rst index e476b4386bd9..077b968dcc81 100644 --- a/Documentation/arch/riscv/vm-layout.rst +++ b/Documentation/arch/riscv/vm-layout.rst @@ -47,11 +47,12 @@ RISC-V Linux Kernel SV39 | Kernel-space virtual memory, shared between all processes: ____________________________________________________________|___________________________________________________________ | | | | - ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap - ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io - ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap - ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space - ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory + ffffffc4fea00000 | -236 GB | ffffffc4feffffff | 6 MB | fixmap + ffffffc4ff000000 | -236 GB | ffffffc4ffffffff | 16 MB | PCI io + ffffffc500000000 | -236 GB | ffffffc5ffffffff | 4 GB | vmemmap + ffffffc600000000 | -232 GB | ffffffd5ffffffff | 64 GB | vmalloc/ioremap space + ffffffd600000000 | -168 GB | fffffff5ffffffff | 128 GB | direct mapping of all physical memory + | | | | fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan __________________|____________|__________________|_________|____________________________________________________________ | diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml index c960c8e0a9a5..08b89b62c505 100644 --- a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml +++ b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml @@ -30,7 +30,7 @@ description: | maintainers: - Mike Leach <mike.leach@linaro.org> - Suzuki K Poulose <suzuki.poulose@arm.com> - - James Clark <james.clark@arm.com> + - James Clark <james.clark@linaro.org> - Mao Jinlong <quic_jinlmao@quicinc.com> - Hao Zhang <quic_hazha@quicinc.com> diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml index 6745b4cc8f1c..d50a60368e27 100644 --- a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml +++ b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml @@ -29,7 +29,7 @@ description: | maintainers: - Mike Leach <mike.leach@linaro.org> - Suzuki K Poulose <suzuki.poulose@arm.com> - - James Clark <james.clark@arm.com> + - James Clark <james.clark@linaro.org> - Mao Jinlong <quic_jinlmao@quicinc.com> - Hao Zhang <quic_hazha@quicinc.com> diff --git a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml index b5e5767d8698..13eaa8d9a16e 100644 --- a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml @@ -35,6 +35,9 @@ properties: ports-implemented: const: 1 + power-domains: + maxItems: 1 + sata-port@0: $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml index a584b4953e68..46403b98411f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml index 6b9c1d198b14..10afe984e2fb 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on MSM8994 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml index a5a29dc75ae1..1fe68e07a2b2 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml index 2280b859b2ad..78e232fa95dc 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml index cf19f44af774..4ff17a91344b 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6115 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml index 374a1844a159..10a9c96a97b6 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides clocks and power domains on diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml index fd6658cb793d..c03b30f64f35 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Camera Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm camera clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml index 183b1c75dbdf..3cd422a645fd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml index 147b75a21508..de4e9066eeb8 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml index cf4cad76f6c9..d9dd479c17bd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml index 46d1d91e3a01..5c2ecec0624e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM8350 Video Clock & Reset Controller maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm video clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml index 3c2cac14e6c3..d10bb002906e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM8450 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml index 8e8a288d318c..e22b4c433fd0 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM6375 Display MDSS maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: SM6375 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml index 2399cabf044c..dd614e077bbf 100644 --- a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml +++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ASUS Z00T TM5P5 NT35596 5.5" 1080×1920 LCD Panel maintainers: - - Konrad Dybcio <konradybcio@gmail.com> + - Konrad Dybcio <konradybcio@kernel.org> description: |+ This panel seems to only be found in the Asus Z00T diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 765ca155c83a..032f783eefc4 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -14,7 +14,16 @@ allOf: properties: compatible: - const: samsung,atna33xc20 + oneOf: + # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel + - const: samsung,atna33xc20 + - items: + - enum: + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel + - samsung,atna45af01 + # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel + - samsung,atna45dc02 + - const: samsung,atna33xc20 enable-gpios: true port: true diff --git a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml index 191b692125e1..032a989184ff 100644 --- a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml +++ b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sony TD4353 JDI 5 / 5.7" 2160x1080 MIPI-DSI Panel maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | The Sony TD4353 JDI is a 5 (XZ2c) / 5.7 (XZ2) inch 2160x1080 diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 1715b0c9feea..c31e5e719525 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -28,6 +28,7 @@ properties: - anvo,anv32e61w - atmel,at25256B - fujitsu,mb85rs1mt + - fujitsu,mb85rs256 - fujitsu,mb85rs64 - microchip,at25160bn - microchip,25lc040 diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index 113957ebe9f1..e25fa72fd785 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -91,6 +91,7 @@ patternProperties: - const: 0 - description: | Shall encode the I3C LVR (Legacy Virtual Register): + See include/dt-bindings/i3c/i3c.h bit[31:8]: unused/ignored bit[7:5]: I2C device index. Possible values: * 0: I2C device has a 50 ns spike filter @@ -153,6 +154,8 @@ additionalProperties: true examples: - | + #include <dt-bindings/i3c/i3c.h> + i3c@d040000 { compatible = "cdns,i3c-master"; clocks = <&coreclock>, <&i3csysclock>; @@ -166,7 +169,7 @@ examples: /* I2C device. */ eeprom@57 { compatible = "atmel,24c01"; - reg = <0x57 0x0 0x10>; + reg = <0x57 0x0 (I2C_FM | I2C_FILTER)>; pagesize = <0x8>; }; diff --git a/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml b/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml index c0e805e531be..4fc13e3c0f75 100644 --- a/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml +++ b/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml @@ -20,7 +20,16 @@ properties: maxItems: 1 clocks: - maxItems: 1 + minItems: 1 + items: + - description: Core clock + - description: APB clock + + clock-names: + minItems: 1 + items: + - const: core + - const: apb interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml index a03da9489ed9..190889c7b62a 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml @@ -120,9 +120,8 @@ patternProperties: description: Positive input can be connected to pins AIN1 to AIN16 by choosing the appropriate value from 1 to 16. Negative input is connected to AINCOM. - items: - minimum: 1 - maximum: 16 + minimum: 1 + maximum: 16 oneOf: - required: diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml index 9fce7203bd42..78210791496f 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC7280 maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml index 6c2da03f0cd2..100c68636909 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC8280XP maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml index 3cff7e662255..300640a533dd 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SM8450 maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index 571e5746d177..f8cebc9e8cd9 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies legacy IOMMU implementations maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm "B" family devices which are not compatible with arm-smmu have diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index a1b71b35319e..42f9843d1868 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -38,6 +38,10 @@ properties: managed: true + phys: + description: A reference to the SerDes lane(s) + maxItems: 1 + required: - reg diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml index bd3cbb44c99a..e75393b3d196 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. MDM9607 TLMM block maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: Top Level Mode Multiplexer pin controller in Qualcomm MDM9607 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml index a4771f87d936..b262af6be97d 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6350 TLMM block maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: Top Level Mode Multiplexer pin controller in Qualcomm SM6350 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml index 047f82863f9b..c11af09c3f5b 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6375 TLMM block maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: Top Level Mode Multiplexer pin controller in Qualcomm SM6375 SoC. diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml index 7afafde17a38..61cf4fe19ca5 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml @@ -8,7 +8,7 @@ title: Qualcomm Resource Power Manager (RPM) Processor/Subsystem maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> - Stephan Gerhold <stephan@gerhold.net> description: | diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml index 9410404f87f1..ad2dcc39a5f5 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. (QTI) RPM Master Stats maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | The Qualcomm RPM (Resource Power Manager) architecture includes a concept diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml index beb0ff0245b0..a65b1d1d5fdd 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml @@ -199,10 +199,11 @@ additionalProperties: false examples: - | + #include <dt-bindings/gpio/gpio.h> codec@1,0{ compatible = "slim217,250"; reg = <1 0>; - reset-gpios = <&tlmm 64 0>; + reset-gpios = <&tlmm 64 GPIO_ACTIVE_LOW>; slim-ifc-dev = <&wcd9340_ifd>; #sound-dai-cells = <1>; interrupt-parent = <&tlmm>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml index de397d879acc..f94203798f24 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml @@ -42,7 +42,7 @@ examples: pinctrl-names = "default", "sleep"; pinctrl-0 = <&wcd_reset_n>; pinctrl-1 = <&wcd_reset_n_sleep>; - reset-gpios = <&tlmm 83 GPIO_ACTIVE_HIGH>; + reset-gpios = <&tlmm 83 GPIO_ACTIVE_LOW>; vdd-buck-supply = <&vreg_l17b_1p8>; vdd-rxtx-supply = <&vreg_l18b_1p8>; vdd-px-supply = <&vreg_l18b_1p8>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml index cf6c3787adfe..10531350c336 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml @@ -34,9 +34,10 @@ unevaluatedProperties: false examples: - | + #include <dt-bindings/gpio/gpio.h> codec { compatible = "qcom,wcd9380-codec"; - reset-gpios = <&tlmm 32 0>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd938x_tx>; qcom,rx-device = <&wcd938x_rx>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml index 6e76f6a8634f..c69291f4d575 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml @@ -52,10 +52,10 @@ unevaluatedProperties: false examples: - | - #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/gpio/gpio.h> codec { compatible = "qcom,wcd9390-codec"; - reset-gpios = <&tlmm 32 IRQ_TYPE_NONE>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd939x_tx>; qcom,rx-device = <&wcd939x_rx>; diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 5d3dc952770d..7913ca9b6b54 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -328,7 +328,9 @@ properties: - renesas,hs3001 # Renesas ISL29501 time-of-flight sensor - renesas,isl29501 - # Rohm DH2228FV + # Rohm BH2228FV 8 channel DAC + - rohm,bh2228fv + # Rohm DH2228FV - This device does not exist, use rohm,bh2228fv instead. - rohm,dh2228fv # S524AD0XF1 (128K/256K-bit Serial EEPROM for Low Power) - samsung,24ad0xd1 diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index 783c27591e56..245e8c3ce669 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -18,6 +18,7 @@ properties: - usb424,2412 - usb424,2417 - usb424,2514 + - usb424,2517 reg: true diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml index 69845ec32e81..d0eff1ea52b4 100644 --- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml @@ -21,6 +21,7 @@ properties: - amlogic,t7-wdt - items: - enum: + - amlogic,a4-wdt - amlogic,c3-wdt - amlogic,s4-wdt - const: amlogic,t7-wdt diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst index 036e49553542..12b82725d322 100644 --- a/Documentation/driver-api/cxl/index.rst +++ b/Documentation/driver-api/cxl/index.rst @@ -9,4 +9,6 @@ Compute Express Link memory-devices + maturity-map + .. only:: subproject and html diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst new file mode 100644 index 000000000000..df8e2ac2a320 --- /dev/null +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -0,0 +1,202 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +=========================================== +Compute Express Link Subsystem Maturity Map +=========================================== + +The Linux CXL subsystem tracks the dynamic `CXL specification +<https://computeexpresslink.org/cxl-specification-landing-page>`_ that +continues to respond to new use cases with new features, capability +updates and fixes. At any given point some aspects of the subsystem are +more mature than others. While the periodic pull requests summarize the +`work being incorporated each merge window +<https://lore.kernel.org/linux-cxl/?q=s%3APULL+s%3ACXL+tc%3Atorvalds+NOT+s%3ARe>`_, +those do not always convey progress relative to a starting point and a +future end goal. + +What follows is a coarse breakdown of the subsystem's major +responsibilities along with a maturity score. The expectation is that +the change-history of this document provides an overview summary of the +subsystem maturation over time. + +The maturity scores are: + +- [3] Mature: Work in this area is complete and no changes on the horizon. + Note that this score can regress from one kernel release to the next + based on new test results or end user reports. + +- [2] Stabilizing: Major functionality operational, common cases are + mature, but known corner cases are still a work in progress. + +- [1] Initial: Capability that has exited the Proof of Concept phase, but + may still have significant gaps to close and fixes to apply as real + world testing occurs. + +- [0] Known gap: Feature is on a medium to long term horizon to + implement. If the specification has a feature that does not even have + a '0' score in this document, there is a good chance that no one in + the linux-cxl@vger.kernel.org community has started to look at it. + +- X: Out of scope for kernel enabling, or kernel enabling not required + +Feature and Capabilities +======================== + +Enumeration / Provisioning +-------------------------- +All of the fundamental enumeration an object model of the subsystem is +in place, but there are several corner cases that are pending closure. + + +* [2] CXL Window Enumeration + + * [0] :ref:`Extended-linear memory-side cache <extended-linear>` + * [0] Low Memory-hole + * [0] Hetero-interleave + +* [2] Switch Enumeration + + * [0] CXL register enumeration link-up dependency + +* [2] HDM Decoder Configuration + + * [0] Decoder target and granularity constraints + +* [2] Performance enumeration + + * [3] Endpoint CDAT + * [3] Switch CDAT + * [1] CDAT to Core-mm integration + + * [1] x86 + * [0] Arm64 + * [0] All other arch. + + * [0] Shared link + +* [2] Hotplug + (see CXL Window Enumeration) + + * [0] Handle Soft Reserved conflicts + +* [0] :ref:`RCH link status <rch-link-status>` +* [0] Fabrics / G-FAM (chapter 7) +* [0] Global Access Endpoint + + +RAS +--- +In many ways CXL can be seen as a standardization of what would normally +be handled by custom EDAC drivers. The open development here is +mainly caused by the enumeration corner cases above. + +* [3] Component events (OS) +* [2] Component events (FFM) +* [1] Endpoint protocol errors (OS) +* [1] Endpoint protocol errors (FFM) +* [0] Switch protocol errors (OS) +* [1] Switch protocol errors (FFM) +* [2] DPA->HPA Address translation + + * [1] XOR Interleave translation + (see CXL Window Enumeration) + +* [1] Memory Failure coordination +* [0] Scrub control +* [2] ACPI error injection EINJ + + * [0] EINJ v2 + * [X] Compliance DOE + +* [2] Native error injection +* [3] RCH error handling +* [1] VH error handling +* [0] PPR +* [0] Sparing +* [0] Device built in test + + +Mailbox commands +---------------- + +* [3] Firmware update +* [3] Health / Alerts +* [1] :ref:`Background commands <background-commands>` +* [3] Sanitization +* [3] Security commands +* [3] RAW Command Debug Passthrough +* [0] CEL-only-validation Passthrough +* [0] Switch CCI +* [3] Timestamp +* [1] PMEM labels +* [0] PMEM GPF / Dirty Shutdown +* [0] Scan Media + +PMU +--- +* [1] Type 3 PMU +* [0] Switch USP/ DSP, Root Port + +Security +-------- + +* [X] CXL Trusted Execution Environment Security Protocol (TSP) +* [X] CXL IDE (subsumed by TSP) + +Memory-pooling +-------------- + +* [1] Hotplug of LDs (via PCI hotplug) +* [0] Dynamic Capacity Device (DCD) Support + +Multi-host sharing +------------------ + +* [0] Hardware coherent shared memory +* [0] Software managed coherency shared memory + +Multi-host memory +----------------- + +* [0] Dynamic Capacity Device Support +* [0] Sharing + +Accelerator +----------- + +* [0] Accelerator memory enumeration HDM-D (CXL 1.1/2.0 Type-2) +* [0] Accelerator memory enumeration HDM-DB (CXL 3.0 Type-2) +* [0] CXL.cache 68b (CXL 2.0) +* [0] CXL.cache 256b Cache IDs (CXL 3.0) + +User Flow Support +----------------- + +* [0] HPA->DPA Address translation (need xormaps export solution) + +Details +======= + +.. _extended-linear: + +* **Extended-linear memory-side cache**: An HMAT proposal to enumerate the presence of a + memory-side cache where the cache capacity extends the SRAT address + range capacity. `See the ECN + <https://lore.kernel.org/linux-cxl/6650e4f835a0e_195e294a8@dwillia2-mobl3.amr.corp.intel.com.notmuch/>`_ + for more details: + +.. _rch-link-status: + +* **RCH Link Status**: RCH (Restricted CXL Host) topologies, end up + hiding some standard registers like PCIe Link Status / Capabilities in + the CXL RCRB (Root Complex Register Block). + +.. _background-commands: + +* **Background commands**: The CXL background command mechanism is + awkward as the single slot is monopolized potentially indefinitely by + various commands. A `cancel on conflict + <http://lore.kernel.org/r/66035c2e8ba17_770232948b@dwillia2-xfh.jf.intel.com.notmuch>`_ + facility is needed to make sure the kernel can ensure forward progress + of priority commands. diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index 6c1175c6afba..978198f8a18b 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -4,8 +4,6 @@ Generic Thermal Sysfs driver How To Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> -Updated: 2 January 2008 - Copyright (c) 2008 Intel Corporation @@ -38,23 +36,23 @@ temperature) and throttle appropriate devices. :: - struct thermal_zone_device - *thermal_zone_device_register(char *type, - int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay)) + struct thermal_zone_device * + thermal_zone_device_register_with_trips(const char *type, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + unsigned int passive_delay, + unsigned int polling_delay) - This interface function adds a new thermal zone device (sensor) to + This interface function adds a new thermal zone device (sensor) to the /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the - thermal cooling devices registered at the same time. + thermal cooling devices registered to it at the same time. type: the thermal zone type. trips: - the total number of trip points this thermal zone supports. - mask: - Bit string: If 'n'th bit is set, then trip point 'n' is writable. + the table of trip points for this thermal zone. devdata: device private data ops: @@ -67,32 +65,29 @@ temperature) and throttle appropriate devices. .get_temp: get the current temperature of the thermal zone. .set_trips: - set the trip points window. Whenever the current temperature - is updated, the trip points immediately below and above the - current temperature are found. - .get_mode: - get the current mode (enabled/disabled) of the thermal zone. - - - "enabled" means the kernel thermal management is - enabled. - - "disabled" will prevent kernel thermal driver action - upon trip points so that user applications can take - charge of thermal management. - .set_mode: - set the mode (enabled/disabled) of the thermal zone. - .get_trip_type: - get the type of certain trip point. - .get_trip_temp: - get the temperature above which the certain trip point - will be fired. + set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. + .change_mode: + change the mode (enabled/disabled) of the thermal zone. + .set_trip_temp: + set the temperature of a given trip point. + .get_crit_temp: + get the critical temperature for this thermal zone. .set_emul_temp: - set the emulation temperature which helps in debugging - different threshold temperature points. + set the emulation temperature which helps in debugging + different threshold temperature points. + .get_trend: + get the trend of most recent zone temperature changes. + .hot: + hot trip point crossing handler. + .critical: + critical trip point crossing handler. tzp: thermal zone platform parameters. passive_delay: - number of milliseconds to wait between polls when - performing passive cooling. + number of milliseconds to wait between polls when performing passive + cooling. polling_delay: number of milliseconds to wait between polls when checking whether trip points have been crossed (0 for interrupt driven systems). diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst index a74d7b052dc1..de1f32526cc1 100644 --- a/Documentation/filesystems/caching/fscache.rst +++ b/Documentation/filesystems/caching/fscache.rst @@ -318,10 +318,10 @@ where the columns are: Debugging ========= -If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime -debugging enabled by adjusting the value in:: +If CONFIG_NETFS_DEBUG is enabled, the FS-Cache facility and NETFS support can +have runtime debugging enabled by adjusting the value in:: - /sys/module/fscache/parameters/debug + /sys/module/netfs/parameters/debug This is a bitmask of debugging streams to enable: @@ -343,6 +343,6 @@ This is a bitmask of debugging streams to enable: The appropriate set of values should be OR'd together and the result written to the control file. For example:: - echo $((1|8|512)) >/sys/module/fscache/parameters/debug + echo $((1|8|512)) >/sys/module/netfs/parameters/debug will turn on all function entry debugging. diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index cc4626d6ee4f..c293f8e37468 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -75,7 +75,7 @@ Here are the main features of EROFS: - Support merging tail-end data into a special inode as fragments. - - Support large folios for uncompressed files. + - Support large folios to make use of THPs (Transparent Hugepages); - Support direct I/O on uncompressed files to avoid double caching for loop devices; diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 6b30e43a0d11..67cb68ea6e68 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -13,7 +13,7 @@ KSMBD architecture The subset of performance related operations belong in kernelspace and the other subset which belong to operations which are not really related with performance in userspace. So, DCE/RPC management that has historically resulted -into number of buffer overflow issues and dangerous security bugs and user +into a number of buffer overflow issues and dangerous security bugs and user account management are implemented in user space as ksmbd.mountd. File operations that are related with performance (open/read/write/close etc.) in kernel space (ksmbd). This also allows for easier integration with VFS @@ -24,8 +24,8 @@ ksmbd (kernel daemon) When the server daemon is started, It starts up a forker thread (ksmbd/interface name) at initialization time and open a dedicated port 445 -for listening to SMB requests. Whenever new clients make request, Forker -thread will accept the client connection and fork a new thread for dedicated +for listening to SMB requests. Whenever new clients make a request, the Forker +thread will accept the client connection and fork a new thread for a dedicated communication channel between the client and the server. It allows for parallel processing of SMB requests(commands) from clients as well as allowing for new clients to make new connections. Each instance is named ksmbd/1~n(port number) @@ -34,12 +34,12 @@ thread can decide to pass through the commands to the user space (ksmbd.mountd), currently DCE/RPC commands are identified to be handled through the user space. To further utilize the linux kernel, it has been chosen to process the commands as workitems and to be executed in the handlers of the ksmbd-io kworker threads. -It allows for multiplexing of the handlers as the kernel take care of initiating +It allows for multiplexing of the handlers as the kernel takes care of initiating extra worker threads if the load is increased and vice versa, if the load is -decreased it destroys the extra worker threads. So, after connection is -established with client. Dedicated ksmbd/1..n(port number) takes complete +decreased it destroys the extra worker threads. So, after the connection is +established with the client. Dedicated ksmbd/1..n(port number) takes complete ownership of receiving/parsing of SMB commands. Each received command is worked -in parallel i.e., There can be multiple clients commands which are worked in +in parallel i.e., there can be multiple client commands which are worked in parallel. After receiving each command a separated kernel workitem is prepared for each command which is further queued to be handled by ksmbd-io kworkers. So, each SMB workitem is queued to the kworkers. This allows the benefit of load @@ -49,9 +49,9 @@ performance by handling client commands in parallel. ksmbd.mountd (user space daemon) -------------------------------- -ksmbd.mountd is userspace process to, transfer user account and password that +ksmbd.mountd is a userspace process to, transfer the user account and password that are registered using ksmbd.adduser (part of utils for user space). Further it -allows sharing information parameters that parsed from smb.conf to ksmbd in +allows sharing information parameters that are parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the requests (dcerpc and share/user info). It handles RPC calls (at a minimum few @@ -124,7 +124,7 @@ How to run 1. Download ksmbd-tools(https://github.com/cifsd-team/ksmbd-tools/releases) and compile them. - - Refer README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) + - Refer to README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) to know how to use ksmbd.mountd/adduser/addshare/control utils $ ./autogen.sh @@ -133,7 +133,7 @@ How to run 2. Create /usr/local/etc/ksmbd/ksmbd.conf file, add SMB share in ksmbd.conf file. - - Refer ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage + - Refer to ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage for details to configure shares. $ man ksmbd.conf @@ -145,7 +145,7 @@ How to run $ man ksmbd.adduser $ sudo ksmbd.adduser -a <Enter USERNAME for SMB share access> -4. Insert ksmbd.ko module after build your kernel. No need to load module +4. Insert the ksmbd.ko module after you build your kernel. No need to load the module if ksmbd is built into the kernel. - Set ksmbd in menuconfig(e.g. $ make menuconfig) @@ -175,7 +175,7 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of the components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" 3. Show what prints are enabled. diff --git a/Documentation/gpu/amdgpu/display/dcn-blocks.rst b/Documentation/gpu/amdgpu/display/dcn-blocks.rst index a3fbd3ea028b..5e34366f6dbe 100644 --- a/Documentation/gpu/amdgpu/display/dcn-blocks.rst +++ b/Documentation/gpu/amdgpu/display/dcn-blocks.rst @@ -8,37 +8,22 @@ and the code documentation when it is automatically generated. DCHUBBUB -------- -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :internal: - HUBP ---- .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :internal: - DPP --- -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h :internal: MPC @@ -48,10 +33,8 @@ MPC :doc: overview .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h :internal: + :no-identifiers: mpcc_blnd_cfg mpcc_alpha_blend_mode OPP --- @@ -60,19 +43,13 @@ OPP :doc: overview .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h :internal: DIO --- -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c :internal: diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index 67a811e6891f..b269ff3f7a54 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -132,7 +132,7 @@ The DRM blend mode and its elements are then mapped by AMDGPU display manager (MPC), as follows: .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h - :functions: mpcc_blnd_cfg + :identifiers: mpcc_blnd_cfg Therefore, the blending configuration for a single MPCC instance on the MPC tree is defined by :c:type:`mpcc_blnd_cfg`, where @@ -144,7 +144,7 @@ alpha and plane alpha values. It sets one of the three modes for :c:type:`MPCC_ALPHA_BLND_MODE`, as described below. .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h - :functions: mpcc_alpha_blend_mode + :identifiers: mpcc_alpha_blend_mode DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM blend formula, as follows: diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 495e35fcfb21..ea21fe135b97 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1753,6 +1753,7 @@ operations: request: attributes: - header + - context reply: attributes: - header @@ -1761,7 +1762,6 @@ operations: - indir - hkey - input_xfrm - dump: *rss-get-op - name: plca-get-cfg doc: Get PLCA params. diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 3ab423b80e91..d5f246aceb9f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1875,6 +1875,7 @@ Kernel response contents: ===================================== ====== ========================== ``ETHTOOL_A_RSS_HEADER`` nested reply header + ``ETHTOOL_A_RSS_CONTEXT`` u32 context number ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 1497e80f030e..3fc63f27c226 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -89,14 +89,7 @@ docs on :ref:`Building Linux with Clang/LLVM <kbuild_llvm>`. Rust (optional) --------------- -A particular version of the Rust toolchain is required. Newer versions may or -may not work because the kernel depends on some unstable Rust features, for -the moment. - -Each Rust toolchain comes with several "components", some of which are required -(like ``rustc``) and some that are optional. The ``rust-src`` component (which -is optional) needs to be installed to build the kernel. Other components are -useful for developing. +A recent version of the Rust compiler is required. Please see Documentation/rust/quick-start.rst for instructions on how to satisfy the build requirements of Rust support. In particular, the ``Makefile`` diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 6e9a4597bf2c..daebce49cfdf 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -13,9 +13,9 @@ kernel. Hardware issues like Meltdown, Spectre, L1TF etc. must be treated differently because they usually affect all Operating Systems ("OS") and therefore need coordination across different OS vendors, distributions, -hardware vendors and other parties. For some of the issues, software -mitigations can depend on microcode or firmware updates, which need further -coordination. +silicon vendors, hardware integrators, and other parties. For some of the +issues, software mitigations can depend on microcode or firmware updates, +which need further coordination. .. _Contact: @@ -32,8 +32,8 @@ Linux kernel security team (:ref:`Documentation/admin-guide/ <securitybugs>`) instead. The team can be contacted by email at <hardware-security@kernel.org>. This -is a private list of security officers who will help you to coordinate a -fix according to our documented process. +is a private list of security officers who will help you coordinate a fix +according to our documented process. The list is encrypted and email to the list can be sent by either PGP or S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME @@ -43,7 +43,7 @@ the following URLs: - PGP: https://www.kernel.org/static/files/hardware-security.asc - S/MIME: https://www.kernel.org/static/files/hardware-security.crt -While hardware security issues are often handled by the affected hardware +While hardware security issues are often handled by the affected silicon vendor, we welcome contact from researchers or individuals who have identified a potential hardware flaw. @@ -65,7 +65,7 @@ of Linux Foundation's IT operations personnel technically have the ability to access the embargoed information, but are obliged to confidentiality by their employment contract. Linux Foundation IT personnel are also responsible for operating and managing the rest of -kernel.org infrastructure. +kernel.org's infrastructure. The Linux Foundation's current director of IT Project infrastructure is Konstantin Ryabitsev. @@ -85,7 +85,7 @@ Memorandum of Understanding The Linux kernel community has a deep understanding of the requirement to keep hardware security issues under embargo for coordination between -different OS vendors, distributors, hardware vendors and other parties. +different OS vendors, distributors, silicon vendors, and other parties. The Linux kernel community has successfully handled hardware security issues in the past and has the necessary mechanisms in place to allow @@ -103,11 +103,11 @@ the issue in the best technical way. All involved developers pledge to adhere to the embargo rules and to keep the received information confidential. Violation of the pledge will lead to immediate exclusion from the current issue and removal from all related -mailing-lists. In addition, the hardware security team will also exclude +mailing lists. In addition, the hardware security team will also exclude the offender from future issues. The impact of this consequence is a highly effective deterrent in our community. In case a violation happens the hardware security team will inform the involved parties immediately. If you -or anyone becomes aware of a potential violation, please report it +or anyone else becomes aware of a potential violation, please report it immediately to the Hardware security officers. @@ -124,14 +124,16 @@ method for these types of issues. Start of Disclosure """"""""""""""""""" -Disclosure starts by contacting the Linux kernel hardware security team by -email. This initial contact should contain a description of the problem and -a list of any known affected hardware. If your organization builds or -distributes the affected hardware, we encourage you to also consider what -other hardware could be affected. +Disclosure starts by emailing the Linux kernel hardware security team per +the Contact section above. This initial contact should contain a +description of the problem and a list of any known affected silicon. If +your organization builds or distributes the affected hardware, we encourage +you to also consider what other hardware could be affected. The disclosing +party is responsible for contacting the affected silicon vendors in a +timely manner. The hardware security team will provide an incident-specific encrypted -mailing-list which will be used for initial discussion with the reporter, +mailing list which will be used for initial discussion with the reporter, further disclosure, and coordination of fixes. The hardware security team will provide the disclosing party a list of @@ -158,8 +160,8 @@ This serves several purposes: - The disclosed entities can be contacted to name experts who should participate in the mitigation development. - - If an expert which is required to handle an issue is employed by an - listed entity or member of an listed entity, then the response teams can + - If an expert who is required to handle an issue is employed by a listed + entity or member of an listed entity, then the response teams can request the disclosure of that expert from that entity. This ensures that the expert is also part of the entity's response team. @@ -169,8 +171,8 @@ Disclosure The disclosing party provides detailed information to the initial response team via the specific encrypted mailing-list. -From our experience the technical documentation of these issues is usually -a sufficient starting point and further technical clarification is best +From our experience, the technical documentation of these issues is usually +a sufficient starting point, and further technical clarification is best done via email. Mitigation development @@ -179,57 +181,93 @@ Mitigation development The initial response team sets up an encrypted mailing-list or repurposes an existing one if appropriate. -Using a mailing-list is close to the normal Linux development process and -has been successfully used in developing mitigations for various hardware +Using a mailing list is close to the normal Linux development process and +has been successfully used to develop mitigations for various hardware security issues in the past. -The mailing-list operates in the same way as normal Linux development. -Patches are posted, discussed and reviewed and if agreed on applied to a -non-public git repository which is only accessible to the participating +The mailing list operates in the same way as normal Linux development. +Patches are posted, discussed, and reviewed and if agreed upon, applied to +a non-public git repository which is only accessible to the participating developers via a secure connection. The repository contains the main development branch against the mainline kernel and backport branches for stable kernel versions as necessary. The initial response team will identify further experts from the Linux -kernel developer community as needed. Bringing in experts can happen at any -time of the development process and needs to be handled in a timely manner. +kernel developer community as needed. Any involved party can suggest +further experts to be included, each of which will be subject to the same +requirements outlined above. -If an expert is employed by or member of an entity on the disclosure list +Bringing in experts can happen at any time in the development process and +needs to be handled in a timely manner. + +If an expert is employed by or a member of an entity on the disclosure list provided by the disclosing party, then participation will be requested from the relevant entity. -If not, then the disclosing party will be informed about the experts +If not, then the disclosing party will be informed about the experts' participation. The experts are covered by the Memorandum of Understanding -and the disclosing party is requested to acknowledge the participation. In -case that the disclosing party has a compelling reason to object, then this -objection has to be raised within five work days and resolved with the -incident team immediately. If the disclosing party does not react within -five work days this is taken as silent acknowledgement. +and the disclosing party is requested to acknowledge their participation. +In the case where the disclosing party has a compelling reason to object, +any objection must to be raised within five working days and resolved with +the incident team immediately. If the disclosing party does not react +within five working days this is taken as silent acknowledgment. -After acknowledgement or resolution of an objection the expert is disclosed -by the incident team and brought into the development process. +After the incident team acknowledges or resolves an objection, the expert +is disclosed and brought into the development process. List participants may not communicate about the issue outside of the private mailing list. List participants may not use any shared resources (e.g. employer build farms, CI systems, etc) when working on patches. +Early access +"""""""""""" + +The patches discussed and developed on the list can neither be distributed +to any individual who is not a member of the response team nor to any other +organization. + +To allow the affected silicon vendors to work with their internal teams and +industry partners on testing, validation, and logistics, the following +exception is provided: + + Designated representatives of the affected silicon vendors are + allowed to hand over the patches at any time to the silicon + vendor’s response team. The representative must notify the kernel + response team about the handover. The affected silicon vendor must + have and maintain their own documented security process for any + patches shared with their response team that is consistent with + this policy. + + The silicon vendor’s response team can distribute these patches to + their industry partners and to their internal teams under the + silicon vendor’s documented security process. Feedback from the + industry partners goes back to the silicon vendor and is + communicated by the silicon vendor to the kernel response team. + + The handover to the silicon vendor’s response team removes any + responsibility or liability from the kernel response team regarding + premature disclosure, which happens due to the involvement of the + silicon vendor’s internal teams or industry partners. The silicon + vendor guarantees this release of liability by agreeing to this + process. Coordinated release """"""""""""""""""" -The involved parties will negotiate the date and time where the embargo -ends. At that point the prepared mitigations are integrated into the -relevant kernel trees and published. There is no pre-notification process: -fixes are published in public and available to everyone at the same time. +The involved parties will negotiate the date and time when the embargo +ends. At that point, the prepared mitigations are published into the +relevant kernel trees. There is no pre-notification process: the +mitigations are published in public and available to everyone at the same +time. While we understand that hardware security issues need coordinated embargo -time, the embargo time should be constrained to the minimum time which is -required for all involved parties to develop, test and prepare the +time, the embargo time should be constrained to the minimum time that is +required for all involved parties to develop, test, and prepare their mitigations. Extending embargo time artificially to meet conference talk -dates or other non-technical reasons is creating more work and burden for -the involved developers and response teams as the patches need to be kept -up to date in order to follow the ongoing upstream kernel development, -which might create conflicting changes. +dates or other non-technical reasons creates more work and burden for the +involved developers and response teams as the patches need to be kept up to +date in order to follow the ongoing upstream kernel development, which +might create conflicting changes. CVE assignment """""""""""""" @@ -275,34 +313,35 @@ an involved disclosed party. The current ambassadors list: If you want your organization to be added to the ambassadors list, please contact the hardware security team. The nominated ambassador has to -understand and support our process fully and is ideally well connected in +understand and support our process fully and is ideally well-connected in the Linux kernel community. Encrypted mailing-lists ----------------------- -We use encrypted mailing-lists for communication. The operating principle +We use encrypted mailing lists for communication. The operating principle of these lists is that email sent to the list is encrypted either with the -list's PGP key or with the list's S/MIME certificate. The mailing-list +list's PGP key or with the list's S/MIME certificate. The mailing list software decrypts the email and re-encrypts it individually for each subscriber with the subscriber's PGP key or S/MIME certificate. Details -about the mailing-list software and the setup which is used to ensure the +about the mailing list software and the setup that is used to ensure the security of the lists and protection of the data can be found here: https://korg.wiki.kernel.org/userdoc/remail. List keys ^^^^^^^^^ -For initial contact see :ref:`Contact`. For incident specific mailing-lists -the key and S/MIME certificate are conveyed to the subscribers by email -sent from the specific list. +For initial contact see the :ref:`Contact` section above. For incident +specific mailing lists, the key and S/MIME certificate are conveyed to the +subscribers by email sent from the specific list. -Subscription to incident specific lists +Subscription to incident-specific lists ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Subscription is handled by the response teams. Disclosed parties who want -to participate in the communication send a list of potential subscribers to -the response team so the response team can validate subscription requests. +Subscription to incident-specific lists is handled by the response teams. +Disclosed parties who want to participate in the communication send a list +of potential experts to the response team so the response team can validate +subscription requests. Each subscriber needs to send a subscription request to the response team by email. The email must be signed with the subscriber's PGP key or S/MIME diff --git a/Documentation/rust/general-information.rst b/Documentation/rust/general-information.rst index 4bb6ac12d482..e3f388ef4ee4 100644 --- a/Documentation/rust/general-information.rst +++ b/Documentation/rust/general-information.rst @@ -7,6 +7,14 @@ This document contains useful information to know when working with the Rust support in the kernel. +``no_std`` +---------- + +The Rust support in the kernel can link only `core <https://doc.rust-lang.org/core/>`_, +but not `std <https://doc.rust-lang.org/std/>`_. Crates for use in the +kernel must opt into this behavior using the ``#![no_std]`` attribute. + + Code documentation ------------------ diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst index cc3f11e0d441..d06a36106cd4 100644 --- a/Documentation/rust/quick-start.rst +++ b/Documentation/rust/quick-start.rst @@ -5,17 +5,93 @@ Quick Start This document describes how to get started with kernel development in Rust. +There are a few ways to install a Rust toolchain needed for kernel development. +A simple way is to use the packages from your Linux distribution if they are +suitable -- the first section below explains this approach. An advantage of this +approach is that, typically, the distribution will match the LLVM used by Rust +and Clang. + +Another way is using the prebuilt stable versions of LLVM+Rust provided on +`kernel.org <https://kernel.org/pub/tools/llvm/rust/>`_. These are the same slim +and fast LLVM toolchains from :ref:`Getting LLVM <getting_llvm>` with versions +of Rust added to them that Rust for Linux supports. Two sets are provided: the +"latest LLVM" and "matching LLVM" (please see the link for more information). + +Alternatively, the next two "Requirements" sections explain each component and +how to install them through ``rustup``, the standalone installers from Rust +and/or building them. + +The rest of the document explains other aspects on how to get started. + + +Distributions +------------- + +Arch Linux +********** + +Arch Linux provides recent Rust releases and thus it should generally work out +of the box, e.g.:: + + pacman -S rust rust-src rust-bindgen + + +Debian +****** + +Debian Unstable (Sid), outside of the freeze period, provides recent Rust +releases and thus it should generally work out of the box, e.g.:: + + apt install rustc rust-src bindgen rustfmt rust-clippy + + +Fedora Linux +************ + +Fedora Linux provides recent Rust releases and thus it should generally work out +of the box, e.g.:: + + dnf install rust rust-src bindgen-cli rustfmt clippy + + +Gentoo Linux +************ + +Gentoo Linux (and especially the testing branch) provides recent Rust releases +and thus it should generally work out of the box, e.g.:: + + USE='rust-src rustfmt clippy' emerge dev-lang/rust dev-util/bindgen + +``LIBCLANG_PATH`` may need to be set. + + +Nix +*** + +Nix (unstable channel) provides recent Rust releases and thus it should +generally work out of the box, e.g.:: + + { pkgs ? import <nixpkgs> {} }: + pkgs.mkShell { + nativeBuildInputs = with pkgs; [ rustc rust-bindgen rustfmt clippy ]; + RUST_LIB_SRC = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}"; + } + + +openSUSE +******** + +openSUSE Slowroll and openSUSE Tumbleweed provide recent Rust releases and thus +they should generally work out of the box, e.g.:: + + zypper install rust rust1.79-src rust-bindgen clang + Requirements: Building ---------------------- This section explains how to fetch the tools needed for building. -Some of these requirements might be available from Linux distributions -under names like ``rustc``, ``rust-src``, ``rust-bindgen``, etc. However, -at the time of writing, they are likely not to be recent enough unless -the distribution tracks the latest releases. - To easily check whether the requirements are met, the following target can be used:: @@ -29,16 +105,15 @@ if that is the case. rustc ***** -A particular version of the Rust compiler is required. Newer versions may or -may not work because, for the moment, the kernel depends on some unstable -Rust features. +A recent version of the Rust compiler is required. If ``rustup`` is being used, enter the kernel build directory (or use -``--path=<build-dir>`` argument to the ``set`` sub-command) and run:: +``--path=<build-dir>`` argument to the ``set`` sub-command) and run, +for instance:: - rustup override set $(scripts/min-tool-version.sh rustc) + rustup override set stable -This will configure your working directory to use the correct version of +This will configure your working directory to use the given version of ``rustc`` without affecting your default toolchain. Note that the override applies to the current working directory (and its @@ -65,9 +140,9 @@ version later on requires re-adding the component. Otherwise, if a standalone installer is used, the Rust source tree may be downloaded into the toolchain's installation folder:: - curl -L "https://static.rust-lang.org/dist/rust-src-$(scripts/min-tool-version.sh rustc).tar.gz" | + curl -L "https://static.rust-lang.org/dist/rust-src-$(rustc --version | cut -d' ' -f2).tar.gz" | tar -xzf - -C "$(rustc --print sysroot)/lib" \ - "rust-src-$(scripts/min-tool-version.sh rustc)/rust-src/lib/" \ + "rust-src-$(rustc --version | cut -d' ' -f2)/rust-src/lib/" \ --strip-components=3 In this case, upgrading the Rust compiler version later on requires manually @@ -101,26 +176,22 @@ bindgen ******* The bindings to the C side of the kernel are generated at build time using -the ``bindgen`` tool. A particular version is required. +the ``bindgen`` tool. -Install it via (note that this will download and build the tool from source):: +Install it, for instance, via (note that this will download and build the tool +from source):: - cargo install --locked --version $(scripts/min-tool-version.sh bindgen) bindgen-cli + cargo install --locked bindgen-cli -``bindgen`` needs to find a suitable ``libclang`` in order to work. If it is -not found (or a different ``libclang`` than the one found should be used), -the process can be tweaked using the environment variables understood by -``clang-sys`` (the Rust bindings crate that ``bindgen`` uses to access -``libclang``): +``bindgen`` uses the ``clang-sys`` crate to find a suitable ``libclang`` (which +may be linked statically, dynamically or loaded at runtime). By default, the +``cargo`` command above will produce a ``bindgen`` binary that will load +``libclang`` at runtime. If it is not found (or a different ``libclang`` than +the one found should be used), the process can be tweaked, e.g. by using the +``LIBCLANG_PATH`` environment variable. For details, please see ``clang-sys``'s +documentation at: -* ``LLVM_CONFIG_PATH`` can be pointed to an ``llvm-config`` executable. - -* Or ``LIBCLANG_PATH`` can be pointed to a ``libclang`` shared library - or to the directory containing it. - -* Or ``CLANG_PATH`` can be pointed to a ``clang`` executable. - -For details, please see ``clang-sys``'s documentation at: + https://github.com/KyleMayes/clang-sys#linking https://github.com/KyleMayes/clang-sys#environment-variables @@ -164,20 +235,6 @@ can be installed manually:: The standalone installers also come with ``clippy``. -cargo -***** - -``cargo`` is the Rust native build system. It is currently required to run -the tests since it is used to build a custom standard library that contains -the facilities provided by the custom ``alloc`` in the kernel. The tests can -be run using the ``rusttest`` Make target. - -If ``rustup`` is being used, all the profiles already install the tool, -thus nothing needs to be done. - -The standalone installers also come with ``cargo``. - - rustdoc ******* diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst index acfd0c2be48d..568b71b415a4 100644 --- a/Documentation/rust/testing.rst +++ b/Documentation/rust/testing.rst @@ -131,9 +131,8 @@ Additionally, there are the ``#[test]`` tests. These can be run using the make LLVM=1 rusttest -This requires the kernel ``.config`` and downloads external repositories. It -runs the ``#[test]`` tests on the host (currently) and thus is fairly limited in -what these tests can test. +This requires the kernel ``.config``. It runs the ``#[test]`` tests on the host +(currently) and thus is fairly limited in what these tests can test. The Kselftests -------------- diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst index f02e6cf3516a..74df19be91f6 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst @@ -21,9 +21,9 @@ are often referred to as greyscale formats. .. raw:: latex - \scriptsize + \tiny -.. tabularcolumns:: |p{3.6cm}|p{3.0cm}|p{1.3cm}|p{2.6cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| +.. tabularcolumns:: |p{3.6cm}|p{2.4cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| .. flat-table:: Luma-Only Image Formats :header-rows: 1 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fe722c5dada9..b3be87489108 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2592,7 +2592,7 @@ Specifically: 0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT] 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND] 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ] - 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] + 0x6030 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] [1]_ 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] [1]_ ... @@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap). See KVM_SET_USER_MEMORY_REGION2 for additional details. 4.143 KVM_PRE_FAULT_MEMORY ------------------------- +--------------------------- :Capability: KVM_CAP_PRE_FAULT_MEMORY :Architectures: none @@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a stage-2 read page fault, e.g. faults in memory as needed, but doesn't break CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed. +In the case of confidential VM types where there is an initial set up of +private guest memory before the guest is 'finalized'/measured, this ioctl +should only be issued after completing all the necessary setup to put the +guest into a 'finalized' state so that the above semantics can be reliably +ensured. + In some cases, multiple vCPUs might share the page tables. In this case, the ioctl can be called in parallel. diff --git a/Documentation/wmi/devices/msi-wmi-platform.rst b/Documentation/wmi/devices/msi-wmi-platform.rst index 29b1b2e6d42c..31a136942892 100644 --- a/Documentation/wmi/devices/msi-wmi-platform.rst +++ b/Documentation/wmi/devices/msi-wmi-platform.rst @@ -130,12 +130,12 @@ data using the `bmfdec <https://github.com/pali/bmfdec>`_ utility: Due to a peculiarity in how Windows handles the ``CreateByteField()`` ACPI operator (errors only happen when a invalid byte field is ultimately accessed), all methods require a 32 byte input -buffer, even if the Binay MOF says otherwise. +buffer, even if the Binary MOF says otherwise. The input buffer contains a single byte to select the subfeature to be accessed and 31 bytes of input data, the meaning of which depends on the subfeature being accessed. -The output buffer contains a singe byte which signals success or failure (``0x00`` on failure) +The output buffer contains a single byte which signals success or failure (``0x00`` on failure) and 31 bytes of output data, the meaning if which depends on the subfeature being accessed. WMI method Get_EC() @@ -147,7 +147,7 @@ data contains a flag byte and a 28 byte controller firmware version string. The first 4 bits of the flag byte contain the minor version of the embedded controller interface, with the next 2 bits containing the major version of the embedded controller interface. -The 7th bit signals if the embedded controller page chaged (exact meaning is unknown), and the +The 7th bit signals if the embedded controller page changed (exact meaning is unknown), and the last bit signals if the platform is a Tigerlake platform. The MSI software seems to only use this interface when the last bit is set. diff --git a/MAINTAINERS b/MAINTAINERS index c0a3d9e93689..f328373463b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2196,7 +2196,7 @@ N: digicolor ARM/CORESIGHT FRAMEWORK AND DRIVERS M: Suzuki K Poulose <suzuki.poulose@arm.com> R: Mike Leach <mike.leach@linaro.org> -R: James Clark <james.clark@arm.com> +R: James Clark <james.clark@linaro.org> L: coresight@lists.linaro.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -5306,7 +5306,7 @@ F: drivers/media/cec/i2c/ch7322.c CIRRUS LOGIC AUDIO CODEC DRIVERS M: David Rhodes <david.rhodes@cirrus.com> M: Richard Fitzgerald <rf@opensource.cirrus.com> -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Maintained F: Documentation/devicetree/bindings/sound/cirrus,cs* @@ -5375,7 +5375,7 @@ F: sound/soc/codecs/lochnagar-sc.c CIRRUS LOGIC MADERA CODEC DRIVERS M: Charles Keepax <ckeepax@opensource.cirrus.com> M: Richard Fitzgerald <rf@opensource.cirrus.com> -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Supported W: https://github.com/CirrusLogic/linux-drivers/wiki @@ -5613,6 +5613,7 @@ M: Ira Weiny <ira.weiny@intel.com> M: Dan Williams <dan.j.williams@intel.com> L: linux-cxl@vger.kernel.org S: Maintained +F: Documentation/driver-api/cxl F: drivers/cxl/ F: include/linux/einj-cxl.h F: include/linux/cxl-event.h @@ -10655,6 +10656,7 @@ F: Documentation/ABI/testing/sysfs-bus-i3c F: Documentation/devicetree/bindings/i3c/ F: Documentation/driver-api/i3c F: drivers/i3c/ +F: include/dt-bindings/i3c/ F: include/linux/i3c/ IBM Operation Panel Input Driver @@ -13322,14 +13324,16 @@ F: Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt F: drivers/i2c/muxes/i2c-mux-ltc4306.c LTP (Linux Test Project) +M: Andrea Cervesato <andrea.cervesato@suse.com> M: Cyril Hrubis <chrubis@suse.cz> M: Jan Stancek <jstancek@redhat.com> M: Petr Vorel <pvorel@suse.cz> M: Li Wang <liwang@redhat.com> M: Yang Xu <xuyang2018.jy@fujitsu.com> +M: Xiao Yang <yangx.jy@fujitsu.com> L: ltp@lists.linux.it (subscribers-only) S: Maintained -W: http://linux-test-project.github.io/ +W: https://linux-test-project.readthedocs.io/ T: git https://github.com/linux-test-project/ltp.git LTR390 AMBIENT/UV LIGHT SENSOR DRIVER @@ -13537,7 +13541,7 @@ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner <mlindner@marvell.com> M: Stephen Hemminger <stephen@networkplumber.org> L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER @@ -15934,6 +15938,7 @@ F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h F: include/linux/netdevice.h +F: include/linux/skbuff.h F: include/net/ F: include/uapi/linux/in.h F: include/uapi/linux/net.h @@ -17894,7 +17899,7 @@ F: tools/perf/ PERFORMANCE EVENTS TOOLING ARM64 R: John Garry <john.g.garry@oracle.com> R: Will Deacon <will@kernel.org> -R: James Clark <james.clark@arm.com> +R: James Clark <james.clark@linaro.org> R: Mike Leach <mike.leach@linaro.org> R: Leo Yan <leo.yan@linux.dev> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -18554,7 +18559,7 @@ F: drivers/usb/misc/qcom_eud.c QCOM IPA DRIVER M: Alex Elder <elder@kernel.org> L: netdev@vger.kernel.org -S: Supported +S: Maintained F: drivers/net/ipa/ QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 10 +PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -445,17 +445,17 @@ KBUILD_USERLDFLAGS := $(USERLDFLAGS) # host programs. export rust_common_flags := --edition=2021 \ -Zbinary_dep_depinfo=y \ - -Dunsafe_op_in_unsafe_fn -Drust_2018_idioms \ - -Dunreachable_pub -Dnon_ascii_idents \ + -Dunsafe_op_in_unsafe_fn \ + -Dnon_ascii_idents \ + -Wrust_2018_idioms \ + -Wunreachable_pub \ -Wmissing_docs \ - -Drustdoc::missing_crate_level_docs \ - -Dclippy::correctness -Dclippy::style \ - -Dclippy::suspicious -Dclippy::complexity \ - -Dclippy::perf \ - -Dclippy::let_unit_value -Dclippy::mut_mut \ - -Dclippy::needless_bitwise_bool \ - -Dclippy::needless_continue \ - -Dclippy::no_mangle_with_rust_abi \ + -Wrustdoc::missing_crate_level_docs \ + -Wclippy::all \ + -Wclippy::mut_mut \ + -Wclippy::needless_bitwise_bool \ + -Wclippy::needless_continue \ + -Wclippy::no_mangle_with_rust_abi \ -Wclippy::dbg_macro KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) \ @@ -493,7 +493,6 @@ RUSTDOC = rustdoc RUSTFMT = rustfmt CLIPPY_DRIVER = clippy-driver BINDGEN = bindgen -CARGO = cargo PAHOLE = pahole RESOLVE_BTFIDS = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids LEX = flex @@ -559,7 +558,7 @@ KBUILD_RUSTFLAGS := $(rust_common_flags) \ -Csymbol-mangling-version=v0 \ -Crelocation-model=static \ -Zfunction-sections=n \ - -Dclippy::float_arithmetic + -Wclippy::float_arithmetic KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := @@ -587,7 +586,7 @@ endif export RUSTC_BOOTSTRAP := 1 export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG -export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO +export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX @@ -1959,9 +1958,12 @@ quiet_cmd_tags = GEN $@ tags TAGS cscope gtags: FORCE $(call cmd,tags) -# IDE support targets +# Generate rust-project.json (a file that describes the structure of non-Cargo +# Rust projects) for rust-analyzer (an implementation of the Language Server +# Protocol). PHONY += rust-analyzer rust-analyzer: + +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh $(Q)$(MAKE) $(build)=rust $@ # Script to generate missing namespace dependencies diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 2bb8cbeedf91..b191d87f89c4 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -534,8 +534,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define ioread16be(p) swab16(ioread16(p)) #define ioread32be(p) swab32(ioread32(p)) +#define ioread64be(p) swab64(ioread64(p)) #define iowrite16be(v,p) iowrite16(swab16(v), (p)) #define iowrite32be(v,p) iowrite32(swab32(v), (p)) +#define iowrite64be(v,p) iowrite64(swab64(v), (p)) #define inb_p inb #define inw_p inw @@ -634,8 +636,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); */ #define ioread64 ioread64 #define iowrite64 iowrite64 -#define ioread64be ioread64be -#define iowrite64be iowrite64be #define ioread8_rep ioread8_rep #define ioread16_rep ioread16_rep #define ioread32_rep ioread32_rep diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 954a1916a500..54b2bb817a7f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -87,6 +87,7 @@ config ARM select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE @@ -116,6 +117,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL @@ -736,7 +738,7 @@ config ARM_ERRATA_764319 bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction" depends on CPU_V7 help - This option enables the workaround for the 764319 Cortex A-9 erratum. + This option enables the workaround for the 764319 Cortex-A9 erratum. CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an unexpected Undefined Instruction exception when the DBGSWENABLE external pin is set to 0, even when the CP14 accesses are performed diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6bca03c0c7f0..945b5975fce2 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -9,6 +9,7 @@ OBJS = HEAD = head.o OBJS += misc.o decompress.o +CFLAGS_decompress.o += $(DISABLE_STACKLEAK_PLUGIN) ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o AFLAGS_head.o += -DDEBUG diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 3fcb3e62dc56..d411abd4310e 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -125,7 +125,7 @@ SECTIONS . = BSS_START; __bss_start = .; - .bss : { *(.bss) } + .bss : { *(.bss .bss.*) } _end = .; . = ALIGN(8); /* the stack must be 64-bit aligned */ diff --git a/arch/arm/boot/dts/arm/versatile-ab.dts b/arch/arm/boot/dts/arm/versatile-ab.dts index 6fe6b49f5d8e..635ab9268899 100644 --- a/arch/arm/boot/dts/arm/versatile-ab.dts +++ b/arch/arm/boot/dts/arm/versatile-ab.dts @@ -157,7 +157,7 @@ clocks = <&xtal24mhz>; }; - pclk: clock-24000000 { + pclk: clock-pclk { #clock-cells = <0>; compatible = "fixed-factor-clock"; clock-div = <1>; diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 360f0d2406bf..f80a85b091d6 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -26,6 +26,13 @@ struct stackframe { #endif }; +static inline bool on_thread_stack(void) +{ + unsigned long delta = current_stack_pointer ^ (unsigned long)current->stack; + + return delta < THREAD_SIZE; +} + static __always_inline void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) { diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4c8632d5c432..d60f6e83a9f7 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -42,7 +42,7 @@ #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ - *(.proc.info.init) \ + KEEP(*(.proc.info.init)) \ __proc_info_end = .; #define IDMAP_TEXT \ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 6150a716828c..f01d23a220e6 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1065,6 +1065,7 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) @@ -1078,6 +1079,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) #ifdef CONFIG_HARDEN_BRANCH_HISTORY .section .vectors.bhb.loop8, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_loop8_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) @@ -1090,6 +1092,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) W(b) vector_bhb_loop8_fiq .section .vectors.bhb.bpiall, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_bpiall_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 5c31e9de7a60..f379c852dcb7 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -119,6 +119,9 @@ no_work_pending: ct_user_enter save = 0 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase_on_task_stack +#endif restore_user_regs fast = 0, offset = 0 ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 677f218f7e84..da488d92e7a0 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -395,11 +395,6 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, return 0; } -struct mod_unwind_map { - const Elf_Shdr *unw_sec; - const Elf_Shdr *txt_sec; -}; - static const Elf_Shdr *find_mod_section(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) { diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 7147edbe56c6..1d230ac9d0eb 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -85,8 +85,7 @@ static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, pc); - return true; + return perf_callchain_store(entry, pc) == 0; } void diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index c16d196b5aad..5eddb75a7174 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -63,7 +63,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -83,7 +83,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bd9127c4b451..de373c6c2ae8 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -99,7 +99,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { @@ -116,7 +116,7 @@ SECTIONS #endif .init.pv_table : { __pv_table_begin = .; - *(.pv_table) + KEEP(*(.pv_table)) __pv_table_end = .; } diff --git a/arch/arm/mach-alpine/alpine_cpu_pm.c b/arch/arm/mach-alpine/alpine_cpu_pm.c index 13ae8412e9ce..b48da6f12b6c 100644 --- a/arch/arm/mach-alpine/alpine_cpu_pm.c +++ b/arch/arm/mach-alpine/alpine_cpu_pm.c @@ -29,7 +29,7 @@ int alpine_cpu_wakeup(unsigned int phys_cpu, uint32_t phys_resume_addr) /* * Set CPU resume address - * secure firmware running on boot will jump to this address - * after setting proper CPU mode, and initialiing e.g. secure + * after setting proper CPU mode, and initializing e.g. secure * regs (the same mode all CPUs are booted to - usually HYP) */ writel(phys_resume_addr, diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index efa6faa62a2c..1713bdf3b71e 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -21,6 +21,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/gpio/machine.h> +#include <linux/gpio/property.h> #include <linux/gpio.h> #include <linux/err.h> #include <linux/clk.h> @@ -40,6 +41,7 @@ #include <linux/platform_data/mmc-pxamci.h> #include "udc.h" #include "gumstix.h" +#include "devices.h" #include "generic.h" @@ -99,8 +101,8 @@ static void __init gumstix_mmc_init(void) } #endif -#ifdef CONFIG_USB_PXA25X -static const struct property_entry spitz_mci_props[] __initconst = { +#if IS_ENABLED(CONFIG_USB_PXA25X) +static const struct property_entry gumstix_vbus_props[] __initconst = { PROPERTY_ENTRY_GPIO("vbus-gpios", &pxa2xx_gpiochip_node, GPIO_GUMSTIX_USB_GPIOn, GPIO_ACTIVE_HIGH), PROPERTY_ENTRY_GPIO("pullup-gpios", &pxa2xx_gpiochip_node, @@ -109,8 +111,9 @@ static const struct property_entry spitz_mci_props[] __initconst = { }; static const struct platform_device_info gumstix_gpio_vbus_info __initconst = { - .name = "gpio-vbus", - .id = PLATFORM_DEVID_NONE, + .name = "gpio-vbus", + .id = PLATFORM_DEVID_NONE, + .properties = gumstix_vbus_props, }; static void __init gumstix_udc_init(void) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index c30df1097c52..9f7454b8efa7 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1109,7 +1109,7 @@ void ecard_remove_driver(struct ecard_driver *drv) driver_unregister(&drv->drv); } -static int ecard_match(struct device *_dev, struct device_driver *_drv) +static int ecard_match(struct device *_dev, const struct device_driver *_drv) { struct expansion_card *ec = ECARD_DEV(_dev); struct ecard_driver *drv = ECARD_DRV(_drv); diff --git a/arch/arm/mm/proc.c b/arch/arm/mm/proc.c index bdbbf65d1b36..2027845efefb 100644 --- a/arch/arm/mm/proc.c +++ b/arch/arm/mm/proc.c @@ -17,7 +17,7 @@ void cpu_arm7tdmi_proc_init(void); __ADDRESSABLE(cpu_arm7tdmi_proc_init); void cpu_arm7tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm7tdmi_proc_fin); -void cpu_arm7tdmi_reset(void); +void cpu_arm7tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm7tdmi_reset); int cpu_arm7tdmi_do_idle(void); __ADDRESSABLE(cpu_arm7tdmi_do_idle); @@ -32,7 +32,7 @@ void cpu_arm720_proc_init(void); __ADDRESSABLE(cpu_arm720_proc_init); void cpu_arm720_proc_fin(void); __ADDRESSABLE(cpu_arm720_proc_fin); -void cpu_arm720_reset(void); +void cpu_arm720_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm720_reset); int cpu_arm720_do_idle(void); __ADDRESSABLE(cpu_arm720_do_idle); @@ -49,7 +49,7 @@ void cpu_arm740_proc_init(void); __ADDRESSABLE(cpu_arm740_proc_init); void cpu_arm740_proc_fin(void); __ADDRESSABLE(cpu_arm740_proc_fin); -void cpu_arm740_reset(void); +void cpu_arm740_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm740_reset); int cpu_arm740_do_idle(void); __ADDRESSABLE(cpu_arm740_do_idle); @@ -64,7 +64,7 @@ void cpu_arm9tdmi_proc_init(void); __ADDRESSABLE(cpu_arm9tdmi_proc_init); void cpu_arm9tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm9tdmi_proc_fin); -void cpu_arm9tdmi_reset(void); +void cpu_arm9tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm9tdmi_reset); int cpu_arm9tdmi_do_idle(void); __ADDRESSABLE(cpu_arm9tdmi_do_idle); @@ -79,7 +79,7 @@ void cpu_arm920_proc_init(void); __ADDRESSABLE(cpu_arm920_proc_init); void cpu_arm920_proc_fin(void); __ADDRESSABLE(cpu_arm920_proc_fin); -void cpu_arm920_reset(void); +void cpu_arm920_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm920_reset); int cpu_arm920_do_idle(void); __ADDRESSABLE(cpu_arm920_do_idle); @@ -102,7 +102,7 @@ void cpu_arm922_proc_init(void); __ADDRESSABLE(cpu_arm922_proc_init); void cpu_arm922_proc_fin(void); __ADDRESSABLE(cpu_arm922_proc_fin); -void cpu_arm922_reset(void); +void cpu_arm922_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm922_reset); int cpu_arm922_do_idle(void); __ADDRESSABLE(cpu_arm922_do_idle); @@ -119,7 +119,7 @@ void cpu_arm925_proc_init(void); __ADDRESSABLE(cpu_arm925_proc_init); void cpu_arm925_proc_fin(void); __ADDRESSABLE(cpu_arm925_proc_fin); -void cpu_arm925_reset(void); +void cpu_arm925_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm925_reset); int cpu_arm925_do_idle(void); __ADDRESSABLE(cpu_arm925_do_idle); @@ -159,7 +159,7 @@ void cpu_arm940_proc_init(void); __ADDRESSABLE(cpu_arm940_proc_init); void cpu_arm940_proc_fin(void); __ADDRESSABLE(cpu_arm940_proc_fin); -void cpu_arm940_reset(void); +void cpu_arm940_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm940_reset); int cpu_arm940_do_idle(void); __ADDRESSABLE(cpu_arm940_do_idle); @@ -174,7 +174,7 @@ void cpu_arm946_proc_init(void); __ADDRESSABLE(cpu_arm946_proc_init); void cpu_arm946_proc_fin(void); __ADDRESSABLE(cpu_arm946_proc_fin); -void cpu_arm946_reset(void); +void cpu_arm946_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm946_reset); int cpu_arm946_do_idle(void); __ADDRESSABLE(cpu_arm946_do_idle); @@ -429,7 +429,7 @@ void cpu_v7_proc_init(void); __ADDRESSABLE(cpu_v7_proc_init); void cpu_v7_proc_fin(void); __ADDRESSABLE(cpu_v7_proc_fin); -void cpu_v7_reset(void); +void cpu_v7_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_v7_reset); int cpu_v7_do_idle(void); __ADDRESSABLE(cpu_v7_do_idle); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7fd70be0463f..a2f8ff354ca6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -168,9 +168,9 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) + select HAVE_ARCH_KASAN_VMALLOC + select HAVE_ARCH_KASAN_SW_TAGS + select HAVE_ARCH_KASAN_HW_TAGS if ARM64_MTE # Some instrumentation may be unsound, hence EXPERT select HAVE_ARCH_KCSAN if EXPERT select HAVE_ARCH_KFENCE @@ -211,8 +211,8 @@ config ARM64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_GCC_PLUGINS select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI @@ -1069,18 +1069,28 @@ config ARM64_ERRATUM_3117295 If unsure, say Y. config ARM64_ERRATUM_3194386 - bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" + bool "Cortex-*/Neoverse-*: workaround for MSR SSBS not self-synchronizing" default y help This option adds the workaround for the following errata: + * ARM Cortex-A76 erratum 3324349 + * ARM Cortex-A77 erratum 3324348 + * ARM Cortex-A78 erratum 3324344 + * ARM Cortex-A78C erratum 3324346 + * ARM Cortex-A78C erratum 3324347 * ARM Cortex-A710 erratam 3324338 * ARM Cortex-A720 erratum 3456091 + * ARM Cortex-A725 erratum 3456106 + * ARM Cortex-X1 erratum 3324344 + * ARM Cortex-X1C erratum 3324346 * ARM Cortex-X2 erratum 3324338 * ARM Cortex-X3 erratum 3324335 * ARM Cortex-X4 erratum 3194386 * ARM Cortex-X925 erratum 3324334 + * ARM Neoverse-N1 erratum 3324349 * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse-V1 erratum 3324341 * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 @@ -1088,11 +1098,11 @@ config ARM64_ERRATUM_3194386 subsequent speculative instructions, which may permit unexepected speculative store bypassing. - Work around this problem by placing a speculation barrier after - kernel changes to SSBS. The presence of the SSBS special-purpose - register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such - that userspace will use the PR_SPEC_STORE_BYPASS prctl to change - SSBS. + Work around this problem by placing a Speculation Barrier (SB) or + Instruction Synchronization Barrier (ISB) after kernel changes to + SSBS. The presence of the SSBS special-purpose register is hidden + from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such that userspace + will use the PR_SPEC_STORE_BYPASS prctl to change SSBS. If unsure, say Y. @@ -1471,7 +1481,6 @@ config HOTPLUG_CPU config NUMA bool "NUMA Memory Allocation and Scheduler Support" select GENERIC_ARCH_NUMA - select ACPI_NUMA if ACPI select OF_NUMA select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK @@ -2337,6 +2346,17 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config COMPRESSED_INSTALL + bool "Install compressed image by default" + help + This makes the regular "make install" install the compressed + image we built, not the legacy uncompressed one. + + You can check that a compressed image works for you by doing + "make zinstall" first, and verifying that everything is fine + in your environment before making "make install" do this for + you. + config DMI bool "Enable support for SMBIOS (DMI) tables" depends on EFI diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 3f0f35fd5bb7..f6bc3da1ef11 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -182,7 +182,13 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -install: KBUILD_IMAGE := $(boot)/Image +ifeq ($(CONFIG_COMPRESSED_INSTALL),y) + DEFAULT_KBUILD_IMAGE = $(KBUILD_IMAGE) +else + DEFAULT_KBUILD_IMAGE = $(boot)/Image +endif + +install: KBUILD_IMAGE := $(DEFAULT_KBUILD_IMAGE) install zinstall: $(call cmd,install) @@ -229,7 +235,7 @@ define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)' - echo ' install - Install uncompressed kernel' + echo ' install - Install kernel (compressed if COMPRESSED_INSTALL set)' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' echo ' (distribution) /sbin/installkernel or' diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi index e8f4d136e5df..9202181fbd65 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi @@ -43,15 +43,6 @@ sound-dai = <&mcasp0>; }; }; - - reg_usb_hub: regulator-usb-hub { - compatible = "regulator-fixed"; - enable-active-high; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>; - regulator-boot-on; - regulator-name = "HUB_PWR_EN"; - }; }; /* Verdin ETHs */ @@ -193,11 +184,6 @@ status = "okay"; }; -/* Do not force CTRL_SLEEP_MOCI# always enabled */ -®_force_sleep_moci { - status = "disabled"; -}; - /* Verdin SD_1 */ &sdhci1 { status = "okay"; @@ -218,15 +204,7 @@ }; &usb1 { - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - usb-hub@1 { - compatible = "usb424,2744"; - reg = <1>; - vdd-supply = <®_usb_hub>; - }; }; /* Verdin CTRL_WAKE1_MICO# */ diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi index 359f53f3e019..5bef31b8577b 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi @@ -138,12 +138,6 @@ vin-supply = <®_1v8>; }; - /* - * By default we enable CTRL_SLEEP_MOCI#, this is required to have - * peripherals on the carrier board powered. - * If more granularity or power saving is required this can be disabled - * in the carrier board device tree files. - */ reg_force_sleep_moci: regulator-force-sleep-moci { compatible = "regulator-fixed"; enable-active-high; diff --git a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi index e65db6ce02bf..df7945156397 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi @@ -146,6 +146,8 @@ power-domains = <&k3_pds 79 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 79 0>; clock-names = "gpio"; + gpio-ranges = <&mcu_pmx0 0 0 21>, <&mcu_pmx0 21 23 1>, + <&mcu_pmx0 22 32 2>; }; mcu_rti0: watchdog@4880000 { diff --git a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi index 57383bd2eaeb..0ce9721b4176 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi @@ -45,7 +45,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 92 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 22 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 149 2 PIN_GPIO_RANGE_IOPAD>; diff --git a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi index c797980528ec..dde4bd5c6645 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi @@ -193,7 +193,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 55 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 17 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 101 25 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts index 9338d987180d..ffa38f41679d 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts @@ -1262,6 +1262,14 @@ &serdes0 { status = "okay"; + serdes0_pcie1_link: phy@0 { + reg = <0>; + cdns,num-lanes = <2>; + #phy-cells = <0>; + cdns,phy-type = <PHY_TYPE_PCIE>; + resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>; + }; + serdes0_usb_link: phy@3 { reg = <3>; cdns,num-lanes = <1>; @@ -1386,23 +1394,6 @@ phys = <&transceiver3>; }; -&serdes0 { - status = "okay"; - - serdes0_pcie1_link: phy@0 { - reg = <0>; - cdns,num-lanes = <4>; - #phy-cells = <0>; - cdns,phy-type = <PHY_TYPE_PCIE>; - resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>, - <&serdes_wiz0 3>, <&serdes_wiz0 4>; - }; -}; - -&serdes_wiz0 { - status = "okay"; -}; - &pcie1_rc { status = "okay"; num-lanes = <2>; diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi index f170f80f00c1..d4ac1c9872a5 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi @@ -2755,7 +2755,7 @@ interrupts = <GIC_SPI 550 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 551 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc500>, <&main_udmap 0x4500>; + dmas = <&main_udmap 0xc403>, <&main_udmap 0x4403>; dma-names = "tx", "rx"; clocks = <&k3_clks 268 0>; clock-names = "fck"; @@ -2773,7 +2773,7 @@ interrupts = <GIC_SPI 552 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 553 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc501>, <&main_udmap 0x4501>; + dmas = <&main_udmap 0xc404>, <&main_udmap 0x4404>; dma-names = "tx", "rx"; clocks = <&k3_clks 269 0>; clock-names = "fck"; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1cb0704c6163..5fd7caea4419 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,12 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -165,12 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 4e753908b801..a0a5bbae7229 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <asm/insn.h> +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE #define JUMP_TABLE_ENTRY(key, label) \ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index d81bac256abc..6199c9f7ec6e 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -104,7 +104,7 @@ alternative_else_nop_endif #define __ptrauth_save_key(ctxt, key) \ do { \ - u64 __val; \ + u64 __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f8efbc128446..7a4f5604be3f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1065,6 +1065,28 @@ static inline bool pgtable_l5_enabled(void) { return false; } #define p4d_offset_kimg(dir,addr) ((p4d_t *)dir) +static inline +p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + /* + * With runtime folding of the pud, pud_offset_lockless() passes + * the 'pgd_t *' we return here to p4d_to_folded_pud(), which + * will offset the pointer assuming that it points into + * a page-table page. However, the fast GUP path passes us a + * pgd_t allocated on the stack and so we must use the original + * pointer in 'pgdp' to construct the p4d pointer instead of + * using the generic p4d_offset_lockless() implementation. + * + * Note: reusing the original pointer means that we may + * dereference the same (live) page-table entry multiple times. + * This is safe because it is still only loaded once in the + * context of each level and the CPU guarantees same-address + * read-after-read ordering. + */ + return p4d_offset(pgdp, addr); +} +#define p4d_offset_lockless p4d_offset_lockless_folded + #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #define pgd_ERROR(e) \ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 28f665e0975a..1aa4ecb73429 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls index 3cfafd003b2d..0542a718871a 100644 --- a/arch/arm64/kernel/Makefile.syscalls +++ b/arch/arm64/kernel/Makefile.syscalls @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += -syscall_abis_64 += renameat newstat rlimit memfd_secret +syscall_abis_64 += renameat rlimit memfd_secret syscalltbl = arch/arm64/tools/syscall_%.tbl diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 0c036a9a3c33..2465f291c7e1 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,7 +27,7 @@ #include <asm/numa.h> -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 617424b73f8c..f6b6b4507357 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -434,15 +434,24 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {} }; #endif diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index faf88ec9c48e..f63ea915d6ad 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -7,11 +7,12 @@ */ #include <linux/kernel.h> #include <linux/jump_label.h> +#include <linux/smp.h> #include <asm/insn.h> #include <asm/patching.h> -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry, } aarch64_insn_patch_text_nosync(addr, insn); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a096e2451044..b22d28ec8028 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -355,9 +355,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5e18fbcee9a2..f01f0fd7b7fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -467,6 +467,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d63930c82839..d11da6461278 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). -ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ +ldflags-y := -shared -soname=linux-vdso.so.1 \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) ifdef CONFIG_LD_ORPHAN_WARN diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index cc4508c604b2..25a2cb6317f3 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -98,7 +98,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 -VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += -shared --build-id=sha1 VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 58f09370d17e..8304eb342be9 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -19,6 +19,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on AS_HAS_ARMV8_4 select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a6497228c5a8..86a629aaf0a1 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,9 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ +CFLAGS_sys_regs.o += -Wno-override-init +CFLAGS_handle_exit.o += -Wno-override-init + kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a7ca776b51ec..9bef7638342e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,7 @@ static int kvm_arm_default_max_vcpus(void) /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct + * @type: kvm device type */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { @@ -521,10 +522,10 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) { - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu) && !is_protected_kvm_enabled()) { /* - * Either we're running running an L2 guest, and the API/APK - * bits come from L1's HCR_EL2, or API/APK are both set. + * Either we're running an L2 guest, and the API/APK bits come + * from L1's HCR_EL2, or API/APK are both set. */ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { u64 val; @@ -541,16 +542,10 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) * Save the host keys if there is any chance for the guest * to use pauth, as the entry code will reload the guest * keys in that case. - * Protected mode is the exception to that rule, as the - * entry into the EL2 code eagerly switch back and forth - * between host and hyp keys (and kvm_hyp_ctxt is out of - * reach anyway). */ - if (is_protected_kvm_enabled()) - return; - if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) { struct kvm_cpu_context *ctxt; + ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt); ptrauth_save_keys(ctxt); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f59ccfe11ab9..37ff87d782b6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,7 +27,6 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> #include <asm/kvm_nested.h> -#include <asm/kvm_ptrauth.h> #include <asm/fpsimd.h> #include <asm/debug-monitors.h> #include <asm/processor.h> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 782b34b004be..b43426a493df 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -20,6 +20,8 @@ HOST_EXTRACFLAGS += -I$(objtree)/include lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) +CFLAGS_switch.nvhe.o += -Wno-override-init + hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6af179c6356d..8f5c56d5b1cd 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -173,9 +173,8 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { /* - * Make sure we handle the exit for workarounds and ptrauth - * before the pKVM handling, as the latter could decide to - * UNDEF. + * Make sure we handle the exit for workarounds before the pKVM + * handling, as the latter could decide to UNDEF. */ return (kvm_hyp_handle_sysreg(vcpu, exit_code) || kvm_handle_pvm_sysreg(vcpu, exit_code)); diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 3b9e5464b5b3..afc4aed9231a 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,6 +6,8 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ +CFLAGS_switch.o += -Wno-override-init + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index de789e0f1ae9..bab27f9d8cc6 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -786,7 +786,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) if (!WARN_ON(atomic_read(&mmu->refcnt))) kvm_free_stage2_pgd(mmu); } - kfree(kvm->arch.nested_mmus); + kvfree(kvm->arch.nested_mmus); kvm->arch.nested_mmus = NULL; kvm->arch.nested_mmus_size = 0; kvm_uninit_stage2_mmu(kvm); diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bcbc8c986b1d..bc74d06398ef 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -45,7 +45,8 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. */ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && + iter->nr_lpis) { if (iter->lpi_idx < iter->nr_lpis) xa_find_after(&dist->lpi_xa, &iter->intid, VGIC_LPI_MAX_INTID, @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - iter->lpi_idx > iter->nr_lpis; + (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 7f68cf58b978..41feb858ff9a 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -438,14 +438,13 @@ void kvm_vgic_destroy(struct kvm *kvm) unsigned long i; mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) __kvm_vgic_vcpu_destroy(vcpu); - mutex_lock(&kvm->arch.config_lock); - kvm_vgic_dist_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 8c711deb25aa..c314c016659a 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -9,7 +9,7 @@ #include <kvm/arm_vgic.h> #include "vgic.h" -/** +/* * vgic_irqfd_set_irq: inject the IRQ corresponding to the * irqchip routing entry * @@ -75,7 +75,8 @@ static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, msi->flags = e->msi.flags; msi->devid = e->msi.devid; } -/** + +/* * kvm_set_msi: inject the MSI corresponding to the * MSI routing entry * @@ -98,7 +99,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return vgic_its_inject_msi(kvm, &msi); } -/** +/* * kvm_arch_set_irq_inatomic: fast-path for irqfd injection */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 40bb43f20bf3..ba945ba78cc7 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2040,6 +2040,7 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, * @start_id: the ID of the first entry in the table * (non zero for 2d level tables) * @fn: function to apply on each entry + * @opaque: pointer to opaque data * * Return: < 0 on error, 0 if last element was identified, 1 otherwise * (the last element may not be found on second level tables) @@ -2079,7 +2080,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, return 1; } -/** +/* * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, @@ -2099,6 +2100,8 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, /** * vgic_its_restore_ite - restore an interrupt translation entry + * + * @its: its handle * @event_id: id used for indexing * @ptr: pointer to the ITE entry * @opaque: pointer to the its_device @@ -2231,6 +2234,7 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA + * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, gpa_t ptr, int dte_esz) @@ -2313,7 +2317,7 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, return 1; } -/** +/* * vgic_its_save_device_tables - Save the device table and all ITT * into guest RAM * @@ -2386,7 +2390,7 @@ static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, return ret; } -/** +/* * vgic_its_restore_device_tables - Restore the device table and all ITT * from guest RAM to internal data structs */ @@ -2478,7 +2482,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) return 1; } -/** +/* * vgic_its_save_collection_table - Save the collection table into * guest RAM */ @@ -2518,7 +2522,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_restore_collection_table - reads the collection table * in guest memory and restores the ITS internal state. Requires the * BASER registers to be restored before. @@ -2556,7 +2560,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM * according to v0 ABI */ @@ -2571,7 +2575,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its) return vgic_its_save_collection_table(its); } -/** +/* * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM * to internal data structs according to V0 ABI * diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index ed6e412cd74b..3eecdd2f4b8f 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -370,7 +370,7 @@ static void map_all_vpes(struct kvm *kvm) dist->its_vm.vpes[i]->irq)); } -/** +/* * vgic_v3_save_pending_tables - Save the pending tables into guest RAM * kvm lock and all vcpu lock must be held */ diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f07b3ddff7d4..974849ea7101 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -313,7 +313,7 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne * with all locks dropped. */ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags) + unsigned long flags) __releases(&irq->irq_lock) { struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 03d356a12377..ba8f790431bd 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -186,7 +186,7 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags); + unsigned long flags) __releases(&irq->irq_lock); void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index a4c1dd4741a4..7ceaa1e0b4bc 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -149,7 +149,7 @@ Res0 63:32 UnsignedEnum 31:28 GIC 0b0000 NI 0b0001 GICv3 - 0b0010 GICv4p1 + 0b0011 GICv4p1 EndEnum UnsignedEnum 27:24 Virt_frac 0b0000 NI @@ -903,7 +903,7 @@ EndEnum UnsignedEnum 27:24 GIC 0b0000 NI 0b0001 IMP - 0b0010 V4P1 + 0b0011 V4P1 EndEnum SignedEnum 23:20 AdvSIMD 0b0000 IMP diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ebdb7156560c..70f169210b52 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -476,7 +476,6 @@ config NR_CPUS config NUMA bool "NUMA Support" select SMP - select ACPI_NUMA if ACPI help Say Y to compile the kernel with NUMA (Non-Uniform Memory Access) support. This option improves performance on systems with more diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index aa44b3fe43dd..5da32c00d483 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -34,7 +34,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t clear; - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); pte_val(clear) = (unsigned long)invalid_pte_table; set_pte_at(mm, addr, ptep, clear); @@ -65,7 +65,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t pte, int dirty) { - int changed = !pte_same(*ptep, pte); + int changed = !pte_same(ptep_get(ptep), pte); if (changed) { set_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 92636e82957c..da9e93024626 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -53,13 +53,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (WARN_ON(!pte) || pte_none(*pte)) + if (WARN_ON(!pte) || pte_none(ptep_get(pte))) return false; if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~(_PAGE_VALID | _PAGE_PRESENT))); else - set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) | (_PAGE_VALID | _PAGE_PRESENT))); preempt_disable(); local_flush_tlb_one(addr); diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 44b54965f5b4..5f0677e03817 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -26,8 +26,6 @@ #define KVM_MAX_VCPUS 256 #define KVM_MAX_CPUCFG_REGS 21 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 335fb86778e2..43ec61589e6c 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -39,9 +39,9 @@ struct kvm_steal_time { * Hypercall interface for KVM hypervisor * * a0: function identifier - * a1-a6: args + * a1-a5: args * Return value will be placed in a0. - * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + * Up to 5 arguments are passed in a1, a2, a3, a4, a5. */ static __always_inline long kvm_hypercall0(u64 fid) { diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 3fbf1f37c58e..85431f20a14d 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -106,6 +106,9 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define KFENCE_AREA_START (VMEMMAP_END + 1) #define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) +#define ptep_get(ptep) READ_ONCE(*(ptep)) +#define pmdp_get(pmdp) READ_ONCE(*(pmdp)) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -147,11 +150,6 @@ static inline int p4d_present(p4d_t p4d) return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void p4d_clear(p4d_t *p4dp) -{ - p4d_val(*p4dp) = (unsigned long)invalid_pud_table; -} - static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)p4d_val(p4d); @@ -159,7 +157,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *p4d = p4dval; + WRITE_ONCE(*p4d, p4dval); +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + set_p4d(p4dp, __p4d((unsigned long)invalid_pud_table)); } #define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) @@ -193,17 +196,20 @@ static inline int pud_present(pud_t pud) return pud_val(pud) != (unsigned long)invalid_pmd_table; } -static inline void pud_clear(pud_t *pudp) +static inline pmd_t *pud_pgtable(pud_t pud) { - pud_val(*pudp) = ((unsigned long)invalid_pmd_table); + return (pmd_t *)pud_val(pud); } -static inline pmd_t *pud_pgtable(pud_t pud) +static inline void set_pud(pud_t *pud, pud_t pudval) { - return (pmd_t *)pud_val(pud); + WRITE_ONCE(*pud, pudval); } -#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud((unsigned long)invalid_pmd_table)); +} #define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) @@ -231,12 +237,15 @@ static inline int pmd_present(pmd_t pmd) return pmd_val(pmd) != (unsigned long)invalid_pte_table; } -static inline void pmd_clear(pmd_t *pmdp) +static inline void set_pmd(pmd_t *pmd, pmd_t pmdval) { - pmd_val(*pmdp) = ((unsigned long)invalid_pte_table); + WRITE_ONCE(*pmd, pmdval); } -#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd((unsigned long)invalid_pte_table)); +} #define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) @@ -314,7 +323,8 @@ extern void paging_init(void); static inline void set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + WRITE_ONCE(*ptep, pteval); + if (pte_val(pteval) & _PAGE_GLOBAL) { pte_t *buddy = ptep_buddy(ptep); /* @@ -341,8 +351,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); #else /* !CONFIG_SMP */ - if (pte_none(*buddy)) - pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + if (pte_none(ptep_get(buddy))) + WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ } } @@ -350,7 +360,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* Preserve global status for the pair */ - if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) set_pte(ptep, __pte(_PAGE_GLOBAL)); else set_pte(ptep, __pte(0)); @@ -603,7 +613,7 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t old = *pmdp; + pmd_t old = pmdp_get(pmdp); pmd_clear(pmdp); diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls index 523bb411a3bc..ab7d9baa2915 100644 --- a/arch/loongarch/kernel/Makefile.syscalls +++ b/arch/loongarch/kernel/Makefile.syscalls @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -syscall_abis_64 += newstat +# No special ABIs on loongarch so far +syscall_abis_64 += diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 000825406c1f..2bf86aeda874 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -66,6 +66,12 @@ void __init efi_runtime_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } +bool efi_poweroff_required(void) +{ + return efi_enabled(EFI_RUNTIME_SERVICES) && + (acpi_gbl_reduced_hardware || acpi_no_s5); +} + unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; #if defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 2634a9e8d82c..28681dfb4b85 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -714,19 +714,19 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ - pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; - p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + p4d = p4dp_get(p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; - pud = READ_ONCE(*pud_offset(&p4d, hva)); + pud = pudp_get(pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; - pmd = READ_ONCE(*pmd_offset(&pud, hva)); + pmd = pmdp_get(pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 12222c56cb59..e4068906143b 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -39,11 +39,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { + if (pgd_present(pgdp_get(pgd))) { p4d = p4d_offset(pgd, addr); - if (p4d_present(*p4d)) { + if (p4d_present(p4dp_get(p4d))) { pud = pud_offset(p4d, addr); - if (pud_present(*pud)) + if (pud_present(pudp_get(pud))) pmd = pmd_offset(pud, addr); } } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index bf789d114c2d..8a87a482c8f4 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -141,7 +141,7 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next) { - int huge = pmd_val(*pmd) & _PAGE_HUGE; + int huge = pmd_val(pmdp_get(pmd)) & _PAGE_HUGE; if (huge) vmemmap_verify((pte_t *)pmd, node, addr, next); @@ -173,7 +173,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pud) panic("%s: Failed to allocate memory\n", __func__); @@ -184,7 +184,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pmd) panic("%s: Failed to allocate memory\n", __func__); @@ -195,7 +195,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdp_get(pmd))) { pte_t *pte; pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); @@ -216,7 +216,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); ptep = populate_kernel_pte(addr); - if (!pte_none(*ptep)) { + if (!pte_none(ptep_get(ptep))) { pte_ERROR(*ptep); return; } diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index c608adc99845..427d6b1aec09 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -105,7 +105,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) { - if (__pmd_none(early, READ_ONCE(*pmdp))) { + if (__pmd_none(early, pmdp_get(pmdp))) { phys_addr_t pte_phys = early ? __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); if (!early) @@ -118,7 +118,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) { - if (__pud_none(early, READ_ONCE(*pudp))) { + if (__pud_none(early, pudp_get(pudp))) { phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); if (!early) @@ -131,7 +131,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) { - if (__p4d_none(early, READ_ONCE(*p4dp))) { + if (__p4d_none(early, p4dp_get(p4dp))) { phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); if (!early) @@ -154,7 +154,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, : kasan_alloc_zeroed_page(node); next = addr + PAGE_SIZE; set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); + } while (ptep++, addr = next, addr != end && __pte_none(early, ptep_get(ptep))); } static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, @@ -166,7 +166,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, do { next = pmd_addr_end(addr, end); kasan_pte_populate(pmdp, addr, next, node, early); - } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, pmdp_get(pmdp))); } static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index bda018150000..eb6a29b491a7 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -128,7 +128,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot) void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); flush_tlb_all(); } diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 2738325e98dd..d20eec742bfa 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -111,7 +111,7 @@ void gio_device_unregister(struct gio_device *giodev) } EXPORT_SYMBOL_GPL(gio_device_unregister); -static int gio_bus_match(struct device *dev, struct device_driver *drv) +static int gio_bus_match(struct device *dev, const struct device_driver *drv) { struct gio_device *gio_dev = to_gio_device(dev); struct gio_driver *gio_drv = to_gio_driver(drv); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5d650e02cbf4..b0a2ac3ba916 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -20,6 +20,7 @@ config PARISC select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE select DMA_OPS diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 2a60d7a72f1f..a3f0f100f219 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -20,7 +20,16 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#ifdef CONFIG_PA20 +#define ARCH_DMA_MINALIGN 128 +#else +#define ARCH_DMA_MINALIGN 32 +#endif +#define ARCH_KMALLOC_MINALIGN 16 /* ldcw requires 16-byte alignment */ + +#define arch_slab_minalign() ((unsigned)dcache_stride) +#define cache_line_size() dcache_stride +#define dma_get_cache_alignment cache_line_size #define __read_mostly __section(".data..read_mostly") diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c index 979f45d4d1fb..06cbcd6fe87b 100644 --- a/arch/parisc/net/bpf_jit_core.c +++ b/arch/parisc/net/bpf_jit_core.c @@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) jit_data->header = bpf_jit_binary_alloc(prog_size + extable_size, &jit_data->image, - sizeof(u32), + sizeof(long), bpf_fill_ill_insns); if (!jit_data->header) { prog = orig_prog; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f4e6f2dd04b7..16bacfe8c7a2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -145,6 +145,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_HOTPLUG_SMT #include <linux/cpu_smt.h> +#include <linux/cpumask.h> #include <asm/cputhreads.h> static inline bool topology_is_primary_thread(unsigned int cpu) @@ -156,6 +157,18 @@ static inline bool topology_smt_thread_allowed(unsigned int cpu) { return cpu_thread_in_core(cpu) < cpu_smt_num_threads; } + +#define topology_is_core_online topology_is_core_online +static inline bool topology_is_core_online(unsigned int cpu) +{ + int i, first_cpu = cpu_first_thread_sibling(cpu); + + for (i = first_cpu; i < first_cpu + threads_per_core; ++i) { + if (cpu_online(i)) + return true; + } + return false; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4bd2f87616ba..943430077375 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -959,6 +959,7 @@ void __init setup_arch(char **cmdline_p) mem_topology_setup(); /* Set max_mapnr before paging_init() */ set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 961aadc71de2..5e6c7b527677 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1984,8 +1984,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; r = -ENXIO; - if (!xive_enabled()) + if (!xive_enabled()) { + fdput(f); break; + } r = -EPERM; dev = kvm_device_from_filp(f.file); diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 9b4a675eb8f8..2978fcbe307e 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -73,7 +73,7 @@ void setup_kup(void) #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ - memset(addr, 0, sizeof(void *) << (shift)); \ + memset(addr, 0, sizeof(pgd_t) << (shift)); \ } CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ void pgtable_cache_add(unsigned int shift) { char *name; - unsigned long table_size = sizeof(void *) << shift; + unsigned long table_size = sizeof(pgd_t) << shift; unsigned long align = table_size; /* When batching pgtable pointers for RCU freeing, we store diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab201..da21cb018984 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -290,8 +290,6 @@ void __init mem_init(void) swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - kasan_late_init(); memblock_free_all(); diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 517b963e3e6a..a0934b516933 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -559,9 +559,7 @@ bool __init xive_native_init(void) struct device_node *np; struct resource r; void __iomem *tima; - struct property *prop; u8 max_prio = 7; - const __be32 *p; u32 val, cpu; s64 rc; @@ -592,7 +590,7 @@ bool __init xive_native_init(void) max_prio = val - 1; /* Iterate the EQ sizes and pick one */ - of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) { + of_property_for_each_u32(np, "ibm,xive-eq-sizes", val) { xive_queue_shift = val; if (val == PAGE_SHIFT) break; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index e45419264391..f2fa985a2c77 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -814,7 +814,6 @@ bool __init xive_spapr_init(void) struct device_node *np; struct resource r; void __iomem *tima; - struct property *prop; u8 max_prio; u32 val; u32 len; @@ -866,7 +865,7 @@ bool __init xive_spapr_init(void) } /* Iterate the EQ sizes and pick one */ - of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { + of_property_for_each_u32(np, "ibm,xive-eq-sizes", val) { xive_queue_shift = val; if (val == PAGE_SHIFT) break; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3ceec2ca84fa..0f3cd7c3a436 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,7 +13,9 @@ config 32BIT config RISCV def_bool y select ACPI_GENERIC_GSI if ACPI + select ACPI_PPTT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select ACPI_SPCR_TABLE if ACPI select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP @@ -123,6 +125,7 @@ config RISCV select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU @@ -154,7 +157,6 @@ config RISCV select HAVE_KERNEL_UNCOMPRESSED if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KERNEL_ZSTD if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KPROBES if !XIP_KERNEL - select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL # https://github.com/ClangBuiltLinux/linux/issues/1881 select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD @@ -820,6 +822,8 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS endchoice +source "arch/riscv/Kconfig.vendor" + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor new file mode 100644 index 000000000000..6f1cdd32ed29 --- /dev/null +++ b/arch/riscv/Kconfig.vendor @@ -0,0 +1,19 @@ +menu "Vendor extensions" + +config RISCV_ISA_VENDOR_EXT + bool + +menu "Andes" +config RISCV_ISA_VENDOR_EXT_ANDES + bool "Andes vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here if you want to disable all Andes vendor extension + support. This will cause any Andes vendor extensions that are + requested by hardware probing to be ignored. + + If you don't know what to do here, say Y. +endmenu + +endmenu diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 869c0345b908..4e9e7a28bf9b 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -18,7 +18,6 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.* loader loader.o loader.lds loader.bin targets := Image Image.* loader loader.o loader.lds loader.bin xipImage ifeq ($(CONFIG_XIP_KERNEL),y) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 3f1f055866af..0d678325444f 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -7,6 +7,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_MEMCG=y +CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -35,9 +36,6 @@ CONFIG_ARCH_THEAD=y CONFIG_ARCH_VIRT=y CONFIG_ARCH_CANAAN=y CONFIG_SMP=y -CONFIG_HOTPLUG_CPU=y -CONFIG_PM=y -CONFIG_CPU_IDLE=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m @@ -52,13 +50,11 @@ CONFIG_ACPI=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -CONFIG_SPARSEMEM_MANUAL=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_SPARSEMEM_MANUAL=y CONFIG_NET=y CONFIG_PACKET=y -CONFIG_UNIX=y CONFIG_XFRM_USER=m -CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y @@ -102,9 +98,9 @@ CONFIG_NET_SCHED=y CONFIG_NET_CLS_CGROUP=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y +CONFIG_CAN=m CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y -CONFIG_CAN=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -153,8 +149,8 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y @@ -179,7 +175,6 @@ CONFIG_DEVFREQ_THERMAL=y CONFIG_RZG2L_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y -CONFIG_RENESAS_RZG2LWDT=y CONFIG_MFD_AXP20X_I2C=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y @@ -193,11 +188,9 @@ CONFIG_DRM_NOUVEAU=m CONFIG_DRM_SUN4I=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y -CONFIG_SND_SOC_RZ=m CONFIG_SND_DESIGNWARE_I2S=m CONFIG_SND_SOC_STARFIVE=m CONFIG_SND_SOC_JH7110_PWMDAC=m @@ -239,34 +232,31 @@ CONFIG_USB_CONFIGFS_F_FS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SDHCI_OF_DWCMSHC=y +CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_SDHI=y CONFIG_MMC_DW=y CONFIG_MMC_DW_STARFIVE=y -CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m CONFIG_DW_AXI_DMAC=y -CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y -CONFIG_RENESAS_OSTM=y CONFIG_CLK_SOPHGO_CV1800=y CONFIG_SUN8I_DE2_CCU=m +CONFIG_RENESAS_OSTM=y CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y -CONFIG_ARCH_R9A07G043=y +CONFIG_PM_DEVFREQ=y CONFIG_IIO=y -CONFIG_RZG2L_ADC=m -CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_PHY_SUN4I_USB=m CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_PHY_STARFIVE_JH7110_DPHY_RX=m diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index f2708a9494a1..fc1a34faa5f3 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> #define ANDES_AX45MP_MARCHID 0x8000000000008a45UL #define ANDES_AX45MP_MIMPID 0x500UL @@ -65,6 +66,8 @@ void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct al unsigned long archid, unsigned long impid, unsigned int stage) { + BUILD_BUG_ON(ERRATA_ANDES_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + if (stage == RISCV_ALTERNATIVES_BOOT) errata_probe_iocp(stage, archid, impid); diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 716cfedad3a2..cea3b96ade11 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -12,6 +12,7 @@ #include <asm/alternative.h> #include <asm/vendorid_list.h> #include <asm/errata_list.h> +#include <asm/vendor_extensions.h> struct errata_info_t { char name[32]; @@ -96,6 +97,8 @@ void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, u32 cpu_apply_errata = 0; u32 tmp; + BUILD_BUG_ON(ERRATA_SIFIVE_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index bf6a0a6318ee..f5120e07c318 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -18,6 +18,7 @@ #include <asm/io.h> #include <asm/patch.h> #include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> #define CSR_TH_SXSTATUS 0x5c0 #define SXSTATUS_MAEE _AC(0x200000, UL) @@ -166,6 +167,8 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, u32 tmp; void *oldptr, *altptr; + BUILD_BUG_ON(ERRATA_THEAD_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + for (alt = begin; alt < end; alt++) { if (alt->vendor_id != THEAD_VENDOR_ID) continue; diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index 7dad0cf9d701..e0a1f84404f3 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -61,11 +61,14 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } void acpi_init_rintc_map(void); struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); -u32 get_acpi_id_for_cpu(int cpu); +static inline u32 get_acpi_id_for_cpu(int cpu) +{ + return acpi_cpu_get_madt_rintc(cpu)->uid; +} + int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); -static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, u32 *cboz_size, u32 *cbop_size); #else @@ -87,4 +90,12 @@ static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ACPI_NUMA +int acpi_numa_get_nid(unsigned int cpu); +void acpi_map_cpus_to_nodes(void); +#else +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +static inline void acpi_map_cpus_to_nodes(void) { } +#endif /* CONFIG_ACPI_NUMA */ + #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 880606b0469a..71af9ecfcfcb 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -170,7 +170,7 @@ legacy: ({ \ typeof(x) x_ = (x); \ __builtin_constant_p(x_) ? \ - (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + ((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ : \ variable_fls(x_); \ }) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 000796c2d0b1..45f9c1171a48 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -33,6 +33,31 @@ extern struct riscv_isainfo hart_isa[NR_CPUS]; void riscv_user_isa_enable(void); +#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ + .name = #_name, \ + .property = #_name, \ + .id = _id, \ + .subset_ext_ids = _subset_exts, \ + .subset_ext_size = _subset_exts_size, \ + .validate = _validate \ +} + +#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL) + +#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \ + _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate) + +/* Used to declare pure "lasso" extension (Zk for instance) */ +#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ + _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ + ARRAY_SIZE(_bundled_exts), NULL) + +/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ +#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ + _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL) +#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ + _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) + #if defined(CONFIG_RISCV_MISALIGNED) bool check_unaligned_access_emulated_all_cpus(void); void unaligned_emulation_finish(void); @@ -79,59 +104,66 @@ extern bool riscv_isa_fallback; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); +#define STANDARD_EXT 0 + bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); #define riscv_isa_extension_available(isa_bitmap, ext) \ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, + const unsigned long ext) { - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_no); return true; l_no: return false; } -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, + const unsigned long ext) { - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_yes); return false; l_yes: return true; } +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +static __always_inline bool riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_likely(STANDARD_EXT, ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); @@ -139,7 +171,10 @@ static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsign static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) { - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_unlikely(STANDARD_EXT, ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b18b202ca141..5a0bd27fd11a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -80,19 +80,18 @@ #define RISCV_ISA_EXT_ZFA 71 #define RISCV_ISA_EXT_ZTSO 72 #define RISCV_ISA_EXT_ZACAS 73 -#define RISCV_ISA_EXT_XANDESPMU 74 -#define RISCV_ISA_EXT_ZVE32X 75 -#define RISCV_ISA_EXT_ZVE32F 76 -#define RISCV_ISA_EXT_ZVE64X 77 -#define RISCV_ISA_EXT_ZVE64F 78 -#define RISCV_ISA_EXT_ZVE64D 79 -#define RISCV_ISA_EXT_ZIMOP 80 -#define RISCV_ISA_EXT_ZCA 81 -#define RISCV_ISA_EXT_ZCB 82 -#define RISCV_ISA_EXT_ZCD 83 -#define RISCV_ISA_EXT_ZCF 84 -#define RISCV_ISA_EXT_ZCMOP 85 -#define RISCV_ISA_EXT_ZAWRS 86 +#define RISCV_ISA_EXT_ZVE32X 74 +#define RISCV_ISA_EXT_ZVE32F 75 +#define RISCV_ISA_EXT_ZVE64X 76 +#define RISCV_ISA_EXT_ZVE64F 77 +#define RISCV_ISA_EXT_ZVE64D 78 +#define RISCV_ISA_EXT_ZIMOP 79 +#define RISCV_ISA_EXT_ZCA 80 +#define RISCV_ISA_EXT_ZCB 81 +#define RISCV_ISA_EXT_ZCD 82 +#define RISCV_ISA_EXT_ZCF 83 +#define RISCV_ISA_EXT_ZCMOP 84 +#define RISCV_ISA_EXT_ZAWRS 85 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 150a9877b0af..ffb9484531af 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 7 +#define RISCV_HWPROBE_MAX_KEY 9 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 235fd45d998d..7ede2111c591 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -37,7 +37,7 @@ * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) -#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) +#define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL) #else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* CONFIG_64BIT */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 5d473343634b..fca5c6be2b81 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -10,6 +10,7 @@ #include <asm/page.h> #include <linux/const.h> +#include <linux/sizes.h> /* thread information allocation */ #define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER diff --git a/arch/riscv/include/asm/vendor_extensions.h b/arch/riscv/include/asm/vendor_extensions.h new file mode 100644 index 000000000000..7437304a71b9 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_VENDOR_EXTENSIONS_H +#define _ASM_VENDOR_EXTENSIONS_H + +#include <asm/cpufeature.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* + * The extension keys of each vendor must be strictly less than this value. + */ +#define RISCV_ISA_VENDOR_EXT_MAX 32 + +struct riscv_isavendorinfo { + DECLARE_BITMAP(isa, RISCV_ISA_VENDOR_EXT_MAX); +}; + +struct riscv_isa_vendor_ext_data_list { + bool is_initialized; + const size_t ext_data_count; + const struct riscv_isa_ext_data *ext_data; + struct riscv_isavendorinfo per_hart_isa_bitmap[NR_CPUS]; + struct riscv_isavendorinfo all_harts_isa_bitmap; +}; + +extern struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[]; + +extern const size_t riscv_isa_vendor_ext_list_size; + +/* + * The alternatives need some way of distinguishing between vendor extensions + * and errata. Incrementing all of the vendor extension keys so they are at + * least 0x8000 accomplishes that. + */ +#define RISCV_VENDOR_EXT_ALTERNATIVES_BASE 0x8000 + +#define VENDOR_EXT_ALL_CPUS -1 + +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit); +#define riscv_cpu_isa_vendor_extension_available(cpu, vendor, ext) \ + __riscv_isa_vendor_extension_available(cpu, vendor, RISCV_ISA_VENDOR_EXT_##ext) +#define riscv_isa_vendor_extension_available(vendor, ext) \ + __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, \ + RISCV_ISA_VENDOR_EXT_##ext) + +static __always_inline bool riscv_has_vendor_extension_likely(const unsigned long vendor, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(vendor, + ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + + return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext); +} + +static __always_inline bool riscv_has_vendor_extension_unlikely(const unsigned long vendor, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(vendor, + ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + + return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext); +} + +static __always_inline bool riscv_cpu_has_vendor_extension_likely(const unsigned long vendor, + int cpu, const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_likely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + return true; + + return __riscv_isa_vendor_extension_available(cpu, vendor, ext); +} + +static __always_inline bool riscv_cpu_has_vendor_extension_unlikely(const unsigned long vendor, + int cpu, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_unlikely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + return true; + + return __riscv_isa_vendor_extension_available(cpu, vendor, ext); +} + +#endif /* _ASM_VENDOR_EXTENSIONS_H */ diff --git a/arch/riscv/include/asm/vendor_extensions/andes.h b/arch/riscv/include/asm/vendor_extensions/andes.h new file mode 100644 index 000000000000..7bb2fc43438f --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/andes.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H + +#include <asm/vendor_extensions.h> + +#include <linux/types.h> + +#define RISCV_ISA_VENDOR_EXT_XANDESPMU 0 + +/* + * Extension keys should be strictly less than max. + * It is safe to increment this when necessary. + */ +#define RISCV_ISA_VENDOR_EXT_MAX_ANDES 32 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes; + +#endif diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 8b8f6ac0eae2..1e153cda57db 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -81,6 +81,13 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 +#define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 +#define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED 1 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 5b243d46f4b1..06d407f1b30b 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -58,6 +58,8 @@ obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o +obj-y += vendor_extensions.o +obj-y += vendor_extensions/ obj-y += probes/ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ @@ -110,3 +112,4 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls index 52087a023b3d..9668fd1faf60 100644 --- a/arch/riscv/kernel/Makefile.syscalls +++ b/arch/riscv/kernel/Makefile.syscalls @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += riscv memfd_secret -syscall_abis_64 += riscv newstat rlimit memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index e619edc8b0cc..ba957aaca5cb 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -17,7 +17,9 @@ #include <linux/efi.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/of_fdt.h> #include <linux/pci.h> +#include <linux/serial_core.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void) if (param_acpi_off || (!param_acpi_on && !param_acpi_force && efi.acpi20 == EFI_INVALID_TABLE_ADDR)) - return; + goto done; /* * ACPI is disabled at this point. Enable it in order to parse @@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void) if (!param_acpi_force) disable_acpi(); } + +done: + if (acpi_disabled) { + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); + } else { + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + } } static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) @@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return &cpu_madt_rintc[cpu]; } -u32 get_acpi_id_for_cpu(int cpu) -{ - return acpi_cpu_get_madt_rintc(cpu)->uid; -} - /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c new file mode 100644 index 000000000000..ff95aeebee3e --- /dev/null +++ b/arch/riscv/kernel/acpi_numa.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI 6.6 based NUMA setup for RISCV + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * Copyright (C) 2024 Intel Corporation. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <asm/numa.h> + +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; + +int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_rintc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, + cpuid_to_hartid_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + int i; + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} + +/* Callback for Proximity Domain -> logical node ID mapping */ +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return; + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + node_set(node, numa_nodes_parsed); +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 09e9b88110d1..d6c108c50cba 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/of.h> #include <asm/cacheinfo.h> @@ -64,7 +65,6 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) } static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, enum cache_type type, unsigned int level) { this_leaf->level = level; @@ -79,12 +79,33 @@ int populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; + if (!acpi_disabled) { + int ret, fw_levels, split_levels; + + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels); + if (ret) + return ret; + + BUG_ON((split_levels > fw_levels) || + (split_levels + fw_levels > this_cpu_ci->num_leaves)); + + for (; level <= this_cpu_ci->num_levels; level++) { + if (level <= split_levels) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + } + } + return 0; + } + if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; while ((np = of_find_next_cache_node(np))) { @@ -97,11 +118,11 @@ int populate_cache_leaves(unsigned int cpu) if (level <= levels) break; if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c1f3655238fd..f6b13e9f5e6c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -16,6 +16,7 @@ #include <asm/sbi.h> #include <asm/smp.h> #include <asm/pgtable.h> +#include <asm/vendor_extensions.h> bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { @@ -235,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) +#define ALL_CPUS -1 + +static void print_vendor_isa(struct seq_file *f, int cpu) +{ + struct riscv_isavendorinfo *vendor_bitmap; + struct riscv_isa_vendor_ext_data_list *ext_list; + const struct riscv_isa_ext_data *ext_data; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + ext_list = riscv_isa_vendor_ext_list[i]; + ext_data = riscv_isa_vendor_ext_list[i]->ext_data; + + if (cpu == ALL_CPUS) + vendor_bitmap = &ext_list->all_harts_isa_bitmap; + else + vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id)) + continue; + + seq_printf(f, "_%s", ext_data[j].name); + } + } +} + +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu) { if (IS_ENABLED(CONFIG_32BIT)) @@ -254,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) seq_printf(f, "%s", riscv_isa_ext[i].name); } + print_vendor_isa(f, cpu); + seq_puts(f, "\n"); } @@ -316,7 +345,7 @@ static int c_show(struct seq_file *m, void *v) * line. */ seq_puts(m, "isa\t\t: "); - print_isa(m, NULL); + print_isa(m, NULL, ALL_CPUS); print_mmu(m); if (acpi_disabled) { @@ -338,7 +367,7 @@ static int c_show(struct seq_file *m, void *v) * additional extensions not present across all harts. */ seq_puts(m, "hart isa\t: "); - print_isa(m, hart_isa[cpu_id].isa); + print_isa(m, hart_isa[cpu_id].isa, cpu_id); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 0366dc3baf33..b427188b28fc 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vector.h> +#include <asm/vendor_extensions.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -100,31 +101,6 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } -#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ - .name = #_name, \ - .property = #_name, \ - .id = _id, \ - .subset_ext_ids = _subset_exts, \ - .subset_ext_size = _subset_exts_size, \ - .validate = _validate \ -} - -#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL) - -#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \ - _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate) - -/* Used to declare pure "lasso" extension (Zk for instance) */ -#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ - _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ - ARRAY_SIZE(_bundled_exts), NULL) - -/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ -#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL) -#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) - static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -405,7 +381,6 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), - __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); @@ -457,28 +432,26 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { ext = riscv_get_isa_ext_data(bit); - if (!ext) - continue; - if (ext->validate) { + if (ext && ext->validate) { ret = ext->validate(ext, resolved_isa); if (ret == -EPROBE_DEFER) { loop = true; continue; } else if (ret) { /* Disable the extension entirely */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); continue; } } - set_bit(ext->id, resolved_isa); + set_bit(bit, resolved_isa); /* No need to keep it in source isa now that it is enabled */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); /* Single letter extensions get set in hwcap */ - if (ext->id < RISCV_ISA_EXT_BASE) - *this_hwcap |= isa2hwcap[ext->id]; + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); } @@ -512,6 +485,21 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap bool ext_err = false; switch (*ext) { + case 'x': + case 'X': + if (acpi_disabled) + pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead."); + /* + * To skip an extension, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + for (; *isa && *isa != '_'; ++isa) + ; + ext_err = true; + break; case 's': /* * Workaround for invalid single-letter 's' & 'u' (QEMU). @@ -527,8 +515,6 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap } fallthrough; case 'S': - case 'x': - case 'X': case 'z': case 'Z': /* @@ -728,6 +714,61 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) acpi_put_table((struct acpi_table_header *)rhct); } +static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + const struct riscv_isa_ext_data ext = ext_list->ext_data[j]; + struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu]; + + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext.property) < 0) + continue; + + /* + * Assume that subset extensions are all members of the + * same vendor. + */ + if (ext.subset_ext_size) + for (int k = 0; k < ext.subset_ext_size; k++) + set_bit(ext.subset_ext_ids[k], isavendorinfo->isa); + + set_bit(ext.id, isavendorinfo->isa); + } + } +} + +/* + * Populate all_harts_isa_bitmap for each vendor with all of the extensions that + * are shared across CPUs for that vendor. + */ +static void __init riscv_fill_vendor_ext_list(int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + if (!ext_list->is_initialized) { + bitmap_copy(ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + ext_list->is_initialized = true; + } else { + bitmap_and(ext_list->all_harts_isa_bitmap.isa, + ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + } + } +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; @@ -760,6 +801,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) } riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + riscv_fill_cpu_vendor_ext(cpu_node, cpu); of_node_put(cpu_node); @@ -776,6 +818,8 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); + + riscv_fill_vendor_ext_list(cpu); } if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) @@ -918,28 +962,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, { struct alt_entry *alt; void *oldptr, *altptr; - u16 id, value; + u16 id, value, vendor; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; for (alt = begin; alt < end; alt++) { - if (alt->vendor_id != 0) - continue; - id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id); - if (id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", id); - continue; - } + /* + * Any alternative with a patch_id that is less than + * RISCV_ISA_EXT_MAX is interpreted as a standard extension. + * + * Any alternative with patch_id that is greater than or equal + * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a + * vendor extension. + */ + if (id < RISCV_ISA_EXT_MAX) { + /* + * This patch should be treated as errata so skip + * processing here. + */ + if (alt->vendor_id != 0) + continue; - if (!__riscv_isa_extension_available(NULL, id)) - continue; + if (!__riscv_isa_extension_available(NULL, id)) + continue; - value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); - if (!riscv_cpufeature_patch_check(id, value)) + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) + continue; + } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) { + if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, + id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + continue; + } else { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; + } oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 68a24cf9481a..ac2e908d4418 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -88,7 +88,6 @@ SYM_CODE_START(handle_exception) call riscv_v_context_nesting_start #endif move a0, sp /* pt_regs */ - la ra, ret_from_exception /* * MSB of cause differentiates between @@ -97,7 +96,8 @@ SYM_CODE_START(handle_exception) bge s4, zero, 1f /* Handle interrupts */ - tail do_irq + call do_irq + j ret_from_exception 1: /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR @@ -105,11 +105,14 @@ SYM_CODE_START(handle_exception) la t2, excp_vect_table_end add t0, t1, t0 /* Check if exception code lies within bounds */ - bgeu t0, t2, 1f - REG_L t0, 0(t0) - jr t0 -1: - tail do_trap_unknown + bgeu t0, t2, 3f + REG_L t1, 0(t0) +2: jalr t1 + j ret_from_exception +3: + + la t1, do_trap_unknown + j 2b SYM_CODE_END(handle_exception) ASM_NOKPROBE(handle_exception) @@ -130,6 +133,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase_on_task_stack +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51f..34ef522f07a8 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 8265ff497977..d2129f2c61b8 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c deleted file mode 100644 index a69dfa610aa8..000000000000 --- a/arch/riscv/kernel/probes/ftrace.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/kprobes.h> - -/* Ftrace callback handler for kprobes -- called under preepmt disabled */ -void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct kprobe *p; - struct pt_regs *regs; - struct kprobe_ctlblk *kcb; - int bit; - - if (unlikely(kprobe_ftrace_disabled)) - return; - - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - return; - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto out; - - regs = ftrace_get_regs(fregs); - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - unsigned long orig_ip = instruction_pointer(regs); - - instruction_pointer_set(regs, ip); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) { - /* - * Emulate singlestep (and also recover regs->pc) - * as if there is a nop - */ - instruction_pointer_set(regs, - (unsigned long)p->addr + MCOUNT_INSN_SIZE); - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - instruction_pointer_set(regs, orig_ip); - } - - /* - * If pre_handler returns !0, it changes regs->pc. We have to - * skip emulating post_handler. - */ - __this_cpu_write(current_kprobe, NULL); - } -out: - ftrace_test_recursion_unlock(bit); -} -NOKPROBE_SYMBOL(kprobe_ftrace_handler); - -int arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.api.insn = NULL; - return 0; -} diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index 1026e22955cc..0cc5559c08d8 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -71,7 +71,7 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4f73c0ae44b2..a2cde65b69e9 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -281,8 +281,10 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - if (!acpi_disabled) + if (!acpi_disabled) { acpi_init_rintc_map(); + acpi_map_cpus_to_nodes(); + } riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5a2edd7f027e..dcd282419456 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) datap = state + 1; /* datap is designed to be 16 byte aligned for better performance */ - WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + WARN_ON(!IS_ALIGNED((unsigned long)datap, 16)); get_cpu_vector_context(); riscv_v_vstate_save(¤t->thread.vstate, regs); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 19baf0d574d3..0f8f1c95ac38 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -96,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = true; - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); return 0; } @@ -106,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un } cpuid_to_hartid_map(cpu_count) = hart; - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); cpu_count++; return 0; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 10e311b2759d..c6d5de22463f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,7 +16,7 @@ #ifdef CONFIG_FRAME_POINTER -extern asmlinkage void ret_from_exception(void); +extern asmlinkage void handle_exception(void); static inline int fp_is_valid(unsigned long fp, unsigned long sp) { @@ -71,7 +71,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)ret_from_exception) { + if (pc == (unsigned long)handle_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 685594769535..cea0ca2bf2a2 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -9,6 +9,7 @@ #include <asm/cpufeature.h> #include <asm/hwprobe.h> #include <asm/processor.h> +#include <asm/delay.h> #include <asm/sbi.h> #include <asm/switch_to.h> #include <asm/uaccess.h> @@ -93,44 +94,45 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZACAS); + EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); - EXT_KEY(ZBS); - EXT_KEY(ZICBOZ); EXT_KEY(ZBC); - EXT_KEY(ZBKB); EXT_KEY(ZBKC); EXT_KEY(ZBKX); + EXT_KEY(ZBS); + EXT_KEY(ZCA); + EXT_KEY(ZCB); + EXT_KEY(ZCMOP); + EXT_KEY(ZICBOZ); + EXT_KEY(ZICOND); + EXT_KEY(ZIHINTNTL); + EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); EXT_KEY(ZKNH); EXT_KEY(ZKSED); EXT_KEY(ZKSH); EXT_KEY(ZKT); - EXT_KEY(ZIHINTNTL); EXT_KEY(ZTSO); - EXT_KEY(ZACAS); - EXT_KEY(ZICOND); - EXT_KEY(ZIHINTPAUSE); - EXT_KEY(ZIMOP); - EXT_KEY(ZCA); - EXT_KEY(ZCB); - EXT_KEY(ZCMOP); - EXT_KEY(ZAWRS); /* * All the following extensions must depend on the kernel * support of V. */ if (has_vector()) { - EXT_KEY(ZVE32X); - EXT_KEY(ZVE32F); - EXT_KEY(ZVE64X); - EXT_KEY(ZVE64F); - EXT_KEY(ZVE64D); EXT_KEY(ZVBB); EXT_KEY(ZVBC); + EXT_KEY(ZVE32F); + EXT_KEY(ZVE32X); + EXT_KEY(ZVE64D); + EXT_KEY(ZVE64F); + EXT_KEY(ZVE64X); + EXT_KEY(ZVFH); + EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); EXT_KEY(ZVKG); EXT_KEY(ZVKNED); @@ -139,16 +141,14 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVKSED); EXT_KEY(ZVKSH); EXT_KEY(ZVKT); - EXT_KEY(ZVFH); - EXT_KEY(ZVFHMIN); } if (has_fpu()) { - EXT_KEY(ZFH); - EXT_KEY(ZFHMIN); - EXT_KEY(ZFA); EXT_KEY(ZCD); EXT_KEY(ZCF); + EXT_KEY(ZFA); + EXT_KEY(ZFH); + EXT_KEY(ZFHMIN); } #undef EXT_KEY } @@ -178,13 +178,13 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } @@ -192,12 +192,12 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) static u64 hwprobe_misaligned(const struct cpumask *cpus) { if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) - return RISCV_HWPROBE_MISALIGNED_FAST; + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) - return RISCV_HWPROBE_MISALIGNED_EMULATED; + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; - return RISCV_HWPROBE_MISALIGNED_SLOW; + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; } #endif @@ -225,6 +225,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; @@ -237,6 +238,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = user_max_virt_addr(); break; + case RISCV_HWPROBE_KEY_TIME_CSR_FREQ: + pair->value = riscv_timebase; + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee..51ebfd23e007 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index b62d5a2f4541..192cd5603e95 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -338,7 +338,7 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; #endif if (!unaligned_enabled) @@ -532,13 +532,13 @@ static bool check_unaligned_access_emulated(int cpu) unsigned long tmp_var, tmp_val; bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index a9a6bcb02acf..160628a2116d 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -34,9 +34,9 @@ static int check_unaligned_access(void *param) struct page *page = param; void *dst; void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) return 0; /* Make an unaligned destination buffer. */ @@ -95,14 +95,14 @@ static int check_unaligned_access(void *param) } if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; ratio = div_u64((byte_cycles * 100), word_cycles); pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", cpu, ratio / 100, ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; @@ -110,7 +110,7 @@ static int check_unaligned_access(void *param) * Set the value of fast_misaligned_access of a CPU. These operations * are atomic to avoid race conditions. */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) cpumask_set_cpu(cpu, &fast_misaligned_access); else cpumask_clear_cpu(cpu, &fast_misaligned_access); @@ -188,7 +188,7 @@ static int riscv_online_cpu(unsigned int cpu) static struct page *buf; /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c new file mode 100644 index 000000000000..a8126d118341 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos, Inc + */ + +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + &riscv_isa_vendor_ext_list_andes, +#endif +}; + +const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); + +/** + * __riscv_isa_vendor_extension_available() - Check whether given vendor + * extension is available or not. + * + * @cpu: check if extension is available on this cpu + * @vendor: vendor that the extension is a member of + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: When cpu is -1, will check if extension is available on all cpus + */ +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit) +{ + struct riscv_isavendorinfo *bmap; + struct riscv_isavendorinfo *cpu_bmap; + + switch (vendor) { + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + case ANDES_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; + break; + #endif + default: + return false; + } + + if (cpu != -1) + bmap = &cpu_bmap[cpu]; + + if (bit >= RISCV_ISA_VENDOR_EXT_MAX) + return false; + + return test_bit(bit, bmap->isa) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile new file mode 100644 index 000000000000..6a61aed944f1 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c new file mode 100644 index 000000000000..ec688c88456a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/andes.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All Andes vendor extensions supported in Linux */ +const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = { + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes), + .ext_data = riscv_isa_vendor_ext_andes, +}; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5224f3733802..a9f2b4af8f3f 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -61,26 +61,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bfa2dea95354..eb0649a61b4c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -234,8 +234,6 @@ static void __init setup_bootmem(void) */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); - /* * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. @@ -251,6 +249,16 @@ static void __init setup_bootmem(void) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* + * The size of the linear page mapping may restrict the amount of + * usable RAM. + */ + if (IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + } + + /* * Reserve physical address space that would be mapped to virtual * addresses greater than (void *)(-PAGE_SIZE) because: * - This memory would overlap with ERR_PTR @@ -266,6 +274,7 @@ static void __init setup_bootmem(void) memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); } + phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); @@ -918,7 +927,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; + end_va = kernel_map.virt_addr + kernel_map.size; for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), @@ -1087,7 +1096,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; #else @@ -1284,8 +1293,6 @@ static void __init create_linear_mapping_page_table(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); - if (end >= __pa(PAGE_OFFSET) + memory_limit) - end = __pa(PAGE_OFFSET) + memory_limit; create_linear_mapping_range(start, end, 0, NULL); } diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 5bcf3af903da..0e6ca6d5ae4b 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,6 +7,7 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ +#include <asm/asm.h> #include <linux/linkage.h> .text @@ -34,6 +35,7 @@ SYM_CODE_END(purgatory_start) .data +.align LGREG SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 59e0d861e26f..a822f952f64a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -799,17 +799,6 @@ config HAVE_PNETID menu "Virtualization" -config PROTECTED_VIRTUALIZATION_GUEST - def_bool n - prompt "Protected virtualization guest support" - help - Select this option, if you want to be able to run this - kernel as a protected virtualization KVM guest. - Protected virtualization capable machines have a mini hypervisor - located at machine level (an ultravisor). With help of the - Ultravisor, KVM will be able to run "protected" VMs, special - VMs whose memory and management data are unavailable to KVM. - config PFAULT def_bool y prompt "Pseudo page fault support" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e7658997452b..4f476884d340 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -39,8 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c new file mode 100644 index 000000000000..abc08d2c873d --- /dev/null +++ b/arch/s390/boot/alternative.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../kernel/alternative.c" diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 18027fdc92b0..83e2ce050b6c 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -30,6 +30,8 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; + unsigned long alt_instructions; + unsigned long alt_instructions_end; #ifdef CONFIG_KASAN unsigned long kasan_early_shadow_page_off; unsigned long kasan_early_shadow_pte_off; @@ -89,8 +91,10 @@ extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; + #define vmlinux _vmlinux_info +#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore)) #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) #define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset)) #define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index a21f301acd29..1773b72a6a7b 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/abs_lowcore.h> #include <asm/page-states.h> #include <asm/ebcdic.h> #include <asm/sclp.h> @@ -310,5 +311,7 @@ void parse_boot_command_line(void) prot_virt_host = 1; } #endif + if (!strcmp(param, "relocate_lowcore") && test_facility(193)) + relocate_lowcore = 1; } } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index c59014945af0..ce232552bc1c 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); @@ -376,6 +377,8 @@ static void kaslr_adjust_vmlinux_info(long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; + vmlinux.alt_instructions += offset; + vmlinux.alt_instructions_end += offset; #ifdef CONFIG_KASAN vmlinux.kasan_early_shadow_page_off += offset; vmlinux.kasan_early_shadow_pte_off += offset; @@ -478,8 +481,12 @@ void startup_kernel(void) * before the kernel started. Therefore, in case the two sections * overlap there is no risk of corrupting any data. */ - if (kaslr_enabled()) - amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); + if (kaslr_enabled()) { + unsigned long amode31_min; + + amode31_min = (unsigned long)_decompressor_end; + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); + } if (!amode31_lma) amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); @@ -503,6 +510,9 @@ void startup_kernel(void) kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); copy_bootdata(); + __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, + (struct alt_instr *)_vmlinux_info.alt_instructions_end, + ALT_CTX_EARLY); /* * Save KASLR offset for early dumps, before vmcore_info is set. diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 1e66d2cbb096..318e6ba95bfd 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -8,12 +8,8 @@ #include "uv.h" /* will be used in arch/s390/kernel/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); -#endif -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); -#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) @@ -53,14 +49,11 @@ void uv_query_info(void) uv_info.max_secrets = uvcb.max_secrets; } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; -#endif } -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit) { if (is_prot_virt_host() && uv_info.max_sec_stor_addr) @@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void) { prot_virt_host = is_prot_virt_host_capable(); } -#endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h index 0f3070856f8d..da4a4a8d48e0 100644 --- a/arch/s390/boot/uv.h +++ b/arch/s390/boot/uv.h @@ -2,21 +2,8 @@ #ifndef BOOT_UV_H #define BOOT_UV_H -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit); void sanitize_prot_virt_host(void); -#else -static inline unsigned long adjust_to_uv_max(unsigned long limit) -{ - return limit; -} -static inline void sanitize_prot_virt_host(void) {} -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) void uv_query_info(void); -#else -static inline void uv_query_info(void) {} -#endif #endif /* BOOT_UV_H */ diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index a255ca189aaa..2847cc059ab7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); enum populate_mode { POPULATE_NONE, POPULATE_DIRECT, + POPULATE_LOWCORE, POPULATE_ABS_LOWCORE, POPULATE_IDENTITY, POPULATE_KERNEL, @@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return -1; case POPULATE_DIRECT: return addr; + case POPULATE_LOWCORE: + return __lowcore_pa(addr); case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); case POPULATE_KERNEL: @@ -418,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit) { + unsigned long lowcore_address = 0; unsigned long start, end; unsigned long asce_type; unsigned long asce_bits; @@ -455,12 +459,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); + if (relocate_lowcore) + lowcore_address = LOWCORE_ALT_ADDRESS; + /* * To allow prefixing the lowcore must be mapped with 4KB pages. * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); + pgtable_populate(lowcore_address, + lowcore_address + sizeof(struct lowcore), + POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f3602414a961..ea63a7342f5f 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -55,7 +55,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d0d8925fdf09..d8b28ff8ff45 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -53,7 +53,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index 6f264b79e377..d20df8c923fc 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H +#include <asm/sections.h> #include <asm/lowcore.h> #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } +extern int __bootdata_preserved(relocate_lowcore); + +static inline int have_relocated_lowcore(void) +{ + return relocate_lowcore; +} + #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h deleted file mode 100644 index 608f6287ca9c..000000000000 --- a/arch/s390/include/asm/alternative-asm.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_ALTERNATIVE_ASM_H -#define _ASM_S390_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature - .long \orig_start - . - .long \alt_start - . - .word \feature - .byte \orig_end - \orig_start - .org . - ( \orig_end - \orig_start ) & 1 - .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) - .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE oldinstr, newinstr, feature - .pushsection .altinstr_replacement,"ax" -770: \newinstr -771: .popsection -772: \oldinstr -773: .pushsection .altinstructions,"a" - alt_entry 772b, 773b, 770b, 771b, \feature - .popsection -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 - .pushsection .altinstr_replacement,"ax" -770: \newinstr1 -771: \newinstr2 -772: .popsection -773: \oldinstr -774: .pushsection .altinstructions,"a" - alt_entry 773b, 774b, 770b, 771b,\feature1 - alt_entry 773b, 774b, 771b, 772b,\feature2 - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_S390_ALTERNATIVE_ASM_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index dd93b92c3ab6..de980c938a3e 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -2,6 +2,58 @@ #ifndef _ASM_S390_ALTERNATIVE_H #define _ASM_S390_ALTERNATIVE_H +/* + * Each alternative comes with a 32 bit feature field: + * union { + * u32 feature; + * struct { + * u32 ctx : 4; + * u32 type : 8; + * u32 data : 20; + * }; + * } + * + * @ctx is a bitfield, where only one bit must be set. Each bit defines + * in which context an alternative is supposed to be applied to the + * kernel image: + * + * - from the decompressor before the kernel itself is executed + * - from early kernel code from within the kernel + * + * @type is a number which defines the type and with that the type + * specific alternative patching. + * + * @data is additional type specific information which defines if an + * alternative should be applied. + */ + +#define ALT_CTX_EARLY 1 +#define ALT_CTX_LATE 2 +#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) + +#define ALT_TYPE_FACILITY 0 +#define ALT_TYPE_SPEC 1 +#define ALT_TYPE_LOWCORE 2 + +#define ALT_DATA_SHIFT 0 +#define ALT_TYPE_SHIFT 20 +#define ALT_CTX_SHIFT 28 + +#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) + #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -11,12 +63,30 @@ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 facility; /* facility bit set for replacement */ + union { + u32 feature; /* feature required for replacement */ + struct { + u32 ctx : 4; /* context */ + u32 type : 8; /* type of alternative */ + u32 data : 20; /* patching information */ + }; + }; u8 instrlen; /* length of original instruction */ } __packed; -void apply_alternative_instructions(void); -void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx); + +static inline void apply_alternative_instructions(void) +{ + __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE); +} + +static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + __apply_alternatives(start, end, ALT_CTX_ALL); +} /* * +---------------------------------+ @@ -48,10 +118,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define OLDINSTR(oldinstr) \ "661:\n\t" oldinstr "\n662:\n" -#define ALTINSTR_ENTRY(facility, num) \ +#define ALTINSTR_ENTRY(feature, num) \ "\t.long 661b - .\n" /* old instruction */ \ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ - "\t.word " __stringify(facility) "\n" /* facility bit */ \ + "\t.long " __stringify(feature) "\n" /* feature */ \ "\t.byte " oldinstr_len "\n" /* instruction len */ \ "\t.org . - (" oldinstr_len ") & 1\n" \ "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \ @@ -61,24 +131,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" /* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ +#define ALTERNATIVE(oldinstr, altinstr, feature) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr, 1) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility, 1) \ + ALTINSTR_ENTRY(feature, 1) \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\ +#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr1, 1) \ ALTINSTR_REPLACEMENT(altinstr2, 2) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility1, 1) \ - ALTINSTR_ENTRY(facility2, 2) \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ ".popsection\n" /* @@ -93,12 +163,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, altinstr, facility) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") +#define alternative(oldinstr, altinstr, feature) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory") -#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ - altinstr2, facility2) ::: "memory") +#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \ + altinstr2, feature2) ::: "memory") /* Alternative inline assembly with input. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -106,8 +176,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); : : input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, altinstr, facility, output, input...) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \ +#define alternative_io(oldinstr, altinstr, feature, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \ : output : input) /* Use this macro if more than one output parameter is needed. */ @@ -116,6 +186,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); /* Use this macro if clobbers are needed without inputs. */ #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber +#else /* __ASSEMBLY__ */ + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature + .long \orig_start - . + .long \alt_start - . + .long \feature + .byte \orig_end - \orig_start + .org . - ( \orig_end - \orig_start ) & 1 + .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) + .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature + .pushsection .altinstr_replacement,"ax" +770: \newinstr +771: .popsection +772: \oldinstr +773: .pushsection .altinstructions,"a" + alt_entry 772b, 773b, 770b, 771b, \feature + .popsection +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 + .pushsection .altinstr_replacement,"ax" +770: \newinstr1 +771: \newinstr2 +772: .popsection +773: \oldinstr +774: .pushsection .altinstructions,"a" + alt_entry 773b, 774b, 770b, 771b,\feature1 + alt_entry 773b, 774b, 771b, 772b,\feature2 + .popsection +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 2b379d1d9046..742c7919cbcd 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -188,7 +188,8 @@ static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -#ifdef __GCC_ASM_FLAG_OUTPUTS__ +/* GCC versions before 14.2.0 may die with an ICE in some configurations. */ +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200)) static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index d46cc725f024..b7d234838a36 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -20,7 +20,6 @@ #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) extern u64 stfle_fac_list[16]; -extern u64 alt_stfle_fac_list[16]; static inline void __set_facility(unsigned long nr, void *facilities) { diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h index 27db65fbf3f6..f73e181d09ae 100644 --- a/arch/s390/include/asm/kmsan.h +++ b/arch/s390/include/asm/kmsan.h @@ -12,8 +12,8 @@ static inline bool is_lowcore_addr(void *addr) { - return addr >= (void *)&S390_lowcore && - addr < (void *)(&S390_lowcore + 1); + return addr >= (void *)get_lowcore() && + addr < (void *)(get_lowcore() + 1); } static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) @@ -25,7 +25,7 @@ static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) * order to get a distinct struct page. */ addr += (void *)lowcore_ptr[raw_smp_processor_id()] - - (void *)&S390_lowcore; + (void *)get_lowcore(); if (KMSAN_WARN_ON(is_lowcore_addr(addr))) return NULL; return kmsan_get_metadata(addr, is_origin); diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index c724e71e1785..183ac29afaf8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -14,10 +14,15 @@ #include <asm/ctlreg.h> #include <asm/cpu.h> #include <asm/types.h> +#include <asm/alternative.h> #define LC_ORDER 1 #define LC_PAGES 2 +#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL) + +#ifndef __ASSEMBLY__ + struct pgm_tdb { u64 data[32]; }; @@ -97,8 +102,7 @@ struct lowcore { __u64 save_area_async[8]; /* 0x0240 */ __u64 save_area_restart[1]; /* 0x0280 */ - /* CPU flags. */ - __u64 cpu_flags; /* 0x0288 */ + __u64 pcpu; /* 0x0288 */ /* Return psws. */ psw_t return_psw; /* 0x0290 */ @@ -215,7 +219,14 @@ struct lowcore { static __always_inline struct lowcore *get_lowcore(void) { - return NULL; + struct lowcore *lc; + + if (__is_defined(__DECOMPRESSOR)) + return NULL; + asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + return lc; } extern struct lowcore *lowcore_ptr[]; @@ -225,4 +236,19 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "Q" (address) : "memory"); } +#else /* __ASSEMBLY__ */ + +.macro GET_LC reg + ALTERNATIVE "llilh \reg,0", \ + __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ + ALT_LOWCORE +.endm + +.macro STMG_LC start, end, savearea + ALTERNATIVE "stmg \start, \end, \savearea", \ + __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ + ALT_LOWCORE +.endm + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index b9c1f3cae842..192835a3e24d 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -5,8 +5,17 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/facility.h> extern int nospec_disable; +extern int nobp; + +static inline bool nobp_enabled(void) +{ + if (__is_defined(__DECOMPRESSOR)) + return false; + return nobp && test_facility(82); +} void nospec_init_branches(void); void nospec_auto_detect(void); diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5ec41ec3d761..06416b3f94f5 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -174,12 +174,10 @@ static inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -#if IS_ENABLED(CONFIG_PGSTE) int arch_make_folio_accessible(struct folio *folio); #define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE int arch_make_page_accessible(struct page *page); #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -#endif struct vm_layout { unsigned long kaslr_offset; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c87cf2b8e81a..5ecd442535b9 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,13 +14,11 @@ #include <linux/bits.h> -#define CIF_SIE 0 /* CPU needs SIE exit cleanup */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_SIE BIT(CIF_SIE) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) @@ -42,21 +40,37 @@ #include <asm/irqflags.h> #include <asm/alternative.h> +struct pcpu { + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ + unsigned long flags; /* per CPU flags */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; + +DECLARE_PER_CPU(struct pcpu, pcpu_devices); + typedef long (*sys_call_ptr_t)(struct pt_regs *regs); +static __always_inline struct pcpu *this_pcpu(void) +{ + return (struct pcpu *)(get_lowcore()->pcpu); +} + static __always_inline void set_cpu_flag(int flag) { - get_lowcore()->cpu_flags |= (1UL << flag); + this_pcpu()->flags |= (1UL << flag); } static __always_inline void clear_cpu_flag(int flag) { - get_lowcore()->cpu_flags &= ~(1UL << flag); + this_pcpu()->flags &= ~(1UL << flag); } static __always_inline bool test_cpu_flag(int flag) { - return get_lowcore()->cpu_flags & (1UL << flag); + return this_pcpu()->flags & (1UL << flag); } static __always_inline bool test_and_set_cpu_flag(int flag) @@ -81,9 +95,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - struct lowcore *lc = lowcore_ptr[cpu]; - - return lc->cpu_flags & (1UL << flag); + return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -405,7 +417,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82)); + asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h new file mode 100644 index 000000000000..17878b1d048c --- /dev/null +++ b/arch/s390/include/asm/runtime-const.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_RUNTIME_CONST_H +#define _ASM_S390_RUNTIME_CONST_H + +#include <linux/uaccess.h> + +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + \ + asm_inline( \ + "0: iihf %[__ret],%[c1]\n" \ + " iilf %[__ret],%[c2]\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "=d" (__ret) \ + : [c1] "i" (0x01234567UL), \ + [c2] "i" (0x89abcdefUL)); \ + __ret; \ +}) + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + unsigned int __ret = (val); \ + \ + asm_inline( \ + "0: srl %[__ret],12\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "+d" (__ret)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 32-bit immediate for iihf and iilf in bits in I2 field */ +static inline void __runtime_fixup_32(u32 *p, unsigned int val) +{ + s390_kernel_write(p, &val, sizeof(val)); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __runtime_fixup_32(where + 2, val >> 32); + __runtime_fixup_32(where + 8, val); +} + +/* Immediate value is lower 12 bits of D2 field of srl */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + u32 insn = *(u32 *)where; + + insn &= 0xfffff000; + insn |= (val & 63); + s390_kernel_write(where, &insn, sizeof(insn)); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_S390_RUNTIME_CONST_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c13c79025348..cd835f4fb11a 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern void smp_call_online_cpu(void (*func)(void *), void *); extern void smp_call_ipl_cpu(void (*func)(void *), void *); extern void smp_emergency_stop(void); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 3e43c90ff135..77d5e804af93 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) typecheck(int, lp->lock); kcsan_release(); asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ " sth %1,%0\n" : "=R" (((unsigned short *) &lp->lock)[1]) : "d" (0) : "cc", "memory"); diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index d02a709717b8..00ac01874a12 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ unsigned int cpu; /* current CPU */ + unsigned char sie; /* running in SIE context */ }; /* diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9213be0529ee..a81f897a81ce 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -332,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -void *s390_kernel_write(void *dst, const void *src, size_t size); +void *__s390_kernel_write(void *dst, const void *src, size_t size); + +static inline void *s390_kernel_write(void *dst, const void *src, size_t size) +{ + if (__is_defined(__DECOMPRESSOR)) + return memcpy(dst, src, size); + return __s390_kernel_write(dst, src, size); +} int __noreturn __put_kernel_bad(void); diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0679445cac0b..153d93468b77 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit) return test_bit_inv(feature_bit, &uv_info.uv_feature_indications); } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; static inline int is_prot_virt_guest(void) @@ -442,7 +441,10 @@ static inline int share(unsigned long addr, u16 cmd) if (!uv_call(0, (u64)&uvcb)) return 0; - return -EINVAL; + pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n", + uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare", + uvcb.header.rc, uvcb.header.rrc); + panic("System security cannot be guaranteed unless the system panics now.\n"); } /* @@ -466,13 +468,6 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -#else -#define is_prot_virt_guest() 0 -static inline int uv_set_shared(unsigned long addr) { return 0; } -static inline int uv_remove_shared(unsigned long addr) { return 0; } -#endif - -#if IS_ENABLED(CONFIG_KVM) extern int prot_virt_host; static inline int is_prot_virt_host(void) @@ -489,29 +484,5 @@ int uv_convert_from_secure_pte(pte_t pte); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); -#else -#define is_prot_virt_host() 0 -static inline void setup_uv(void) {} - -static inline int uv_pin_shared(unsigned long paddr) -{ - return 0; -} - -static inline int uv_destroy_folio(struct folio *folio) -{ - return 0; -} - -static inline int uv_destroy_pte(pte_t pte) -{ - return 0; -} - -static inline int uv_convert_from_secure_pte(pte_t pte) -{ - return 0; -} -#endif #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7241fa194709..e47a4be54ff8 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o extra-y += vmlinux.lds @@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o obj-$(CONFIG_TRACEPOINTS) += trace.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index f9efc54ec4b7..09cd24cbe74e 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -4,6 +4,7 @@ #include <asm/abs_lowcore.h> unsigned long __bootdata_preserved(__abs_lowcore); +int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 1ac5f707dd70..8d5d0de35de0 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,68 +1,41 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/module.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <asm/text-patching.h> + +#include <linux/uaccess.h> +#include <asm/nospec-branch.h> +#include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> -#include <asm/nospec-branch.h> - -static int __initdata_or_module alt_instr_disabled; - -static int __init disable_alternative_instructions(char *str) -{ - alt_instr_disabled = 1; - return 0; -} - -early_param("noaltinstr", disable_alternative_instructions); -static void __init_or_module __apply_alternatives(struct alt_instr *start, - struct alt_instr *end) +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - struct alt_instr *a; u8 *instr, *replacement; + struct alt_instr *a; + bool replace; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ for (a = start; a < end; a++) { + if (!(a->ctx & ctx)) + continue; + switch (a->type) { + case ALT_TYPE_FACILITY: + replace = test_facility(a->data); + break; + case ALT_TYPE_SPEC: + replace = nobp_enabled(); + break; + case ALT_TYPE_LOWCORE: + replace = have_relocated_lowcore(); + break; + default: + replace = false; + } + if (!replace) + continue; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - - if (!__test_facility(a->facility, alt_stfle_fac_list)) - continue; s390_kernel_write(instr, replacement, a->instrlen); } } - -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) -{ - if (!alt_instr_disabled) - __apply_alternatives(start, end); -} - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -void __init apply_alternative_instructions(void) -{ - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - -static void do_sync_core(void *info) -{ - sync_core(); -} - -void text_poke_sync(void) -{ - on_each_cpu(do_sync_core, NULL, 1); -} - -void text_poke_sync_lock(void) -{ - cpus_read_lock(); - text_poke_sync(); - cpus_read_unlock(); -} diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 26bb45d0e6f1..ffa0dd2dbaac 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -28,6 +28,7 @@ int main(void) BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); + OFFSET(__TI_sie, task_struct, thread_info.sie); BLANK(); /* pt_regs offsets */ OFFSET(__PT_PSW, pt_regs, psw); @@ -114,7 +115,7 @@ int main(void) OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); - OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_PCPU, lowcore, pcpu); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); @@ -186,5 +187,7 @@ int main(void) #endif OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + + OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 467ed4dba817..14d324865e33 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); +decompressor_handled_param(relocate_lowcore); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -190,13 +191,6 @@ static noinline __init void setup_lowcore_early(void) get_lowcore()->preempt_count = INIT_PREEMPT_COUNT; } -static noinline __init void setup_facility_list(void) -{ - memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); - if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) - __clear_facility(82, alt_stfle_fac_list); -} - static __init void detect_diag9c(void) { unsigned int cpu_address; @@ -291,7 +285,6 @@ void __init startup_init(void) lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - setup_facility_list(); detect_machine_type(); setup_arch_string(); setup_boot_command_line(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 454b6b92c7f8..749410cfdbc0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-extable.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/dwarf.h> @@ -28,49 +28,54 @@ #include <asm/setup.h> #include <asm/nmi.h> #include <asm/nospec-insn.h> +#include <asm/lowcore.h> _LPP_OFFSET = __LC_LPP .macro STBEAR address - ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193) .endm .macro LBEAR address - ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193) .endm - .macro LPSWEY address,lpswe - ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193 + .macro LPSWEY address, lpswe + ALTERNATIVE_2 "b \lpswe;nopr", \ + ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \ + __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ + ALT_LOWCORE .endm - .macro MBEAR reg - ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 + .macro MBEAR reg, lowcore + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\ + ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea + .macro CHECK_STACK savearea, lowcore #ifdef CONFIG_CHECK_STACK tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - lghi %r14,\savearea + la %r14,\savearea(\lowcore) jz stack_overflow #endif .endm - .macro CHECK_VMAP_STACK savearea,oklabel + .macro CHECK_VMAP_STACK savearea, lowcore, oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET - clg %r14,__LC_KERNEL_STACK + clg %r14,__LC_KERNEL_STACK(\lowcore) je \oklabel - clg %r14,__LC_ASYNC_STACK + clg %r14,__LC_ASYNC_STACK(\lowcore) je \oklabel - clg %r14,__LC_MCCK_STACK + clg %r14,__LC_MCCK_STACK(\lowcore) je \oklabel - clg %r14,__LC_NODAT_STACK + clg %r14,__LC_NODAT_STACK(\lowcore) je \oklabel - clg %r14,__LC_RESTART_STACK + clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel - lghi %r14,\savearea + la %r14,\savearea(\lowcore) j stack_overflow #else j \oklabel @@ -100,30 +105,31 @@ _LPP_OFFSET = __LC_LPP .endm .macro BPOFF - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82) .endm .macro BPON - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm .macro BPENTER tif_ptr,tif_mask ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ - "j .+12; nop; nop", 82 + "j .+12; nop; nop", ALT_SPEC(82) .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ - "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm #if IS_ENABLED(CONFIG_KVM) - .macro SIEEXIT sie_control - lg %r9,\sie_control # get control block pointer - ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + .macro SIEEXIT sie_control,lowcore + lg %r9,\sie_control # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lg %r9,__LC_CURRENT(\lowcore) + mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit .endm #endif @@ -163,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm) stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lg %r15,0(%r4,%r3) # start of kernel stack of next agr %r15,%r5 # end of kernel stack of next - stg %r3,__LC_CURRENT # store task struct of next - stg %r15,__LC_KERNEL_STACK # store end of kernel stack + GET_LC %r13 + stg %r3,__LC_CURRENT(%r13) # store task struct of next + stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next aghi %r3,__TASK_pid - mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next + mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next + ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 BR_EX %r14 SYM_FUNC_END(__switch_to_asm) @@ -183,15 +190,16 @@ SYM_FUNC_END(__switch_to_asm) */ SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers - lg %r12,__LC_CURRENT + GET_LC %r13 + lg %r14,__LC_CURRENT(%r13) stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 - mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 - oi __LC_CPU_FLAGS+7,_CIF_SIE + mvi __TI_sie(%r14),1 lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now @@ -210,8 +218,10 @@ SYM_FUNC_START(__sie64a) .Lsie_skip: lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + GET_LC %r14 + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lg %r14,__LC_CURRENT(%r14) + mvi __TI_sie(%r14),0 # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. @@ -254,14 +264,15 @@ EXPORT_SYMBOL(sie_exit) */ SYM_CODE_START(system_call) - stpt __LC_SYS_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_SYNC + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF lghi %r14,0 .Lsysc_per: - STBEAR __LC_LAST_BREAK - lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK + STBEAR __LC_LAST_BREAK(%r13) + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) # clear user controlled register to prevent speculative use @@ -276,17 +287,17 @@ SYM_CODE_START(system_call) xgr %r10,%r10 xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC - MBEAR %r2 + mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13) + MBEAR %r2,%r13 lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(system_call) @@ -297,12 +308,13 @@ SYM_CODE_START(ret_from_fork) lgr %r3,%r11 brasl %r14,__ret_from_fork STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + GET_LC %r13 + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(ret_from_fork) @@ -311,39 +323,40 @@ SYM_CODE_END(ret_from_fork) */ SYM_CODE_START(pgm_check_handler) - stpt __LC_SYS_ENTER_TIMER + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_SYNC lgr %r10,%r15 - lmg %r8,%r9,__LC_PGM_OLD_PSW + lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) j 3f # -> fault in user space .Lpgm_skip_asce: 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault - tm __LC_PGM_ILC+3,0x80 # check for per exception + tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -3: lg %r15,__LC_KERNEL_STACK + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f +3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13) + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13) stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - ltg %r12,__LC_GMAP + ltg %r12,__LC_GMAP(%r13) jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r10) + SIEEXIT __SF_SIE_CONTROL(%r10),%r13 #endif 5: stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use @@ -359,11 +372,11 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -372,11 +385,11 @@ SYM_CODE_START(pgm_check_handler) # single stepped system call # .Lpgm_svcper: - mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13) larl %r14,.Lsysc_per - stg %r14,__LC_RETURN_PSW+8 + stg %r14,__LC_RETURN_PSW+8(%r13) lghi %r14,1 - LBEAR __LC_PGM_LAST_BREAK + LBEAR __LC_PGM_LAST_BREAK(%r13) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per SYM_CODE_END(pgm_check_handler) @@ -385,25 +398,27 @@ SYM_CODE_END(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler SYM_CODE_START(\name) - stckf __LC_INT_CLOCK - stpt __LC_SYS_ENTER_TIMER - STBEAR __LC_LAST_BREAK + STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC + GET_LC %r13 + stckf __LC_INT_CLOCK(%r13) + stpt __LC_SYS_ENTER_TIMER(%r13) + STBEAR __LC_LAST_BREAK(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lmg %r8,%r9,\lc_old_psw + lmg %r8,%r9,\lc_old_psw(%r13) tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA_ASYNC +0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK +1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -417,18 +432,18 @@ SYM_CODE_START(\name) xgr %r7,%r7 xgr %r10,%r10 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - MBEAR %r11 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13) + MBEAR %r11,%r13 stmg %r8,%r9,__PT_PSW(%r11) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,\handler - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) lmg %r0,%r15,__PT_R0(%r11) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -443,35 +458,37 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq */ SYM_CODE_START(mcck_int_handler) BPOFF - lmg %r8,%r9,__LC_MCK_OLD_PSW - TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE + GET_LC %r13 + lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID jno .Lmcck_panic # control registers invalid -> panic ptlb - lghi %r14,__LC_CPU_TIMER_SAVE_AREA - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID + lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13) + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYS_ENTER_TIMER - clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER(%r13) + clc 0(8,%r14),__LC_EXIT_TIMER(%r13) jl 1f - la %r14,__LC_EXIT_TIMER -1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + la %r14,__LC_EXIT_TIMER(%r13) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13) jl 2f - la %r14,__LC_LAST_UPDATE_TIMER + la %r14,__LC_LAST_UPDATE_TIMER(%r13) 2: spt 0(%r14) - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) +3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID jno .Lmcck_panic tmhh %r8,0x0001 # interrupting from user ? jnz .Lmcck_user - TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff jz .Lmcck_user - # Need to compare the address instead of a CIF_SIE* flag. + # Need to compare the address instead of __TI_SIE flag. # Otherwise there would be a race between setting the flag # and entering SIE (or leaving and clearing the flag). This # would cause machine checks targeted at the guest to be @@ -480,18 +497,19 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST + lg %r10,__LC_PCPU + oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif .Lmcck_user: - lg %r15,__LC_MCCK_STACK + lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r14,__LC_GPREGS_SAVE_AREA+64 - stmg %r0,%r7,__PT_R0(%r11) + lay %r14,__LC_GPREGS_SAVE_AREA(%r13) + mvc __PT_R0(128,%r11),0(%r14) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 @@ -501,7 +519,6 @@ SYM_CODE_START(mcck_int_handler) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 - mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -509,12 +526,13 @@ SYM_CODE_START(mcck_int_handler) brasl %r14,s390_do_machine_check lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? jno 0f BPON - stpt __LC_EXIT_TIMER -0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 + stpt __LC_EXIT_TIMER(%r13) +0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\ + ALT_FACILITY(193) LBEAR 0(%r12) lmg %r11,%r15,__PT_R11(%r11) LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE @@ -550,7 +568,7 @@ SYM_CODE_START(mcck_int_handler) SYM_CODE_END(mcck_int_handler) SYM_CODE_START(restart_int_handler) - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f @@ -558,15 +576,17 @@ SYM_CODE_START(restart_int_handler) 0: larl %r15,daton_psw lpswe 0(%r15) # turn dat on, keep irqs off .Ldaton: - lg %r15,__LC_RESTART_STACK + GET_LC %r15 + lg %r15,__LC_RESTART_STACK(%r15) xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART - mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW + GET_LC %r13 + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13) + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) - lg %r1,__LC_RESTART_FN # load fn, parm & source cpu - lg %r2,__LC_RESTART_DATA - lgf %r3,__LC_RESTART_SOURCE + lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA(%r13) + lgf %r3,__LC_RESTART_SOURCE(%r13) ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu @@ -588,7 +608,8 @@ SYM_CODE_END(restart_int_handler) * Setup a pt_regs so that show_trace can provide a good call trace. */ SYM_CODE_START(stack_overflow) - lg %r15,__LC_NODAT_STACK # change to panic stack + GET_LC %r15 + lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index fa90bbdc5ef9..6f2e87920288 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -113,7 +113,7 @@ void load_fpu_state(struct fpu *state, int flags) int mask; if (flags & KERNEL_FPC) - fpu_lfpc(&state->fpc); + fpu_lfpc_safe(&state->fpc); if (!cpu_has_vx()) { if (flags & KERNEL_VXR_V0V7) load_fp_regs_vx(state->vxrs); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 45413b04efc5..396034b2fe67 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/lowcore.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> @@ -18,14 +19,15 @@ __HEAD SYM_CODE_START(startup_continue) larl %r1,tod_clock_base - mvc 0(16,%r1),__LC_BOOT_CLOCK + GET_LC %r2 + mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # larl %r14,init_task - stg %r14,__LC_CURRENT + stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET - stg %r15,__LC_KERNEL_STACK + stg %r15,__LC_KERNEL_STACK(%r2) brasl %r14,sclp_early_adjust_va # allow sclp_early_printk brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 3a7d6e172211..f17bb7bf9392 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2112,7 +2112,7 @@ void do_restart(void *arg) tracing_off(); debug_locks_off(); lgr_info_log(); - smp_call_online_cpu(__do_restart, arg); + smp_call_ipl_cpu(__do_restart, arg); } /* on halt */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index f4cf65da6d49..8f681ccfb83a 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data) * This need to be done *after* s390_reset_system set the * prefix register of this CPU to zero */ - memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), + memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area), phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); call_nodat(1, int, purgatory, int, 1); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 9b8c24ebb008..e11ec15960a1 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -4,6 +4,8 @@ #include <linux/cpu.h> #include <asm/nospec-branch.h> +int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP); + static int __init nobp_setup_early(char *str) { bool enabled; @@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str) * The user explicitly requested nobp=1, enable it and * disable the expoline support. */ - __set_facility(82, alt_stfle_fac_list); + nobp = 1; if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_disable = 1; } else { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } return 0; } @@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early); static int __init nospec_setup_early(char *str) { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; return 0; } early_param("nospec", nospec_setup_early); @@ -40,7 +42,7 @@ static int __init nospec_report(void) pr_info("Spectre V2 mitigation: etokens\n"); if (nospec_uses_trampoline()) pr_info("Spectre V2 mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) pr_info("Spectre V2 mitigation: limited branch prediction\n"); return 0; } @@ -66,14 +68,14 @@ void __init nospec_auto_detect(void) */ if (__is_defined(CC_USING_EXPOLINE)) nospec_disable = 1; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } else if (__is_defined(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. */ nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } /* * If the kernel has not been compiled with expolines the @@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } if (str && !strncmp(str, "off", 3)) nospec_disable = 1; diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 52d4353188ad..a95188818637 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Mitigation: etokens\n"); if (nospec_uses_trampoline()) return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) return sprintf(buf, "Mitigation: limited branch prediction\n"); return sprintf(buf, "Vulnerable\n"); } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1434642e9cba..6968be98af11 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) struct cf_trailer_entry *trailer_start, *trailer_stop; struct cf_ctrset_entry *ctrstart, *ctrstop; size_t offset = 0; + int i; - auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; - do { + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + /* Counter set not authorized */ + if (!(auth & cpumf_ctr_ctl[i])) + continue; + /* Counter set size zero was not saved */ + if (!cpum_cf_read_setsize(i)) + continue; + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { pr_err_once("cpum_cf_diag counter set compare error " "in set %i\n", ctrstart->set); return 0; } - auth &= ~cpumf_ctr_ctl[ctrstart->set]; if (ctrstart->def == CF_DIAG_CTRSET_DEF) { cfdiag_diffctrset((u64 *)(ctrstart + 1), (u64 *)(ctrstop + 1), ctrstart->ctr); offset += ctrstart->ctr * sizeof(u64) + sizeof(*ctrstart); } - } while (ctrstart->def && auth); + } /* Save time_stamp from start of event in stop's trailer */ trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 65c1464eea4f..5ce9a795a0fe 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -17,7 +17,8 @@ #include <linux/mm_types.h> #include <linux/delay.h> #include <linux/cpu.h> - +#include <linux/smp.h> +#include <asm/text-patching.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) } } +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} + /* * cpu_init - initializes state that is per-CPU. */ diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 88087a32ebc6..69fcaf54d5ca 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,6 +9,7 @@ #include <asm/asm-offsets.h> #include <asm/nospec-insn.h> #include <asm/sigp.h> +#include <asm/lowcore.h> GEN_BR_THUNK %r9 @@ -20,20 +21,15 @@ # r3 = Parameter for function # SYM_CODE_START(store_status) - /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_RESTART + STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA /* General purpose registers */ - lghi %r1,__LC_GPREGS_SAVE_AREA - stmg %r0,%r15,0(%r1) - mvc 8(8,%r1),__LC_SAVE_AREA_RESTART + GET_LC %r13 /* Control registers */ - lghi %r1,__LC_CREGS_SAVE_AREA - stctg %c0,%c15,0(%r1) + stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13) /* Access registers */ - lghi %r1,__LC_AREGS_SAVE_AREA - stam %a0,%a15,0(%r1) + stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13) /* Floating point registers */ - lghi %r1,__LC_FPREGS_SAVE_AREA + lay %r1,__LC_FPREGS_SAVE_AREA(%r13) std %f0, 0x00(%r1) std %f1, 0x08(%r1) std %f2, 0x10(%r1) @@ -51,21 +47,21 @@ SYM_CODE_START(store_status) std %f14,0x70(%r1) std %f15,0x78(%r1) /* Floating point control register */ - lghi %r1,__LC_FP_CREG_SAVE_AREA + lay %r1,__LC_FP_CREG_SAVE_AREA(%r13) stfpc 0(%r1) /* CPU timer */ - lghi %r1,__LC_CPU_TIMER_SAVE_AREA + lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13) stpt 0(%r1) /* Store prefix register */ - lghi %r1,__LC_PREFIX_SAVE_AREA + lay %r1,__LC_PREFIX_SAVE_AREA(%r13) stpx 0(%r1) /* Clock comparator - seven bytes */ - lghi %r1,__LC_CLOCK_COMP_SAVE_AREA larl %r4,clkcmp stckc 0(%r4) + lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13) mvc 1(7,%r1),1(%r4) /* Program status word */ - lghi %r1,__LC_PSW_SAVE_AREA + lay %r1,__LC_PSW_SAVE_AREA(%r13) epsw %r4,%r5 st %r4,0(%r1) st %r5,4(%r1) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3993f4caf224..4ec99f73fa27 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -149,13 +149,12 @@ unsigned long __bootdata_preserved(max_mappable); struct physmem_info __bootdata(physmem_info); struct vm_layout __bootdata_preserved(vm_layout); -EXPORT_SYMBOL_GPL(vm_layout); +EXPORT_SYMBOL(vm_layout); int __bootdata_preserved(__kaslr_enabled); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); -u64 alt_stfle_fac_list[16]; struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; @@ -406,6 +405,7 @@ static void __init setup_lowcore(void) panic("%s: Failed to allocate %zu bytes align=%zx\n", __func__, sizeof(*lc), sizeof(*lc)); + lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0); lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; lc->restart_psw.addr = __pa(restart_int_handler); lc->external_new_psw.mask = PSW_KERNEL_BITS; @@ -889,6 +889,9 @@ void __init setup_arch(char **cmdline_p) else pr_info("Linux is running as a guest in 64-bit mode\n"); + if (have_relocated_lowcore()) + pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); + log_component_list(); /* Have one command line that is parsed and saved in /proc/cmdline */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c3c54adf67bc..fbba37ec53cf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -74,16 +74,15 @@ enum { CPU_STATE_CONFIGURED, }; -struct pcpu { - unsigned long ec_mask; /* bit mask for ec_xxx functions */ - unsigned long ec_clk; /* sigp timestamp for ec_xxx */ - signed char state; /* physical cpu state */ - signed char polarization; /* physical polarization */ - u16 address; /* physical cpu address */ -}; - static u8 boot_core_type; -static struct pcpu pcpu_devices[NR_CPUS]; +DEFINE_PER_CPU(struct pcpu, pcpu_devices); +/* + * Pointer to the pcpu area of the boot CPU. This is required when a restart + * interrupt is triggered on an offline CPU. For that case accessing percpu + * data with the common primitives does not work, since the percpu offset is + * stored in a non existent lowcore. + */ +static struct pcpu *ipl_pcpu; unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -174,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) int cpu; for_each_cpu(cpu, mask) - if (pcpu_devices[cpu].address == address) - return pcpu_devices + cpu; + if (per_cpu(pcpu_devices, cpu).address == address) + return &per_cpu(pcpu_devices, cpu); return NULL; } @@ -230,13 +229,11 @@ out: return -ENOMEM; } -static void pcpu_free_lowcore(struct pcpu *pcpu) +static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; async_stack = lc->async_stack - STACK_INIT_OFFSET; @@ -259,6 +256,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->pcpu = (unsigned long)pcpu; lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; @@ -277,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) arch_spin_lock_setup(cpu); } -static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +static void pcpu_attach_task(int cpu, struct task_struct *tsk) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; lc->current_task = (unsigned long)tsk; @@ -296,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) lc->steal_timer = 0; } -static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; lc->restart_source = -1U; - pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); + pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0); } typedef void (pcpu_delegate_fn)(void *); @@ -320,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, +static void pcpu_delegate(struct pcpu *pcpu, int cpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { struct lowcore *lc, *abs_lc; unsigned int source_cpu; - lc = lowcore_ptr[pcpu - pcpu_devices]; + lc = lowcore_ptr[cpu]; source_cpu = stap(); if (pcpu->address == source_cpu) { @@ -377,38 +371,22 @@ static int pcpu_set_smt(unsigned int mtid) smp_cpu_mt_shift = 0; while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) smp_cpu_mt_shift++; - pcpu_devices[0].address = stap(); + per_cpu(pcpu_devices, 0).address = stap(); } return cc; } /* - * Call function on an online CPU. - */ -void smp_call_online_cpu(void (*func)(void *), void *data) -{ - struct pcpu *pcpu; - - /* Use the current cpu if it is online. */ - pcpu = pcpu_find_address(cpu_online_mask, stap()); - if (!pcpu) - /* Use the first online cpu. */ - pcpu = pcpu_devices + cpumask_first(cpu_online_mask); - pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); -} - -/* * Call function on the ipl CPU. */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { struct lowcore *lc = lowcore_ptr[0]; - if (pcpu_devices[0].address == stap()) + if (ipl_pcpu->address == stap()) lc = get_lowcore(); - pcpu_delegate(&pcpu_devices[0], func, data, - lc->nodat_stack); + pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack); } int smp_find_processor_id(u16 address) @@ -416,21 +394,21 @@ int smp_find_processor_id(u16 address) int cpu; for_each_present_cpu(cpu) - if (pcpu_devices[cpu].address == address) + if (per_cpu(pcpu_devices, cpu).address == address) return cpu; return -1; } void schedule_mcck_handler(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending); } bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; - if (pcpu_running(pcpu_devices + cpu)) + if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu))) return false; return true; } @@ -442,7 +420,7 @@ void notrace smp_yield_cpu(int cpu) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); + : : "d" (per_cpu(pcpu_devices, cpu).address)); } EXPORT_SYMBOL_GPL(smp_yield_cpu); @@ -463,7 +441,7 @@ void notrace smp_emergency_stop(void) end = get_tod_clock() + (1000000UL << 12); for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && @@ -472,7 +450,7 @@ void notrace smp_emergency_stop(void) } while (get_tod_clock() < end) { for_each_cpu(cpu, &cpumask) - if (pcpu_stopped(pcpu_devices + cpu)) + if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) cpumask_clear_cpu(cpu, &cpumask); if (cpumask_empty(&cpumask)) break; @@ -487,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop); */ void smp_send_stop(void) { + struct pcpu *pcpu; int cpu; /* Disable all interrupts/machine checks */ @@ -502,8 +481,9 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; - pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); - while (!pcpu_stopped(pcpu_devices + cpu)) + pcpu = per_cpu_ptr(&pcpu_devices, cpu); + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } @@ -517,7 +497,7 @@ static void smp_handle_ext_call(void) unsigned long bits; /* handle bit signal external calls */ - bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + bits = this_cpu_xchg(pcpu_devices.ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) @@ -542,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } /* @@ -557,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu) */ void arch_smp_send_reschedule(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_schedule); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule); } #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work); } #endif @@ -575,7 +555,7 @@ int smp_store_status(int cpu) struct pcpu *pcpu; unsigned long pa; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); lc = lowcore_ptr[cpu]; pa = __pa(&lc->floating_pt_save_area); if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, @@ -683,17 +663,17 @@ void __init smp_save_dump_secondary_cpus(void) void smp_cpu_set_polarization(int cpu, int val) { - pcpu_devices[cpu].polarization = val; + per_cpu(pcpu_devices, cpu).polarization = val; } int smp_cpu_get_polarization(int cpu) { - return pcpu_devices[cpu].polarization; + return per_cpu(pcpu_devices, cpu).polarization; } int smp_cpu_get_cpu_address(int cpu) { - return pcpu_devices[cpu].address; + return per_cpu(pcpu_devices, cpu).address; } static void __ref smp_get_core_info(struct sclp_core_info *info, int early) @@ -732,7 +712,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { if (pcpu_find_address(cpu_present_mask, address + i)) continue; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); pcpu->address = address + i; if (configured) pcpu->state = CPU_STATE_CONFIGURED; @@ -767,7 +747,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) * that all SMT threads get subsequent logical CPU numbers. */ if (early) { - core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift; for (i = 0; i < info->configured; i++) { core = &info->core[i]; if (core->core_id == core_id) { @@ -867,7 +847,7 @@ static void smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); int rc; if (pcpu->state != CPU_STATE_CONFIGURED) @@ -885,8 +865,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) */ system_ctlreg_lock(); pcpu_prepare_secondary(pcpu, cpu); - pcpu_attach_task(pcpu, tidle); - pcpu_start_fn(pcpu, smp_start_secondary, NULL); + pcpu_attach_task(cpu, tidle); + pcpu_start_fn(cpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); @@ -931,18 +911,19 @@ void __cpu_die(unsigned int cpu) struct pcpu *pcpu; /* Wait until target cpu is down */ - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); while (!pcpu_stopped(pcpu)) cpu_relax(); - pcpu_free_lowcore(pcpu); + pcpu_free_lowcore(pcpu, cpu); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + pcpu->flags = 0; } void __noreturn cpu_die(void) { idle_task_exit(); - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0); for (;;) ; } @@ -972,11 +953,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { - struct pcpu *pcpu = pcpu_devices; + struct lowcore *lc = get_lowcore(); WARN_ON(!cpu_present(0) || !cpu_online(0)); - pcpu->state = CPU_STATE_CONFIGURED; - get_lowcore()->percpu_offset = __per_cpu_offset[0]; + lc->percpu_offset = __per_cpu_offset[0]; + ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0); + ipl_pcpu->state = CPU_STATE_CONFIGURED; + lc->pcpu = (unsigned long)ipl_pcpu; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } @@ -984,8 +967,8 @@ void __init smp_setup_processor_id(void) { struct lowcore *lc = get_lowcore(); - pcpu_devices[0].address = stap(); lc->cpu_nr = 0; + per_cpu(pcpu_devices, 0).address = stap(); lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; } @@ -1007,7 +990,7 @@ static ssize_t cpu_configure_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -1033,7 +1016,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_online(cpu + i)) goto out; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); rc = 0; switch (val) { case 0: @@ -1045,7 +1028,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_STANDBY; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1060,7 +1043,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_CONFIGURED; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1079,7 +1062,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); + return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -1105,14 +1088,14 @@ static struct attribute_group cpu_online_attr_group = { static int smp_cpu_online(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); } static int smp_cpu_pre_down(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); return 0; @@ -1125,7 +1108,7 @@ bool arch_cpu_is_hotpluggable(int cpu) int arch_register_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); int rc; c->hotpluggable = arch_cpu_is_hotpluggable(cpu); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index fa62fa0e369f..36db065c7cf7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -18,11 +18,22 @@ #include <asm/sections.h> #include <asm/uv.h> +#if !IS_ENABLED(CONFIG_KVM) +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + return 0; +} + +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + return 0; +} +#endif + /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); -#endif /* * uv_info contains both host and guest information but it's currently only @@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest); struct uv_info __bootdata_preserved(uv_info); EXPORT_SYMBOL(uv_info); -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); @@ -543,9 +553,6 @@ int arch_make_page_accessible(struct page *page) return arch_make_folio_accessible(page_folio(page)); } EXPORT_SYMBOL_GPL(arch_make_page_accessible); -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -721,24 +728,13 @@ static struct attribute_group uv_query_attr_group = { static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST - val = prot_virt_guest; -#endif - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_guest); } static ssize_t uv_is_prot_virt_host(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#if IS_ENABLED(CONFIG_KVM) - val = prot_virt_host; -#endif - - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_host); } static struct kobj_attribute uv_prot_virt_guest = @@ -790,4 +786,3 @@ out_kobj: return rc; } device_initcall(uv_info_init); -#endif diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a1ce3925ec71..e67cd409b858 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -59,14 +59,6 @@ SECTIONS } :text = 0x0700 RO_DATA(PAGE_SIZE) - .data.rel.ro : { - *(.data.rel.ro .data.rel.ro.*) - } - .got : { - __got_start = .; - *(.got) - __got_end = .; - } . = ALIGN(PAGE_SIZE); _sdata = .; /* Start of data section */ @@ -80,6 +72,15 @@ SECTIONS . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; + .data.rel.ro : { + *(.data.rel.ro .data.rel.ro.*) + } + .got : { + __got_start = .; + *(.got) + __got_end = .; + } + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) .data.rel : { *(.data.rel*) @@ -190,6 +191,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); @@ -219,6 +223,8 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) + QUAD(__alt_instructions) + QUAD(__alt_instructions_end) #ifdef CONFIG_KASAN QUAD(kasan_early_shadow_page) QUAD(kasan_early_shadow_pte) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bf8534218af3..e680c6bf0c9d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -267,7 +267,12 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); + u32 gd; + + if (!kvm->arch.gisa_int.origin) + return 0; + + gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 0c9a73a18826..9f86ad8fa8b4 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock) int owner; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); return owner; @@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) int expected = old; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 45db5f47b22d..0a67fcee4414 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -3,6 +3,7 @@ #include <linux/ptdump.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/sort.h> #include <linux/mm.h> #include <linux/kfence.h> #include <linux/kasan.h> @@ -15,13 +16,15 @@ static unsigned long max_addr; struct addr_marker { + int is_start; unsigned long start_address; const char *name; }; enum address_markers_idx { - IDENTITY_BEFORE_NR = 0, - IDENTITY_BEFORE_END_NR, + KVA_NR = 0, + LOWCORE_START_NR, + LOWCORE_END_NR, AMODE31_START_NR, AMODE31_END_NR, KERNEL_START_NR, @@ -30,12 +33,22 @@ enum address_markers_idx { KFENCE_START_NR, KFENCE_END_NR, #endif - IDENTITY_AFTER_NR, - IDENTITY_AFTER_END_NR, + IDENTITY_START_NR, + IDENTITY_END_NR, VMEMMAP_NR, VMEMMAP_END_NR, VMALLOC_NR, VMALLOC_END_NR, +#ifdef CONFIG_KMSAN + KMSAN_VMALLOC_SHADOW_START_NR, + KMSAN_VMALLOC_SHADOW_END_NR, + KMSAN_VMALLOC_ORIGIN_START_NR, + KMSAN_VMALLOC_ORIGIN_END_NR, + KMSAN_MODULES_SHADOW_START_NR, + KMSAN_MODULES_SHADOW_END_NR, + KMSAN_MODULES_ORIGIN_START_NR, + KMSAN_MODULES_ORIGIN_END_NR, +#endif MODULES_NR, MODULES_END_NR, ABS_LOWCORE_NR, @@ -49,33 +62,44 @@ enum address_markers_idx { }; static struct addr_marker address_markers[] = { - [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, - [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, - [AMODE31_START_NR] = {0, "Amode31 Area Start"}, - [AMODE31_END_NR] = {0, "Amode31 Area End"}, - [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, - [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, + [KVA_NR] = {0, 0, "Kernel Virtual Address Space"}, + [LOWCORE_START_NR] = {1, 0, "Lowcore Start"}, + [LOWCORE_END_NR] = {0, 0, "Lowcore End"}, + [IDENTITY_START_NR] = {1, 0, "Identity Mapping Start"}, + [IDENTITY_END_NR] = {0, 0, "Identity Mapping End"}, + [AMODE31_START_NR] = {1, 0, "Amode31 Area Start"}, + [AMODE31_END_NR] = {0, 0, "Amode31 Area End"}, + [KERNEL_START_NR] = {1, (unsigned long)_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {0, (unsigned long)_end, "Kernel Image End"}, #ifdef CONFIG_KFENCE - [KFENCE_START_NR] = {0, "KFence Pool Start"}, - [KFENCE_END_NR] = {0, "KFence Pool End"}, + [KFENCE_START_NR] = {1, 0, "KFence Pool Start"}, + [KFENCE_END_NR] = {0, 0, "KFence Pool End"}, +#endif + [VMEMMAP_NR] = {1, 0, "vmemmap Area Start"}, + [VMEMMAP_END_NR] = {0, 0, "vmemmap Area End"}, + [VMALLOC_NR] = {1, 0, "vmalloc Area Start"}, + [VMALLOC_END_NR] = {0, 0, "vmalloc Area End"}, +#ifdef CONFIG_KMSAN + [KMSAN_VMALLOC_SHADOW_START_NR] = {1, 0, "Kmsan vmalloc Shadow Start"}, + [KMSAN_VMALLOC_SHADOW_END_NR] = {0, 0, "Kmsan vmalloc Shadow End"}, + [KMSAN_VMALLOC_ORIGIN_START_NR] = {1, 0, "Kmsan vmalloc Origins Start"}, + [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, 0, "Kmsan vmalloc Origins End"}, + [KMSAN_MODULES_SHADOW_START_NR] = {1, 0, "Kmsan Modules Shadow Start"}, + [KMSAN_MODULES_SHADOW_END_NR] = {0, 0, "Kmsan Modules Shadow End"}, + [KMSAN_MODULES_ORIGIN_START_NR] = {1, 0, "Kmsan Modules Origins Start"}, + [KMSAN_MODULES_ORIGIN_END_NR] = {0, 0, "Kmsan Modules Origins End"}, #endif - [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"}, - [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, - [VMEMMAP_NR] = {0, "vmemmap Area Start"}, - [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, - [VMALLOC_NR] = {0, "vmalloc Area Start"}, - [VMALLOC_END_NR] = {0, "vmalloc Area End"}, - [MODULES_NR] = {0, "Modules Area Start"}, - [MODULES_END_NR] = {0, "Modules Area End"}, - [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, - [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, - [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, - [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, + [MODULES_NR] = {1, 0, "Modules Area Start"}, + [MODULES_END_NR] = {0, 0, "Modules Area End"}, + [ABS_LOWCORE_NR] = {1, 0, "Lowcore Area Start"}, + [ABS_LOWCORE_END_NR] = {0, 0, "Lowcore Area End"}, + [MEMCPY_REAL_NR] = {1, 0, "Real Memory Copy Area Start"}, + [MEMCPY_REAL_END_NR] = {0, 0, "Real Memory Copy Area End"}, #ifdef CONFIG_KASAN - [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, - [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, + [KASAN_SHADOW_START_NR] = {1, KASAN_SHADOW_START, "Kasan Shadow Start"}, + [KASAN_SHADOW_END_NR] = {0, KASAN_SHADOW_END, "Kasan Shadow End"}, #endif - { -1, NULL } + {1, -1UL, NULL} }; struct pg_state { @@ -143,6 +167,19 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level) +{ + struct seq_file *m = st->seq; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; +} + static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { int width = sizeof(unsigned long) * 2; @@ -166,9 +203,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, addr = max_addr; if (st->level == -1) { pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { note_prot_wx(st, addr); @@ -182,13 +217,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } pt_dump_seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } } @@ -260,22 +289,25 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); #endif /* CONFIG_PTDUMP_DEBUGFS */ -/* - * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple - * insertion sort to preserve the original order of markers with the same - * start address. - */ -static void sort_address_markers(void) +static int ptdump_cmp(const void *a, const void *b) { - struct addr_marker tmp; - int i, j; + const struct addr_marker *ama = a; + const struct addr_marker *amb = b; - for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { - tmp = address_markers[i]; - for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) - address_markers[j + 1] = address_markers[j]; - address_markers[j + 1] = tmp; - } + if (ama->start_address > amb->start_address) + return 1; + if (ama->start_address < amb->start_address) + return -1; + /* + * If the start addresses of two markers are identical consider the + * marker which defines the start of an area higher than the one which + * defines the end of an area. This keeps pairs of markers sorted. + */ + if (ama->is_start) + return 1; + if (amb->is_start) + return -1; + return 0; } static int pt_dump_init(void) @@ -283,6 +315,8 @@ static int pt_dump_init(void) #ifdef CONFIG_KFENCE unsigned long kfence_start = (unsigned long)__kfence_pool; #endif + unsigned long lowcore = (unsigned long)get_lowcore(); + /* * Figure out the maximum virtual address being accessible with the * kernel ASCE. We need this to keep the page table walker functions @@ -290,7 +324,10 @@ static int pt_dump_init(void) */ max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; + address_markers[LOWCORE_START_NR].start_address = lowcore; + address_markers[LOWCORE_END_NR].start_address = lowcore + sizeof(struct lowcore); + address_markers[IDENTITY_START_NR].start_address = __identity_base; + address_markers[IDENTITY_END_NR].start_address = __identity_base + ident_map_size; address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; @@ -307,7 +344,18 @@ static int pt_dump_init(void) address_markers[KFENCE_START_NR].start_address = kfence_start; address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE; #endif - sort_address_markers(); +#ifdef CONFIG_KMSAN + address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START; + address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END; + address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START; + address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END; + address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START; + address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END; + address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START; + address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END; +#endif + sort(address_markers, ARRAY_SIZE(address_markers) - 1, + sizeof(address_markers[0]), ptdump_cmp, NULL); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); #endif /* CONFIG_PTDUMP_DEBUGFS */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ddcd39ef4346..e3d258f9e726 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -108,6 +108,8 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; + if (MACHINE_HAS_NX) + system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } @@ -170,13 +172,6 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. */ } -void free_initmem(void) -{ - set_memory_rwnx((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - free_initmem_default(POISON_FREE_INITMEM); -} - unsigned long memory_block_size_bytes(void) { /* diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 632c3a55feed..28a18c42ba99 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz } /* - * s390_kernel_write - write to kernel memory bypassing DAT + * __s390_kernel_write - write to kernel memory bypassing DAT * @dst: destination address * @src: source address * @size: number of bytes to copy @@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -notrace void *s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *__s390_kernel_write(void *dst, const void *src, size_t size) { void *tmp = dst; unsigned long flags; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 41c714e21292..665b8228afeb 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -661,7 +661,6 @@ void __init vmem_map_init(void) { __set_memory_rox(_stext, _etext); __set_memory_ro(_etext, __end_rodata); - __set_memory_rox(_sinittext, _einittext); __set_memory_rox(__stext_amode31, __etext_amode31); /* * If the BEAR-enhancement facility is not installed the first @@ -670,16 +669,8 @@ void __init vmem_map_init(void) */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - if (debug_pagealloc_enabled()) { - /* - * Use RELOC_HIDE() as long as __va(0) translates to NULL, - * since performing pointer arithmetic on a NULL pointer - * has undefined behavior and generates compiler warnings. - */ - __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); - } - if (MACHINE_HAS_NX) - system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); + if (debug_pagealloc_enabled()) + __set_memory_4k(__va(0), __va(0) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 0ef83b6ac0db..84482a921332 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, } } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, + unsigned long *bit) { - struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs, cpu; - unsigned long bit; - struct msi_desc *msi; - struct msi_msg msg; - int cpu_addr; - int rc, irq; - - zdev->aisb = -1UL; - zdev->msi_first_bit = -1U; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); - if (irq_delivery == DIRECTED) { /* Allocate cpu vector bits */ - bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); - if (bit == -1UL) + *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); + if (*bit == -1UL) return -EIO; } else { /* Allocate adapter summary indicator bit */ - bit = airq_iv_alloc_bit(zpci_sbv); - if (bit == -1UL) + *bit = airq_iv_alloc_bit(zpci_sbv); + if (*bit == -1UL) return -EIO; - zdev->aisb = bit; + zdev->aisb = *bit; /* Create adapter interrupt vector */ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL); @@ -302,27 +289,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -ENOMEM; /* Wire up shortcut pointer */ - zpci_ibv[bit] = zdev->aibv; + zpci_ibv[*bit] = zdev->aibv; /* Each function has its own interrupt vector */ - bit = 0; + *bit = 0; } + return 0; +} - /* Request MSI interrupts */ +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu; + struct zpci_dev *zdev = to_zpci(pdev); + struct msi_desc *msi; + struct msi_msg msg; + unsigned long bit; + int cpu_addr; + int rc, irq; + + zdev->aisb = -1UL; + zdev->msi_first_bit = -1U; + + msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); + if (msi_vecs < nvec) { + pr_info("%s requested %d irqs, allocate system limit of %d", + pci_name(pdev), nvec, zdev->max_msi); + } + + rc = __alloc_airq(zdev, msi_vecs, &bit); + if (rc < 0) + return rc; + + /* + * Request MSI interrupts: + * When using MSI, nvec_used interrupt sources and their irq + * descriptors are controlled through one msi descriptor. + * Thus the outer loop over msi descriptors shall run only once, + * while two inner loops iterate over the interrupt vectors. + * When using MSI-X, each interrupt vector/irq descriptor + * is bound to exactly one msi descriptor (nvec_used is one). + * So the inner loops are executed once, while the outer iterates + * over the MSI-X descriptors. + */ hwirq = bit; msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) { - rc = -EIO; if (hwirq - bit >= msi_vecs) break; - irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, - (irq_delivery == DIRECTED) ? - msi->affinity : NULL); + irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used); + irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE, + (irq_delivery == DIRECTED) ? + msi->affinity : NULL); if (irq < 0) return -ENOMEM; - rc = irq_set_msi_desc(irq, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq, &zpci_irq_chip, - handle_percpu_irq); + + for (i = 0; i < irqs_per_msi; i++) { + rc = irq_set_msi_desc_off(irq, i, msi); + if (rc) + return rc; + irq_set_chip_and_handler(irq + i, &zpci_irq_chip, + handle_percpu_irq); + } + msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { if (msi->affinity) @@ -335,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) msg.address_lo |= (cpu_addr << 8); for_each_possible_cpu(cpu) { - airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zpci_ibv[cpu], + hwirq + i, irq + i); } } else { msg.address_lo = zdev->msi_addr & 0xffffffff; - airq_iv_set_data(zdev->aibv, hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zdev->aibv, hwirq + i, irq + i); } msg.address_hi = zdev->msi_addr >> 32; pci_write_msi_msg(irq, &msg); - hwirq++; + hwirq += irqs_per_msi; } zdev->msi_first_bit = bit; - zdev->msi_nr_irqs = msi_vecs; + zdev->msi_nr_irqs = hwirq - bit; rc = zpci_set_irq(zdev); if (rc) return rc; - return (msi_vecs == nvec) ? 0 : msi_vecs; + return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs; } void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); struct msi_desc *msi; + unsigned int i; int rc; /* Disable interrupts */ @@ -369,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) { - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); + for (i = 0; i < msi->nvec_used; i++) { + irq_set_msi_desc(msi->irq + i, NULL); + irq_free_desc(msi->irq + i); + } msi->msg.address_lo = 0; msi->msg.address_hi = 0; msi->msg.data = 0; diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index e24298a734be..a04cd13c6315 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -71,7 +71,9 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) return NULL; } +#ifndef MIN #define MIN(a,b) ((a)<(b) ? (a):(b)) +#endif #define STRINGX(x) #x #define STRING(x) STRINGX(x) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 082d61d85dfc..de1df0cb45da 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -163,7 +163,7 @@ struct sev_config { */ use_cas : 1, - __reserved : 62; + __reserved : 61; }; static struct sev_config sev_cfg __read_mostly; diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 83073fa3c989..7093ee21c0d1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -344,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal @@ -385,7 +386,6 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 12f2a0c14d33..be01823b1bb4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1520,20 +1520,23 @@ static void x86_pmu_start(struct perf_event *event, int flags) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + unsigned long *cntr_mask, *fixed_cntr_mask; + struct event_constraint *pebs_constraints; + struct cpu_hw_events *cpuc; u64 pebs, debugctl; - int cpu = smp_processor_id(); - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask); - unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); - struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); - unsigned long flags; - int idx; + int cpu, idx; + + guard(irqsave)(); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + cntr_mask = hybrid(cpuc->pmu, cntr_mask); + fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); + pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); if (!*(u64 *)cntr_mask) return; - local_irq_save(flags); - if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); @@ -1577,7 +1580,6 @@ void perf_event_print_debug(void) pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); } - local_irq_restore(flags); } void x86_pmu_stop(struct perf_event *event, int flags) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index be58cfb012dd..9f116dfc4728 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -64,7 +64,7 @@ * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR,MTL,ARL,LNL + * RPL,SPR,MTL,ARL,LNL,SRF * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 @@ -693,7 +693,8 @@ static const struct cstate_model srf_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), - .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), .module_events = BIT(PERF_CSTATE_MODULE_C6_RES), }; diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index 6faaf27e8899..6cbd9ae58b21 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,6 +2,10 @@ #ifndef _ASM_X86_CMDLINE_H #define _ASM_X86_CMDLINE_H +#include <asm/setup.h> + +extern char builtin_cmdline[COMMAND_LINE_SIZE]; + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); int cmdline_find_option(const char *cmdline_ptr, const char *option, char *buffer, int bufsize); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 950a03e0181e..4a68cb3eba78 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1305,6 +1305,7 @@ struct kvm_arch { u8 vm_type; bool has_private_mem; bool has_protected_state; + bool pre_fault_allowed; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -2191,6 +2192,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, #define kvm_arch_has_private_mem(kvm) false #endif +#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) + static inline u16 kvm_read_ldt(void) { u16 ldt; diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index a053c1293975..68da67df304d 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -66,13 +66,15 @@ static inline bool vcpu_is_preempted(long cpu) #ifdef CONFIG_PARAVIRT /* - * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. * - * Native (and PV wanting native due to vCPU pinning) should disable this key. - * It is done in this backwards fashion to only have a single direction change, - * which removes ordering between native_pv_spin_init() and HV setup. + * Native (and PV wanting native due to vCPU pinning) should keep this key + * disabled. Native does not touch the key. + * + * When in a guest then native_pv_lock_init() enables the key first and + * KVM/XEN might conditionally disable it later in the boot process again. */ -DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); /* * Shortcut for the queued_spin_lock_slowpath() function that allows diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 64fbd2dbc5b7..a9088250770f 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,11 +62,6 @@ void xen_arch_unregister_cpu(int num); #ifdef CONFIG_PVH void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); -#ifdef CONFIG_XEN_PVH -void __init xen_reserve_extra_memory(struct boot_params *bootp); -#else -static inline void xen_reserve_extra_memory(struct boot_params *bootp) { } -#endif #endif /* Lazy mode for batching updates / context switch */ diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index 6cfe762be28b..d5ef6215583b 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -19,7 +19,7 @@ static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; /* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; static u64 acpi_mp_pgd __ro_after_init; static u64 acpi_mp_reset_vector_paddr __ro_after_init; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index be5889bded49..1e0fe5f8ab84 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -462,7 +462,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x00 ... 0x2f: case 0x40 ... 0x4f: - case 0x70 ... 0x7f: + case 0x60 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; default: diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index b3fa61d45352..0b69bfbf345d 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -306,7 +306,7 @@ static void freq_invariance_enable(void) WARN_ON_ONCE(1); return; } - static_branch_enable(&arch_scale_freq_key); + static_branch_enable_cpuslocked(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } @@ -323,8 +323,10 @@ static void __init bp_init_freq_invariance(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; - if (intel_set_max_freq_ratio()) + if (intel_set_max_freq_ratio()) { + guard(cpus_read_lock)(); freq_invariance_enable(); + } } static void disable_freq_invariance_workfn(struct work_struct *work) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 767bf1c71aad..2a2fc14955cd 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -609,7 +609,7 @@ void mtrr_save_state(void) { int first_cpu; - if (!mtrr_enabled()) + if (!mtrr_enabled() || !mtrr_state.have_fixed) return; first_cpu = cpumask_first(cpu_online_mask); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5358d43886ad..fec381533555 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -51,13 +51,12 @@ DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif -DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); void __init native_pv_lock_init(void) { - if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && - !boot_cpu_has(X86_FEATURE_HYPERVISOR)) - static_branch_disable(&virt_spin_lock_key); + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_enable(&virt_spin_lock_key); } static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d34cad9b7b1..6129dc2ba784 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -164,7 +164,7 @@ unsigned long saved_video_mode; static char __initdata command_line[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +char builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; bool builtin_cmdline_added __ro_after_init; #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4287a8071a3a..472a1537b7a9 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -141,8 +141,8 @@ config KVM_AMD_SEV depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM select KVM_GENERIC_PRIVATE_MEM - select HAVE_KVM_GMEM_PREPARE - select HAVE_KVM_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_PREPARE + select HAVE_KVM_ARCH_GMEM_INVALIDATE help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 923e64903da9..913bfc96959c 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -286,7 +286,6 @@ static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return HV_STATUS_ACCESS_DENIED; } static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {} -static inline void kvm_hv_free_pa_page(struct kvm *kvm) {} static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector) { return false; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a7172ba59ad2..5bb481aefcbc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new, * reversing the LDR calculation to get cluster of APICs, i.e. no * additional work is required. */ - if (apic_x2apic_mode(apic)) { - WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic))); + if (apic_x2apic_mode(apic)) return; - } if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))) { @@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) s64 min_period = min_timer_period_us * 1000LL; if (apic->lapic_timer.period < min_period) { - pr_info_ratelimited( + pr_info_once( "vcpu %i: requested %lld ns " "lapic timer period limited to %lld ns\n", apic->vcpu->vcpu_id, @@ -2966,18 +2964,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) { if (apic_x2apic_mode(vcpu->arch.apic)) { + u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); u32 *id = (u32 *)(s->regs + APIC_ID); u32 *ldr = (u32 *)(s->regs + APIC_LDR); u64 icr; if (vcpu->kvm->arch.x2apic_format) { - if (*id != vcpu->vcpu_id) + if (*id != x2apic_id) return -EINVAL; } else { + /* + * Ignore the userspace value when setting APIC state. + * KVM's model is that the x2APIC ID is readonly, e.g. + * KVM only supports delivering interrupts to KVM's + * version of the x2APIC ID. However, for backwards + * compatibility, don't reject attempts to set a + * mismatched ID for userspace that hasn't opted into + * x2apic_format. + */ if (set) - *id >>= 24; + *id = x2apic_id; else - *id <<= 24; + *id = x2apic_id << 24; } /* @@ -2986,7 +2994,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, * split to ICR+ICR2 in userspace for backwards compatibility. */ if (set) { - *ldr = kvm_apic_calc_x2apic_ldr(*id); + *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 901be9e420a4..928cf84778b0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, if (req_max_level) max_level = min(max_level, req_max_level); - return req_max_level; + return max_level; } static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, @@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, u64 end; int r; + if (!vcpu->kvm->arch.pre_fault_allowed) + return -EOPNOTSUPP; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. @@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); if (level == PG_LEVEL_2M) - return kvm_range_has_memory_attributes(kvm, start, end, attrs); + return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { if (hugepage_test_mixed(slot, gfn, level - 1) || diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a16c873b3232..714c517dd4b7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2276,30 +2276,24 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) { struct sev_data_snp_launch_update fw_args = {0}; - bool assigned; + bool assigned = false; int level; - if (!kvm_mem_is_private(kvm, gfn)) { - pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n", - __func__, gfn); - ret = -EINVAL; - goto err; - } - ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); if (ret || assigned) { pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", __func__, gfn, ret, assigned); - ret = -EINVAL; + ret = ret ? -EINVAL : -EEXIST; goto err; } if (src) { void *vaddr = kmap_local_pfn(pfn + i); - ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE); - if (ret) + if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) { + ret = -EFAULT; goto err; + } kunmap_local(vaddr); } @@ -2549,6 +2543,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) data->gctx_paddr = __psp_pa(sev->snp_context); ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); + /* + * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages + * can be given to the guest simply by marking the RMP entry as private. + * This can happen on first access and also with KVM_PRE_FAULT_MEMORY. + */ + if (!ret) + kvm->arch.pre_fault_allowed = true; + kfree(id_auth); e_free_id_block: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c115d26844f7..d6f252555ab3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm) to_kvm_sev_info(kvm)->need_init = true; kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); + kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; } if (!pause_filter_count || !pause_filter_thresh) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index af6c8cf6a37a..70219e406987 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -427,8 +427,7 @@ static void kvm_user_return_msr_cpu_online(void) int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); int err; value = (value & mask) | (msrs->values[slot].host & ~mask); @@ -450,8 +449,7 @@ EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); static void drop_user_return_notifiers(void) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); if (msrs->registered) kvm_on_user_return(&msrs->urn); @@ -12646,6 +12644,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vm_type = type; kvm->arch.has_private_mem = (type == KVM_X86_SW_PROTECTED_VM); + /* Decided by the vendor code for other VM types. */ + kvm->arch.pre_fault_allowed = + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; ret = kvm_page_track_init(kvm); if (ret) @@ -13641,19 +13642,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm) -{ - return kvm->arch.vm_type == KVM_X86_SNP_VM; -} - +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) { return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order); } #endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) { kvm_x86_call(gmem_invalidate)(start, end); diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 384da1fdd5c6..c65cd5550454 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -207,18 +207,29 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, int cmdline_find_option_bool(const char *cmdline, const char *option) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; - return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + ret = __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option_bool(builtin_cmdline, COMMAND_LINE_SIZE, option); + + return ret; } int cmdline_find_option(const char *cmdline, const char *option, char *buffer, int bufsize) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; + + ret = __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option(builtin_cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); - return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, - buffer, bufsize); + return ret; } diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a314622aa093..d066aecf8aeb 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -88,12 +88,14 @@ SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) SYM_FUNC_START(__get_user_8) +#ifndef CONFIG_X86_64 + xor %ecx,%ecx +#endif check_range size=8 ASM_STAC #ifdef CONFIG_X86_64 UACCESS movq (%_ASM_AX),%rdx #else - xor %ecx,%ecx UACCESS movl (%_ASM_AX),%edx UACCESS movl 4(%_ASM_AX),%ecx #endif diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 93e54ba91fbf..f5931499c2d6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -110,7 +110,7 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) #define MAX_UNSHARED_PTRS_PER_PGD \ - max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) + MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 2e69abf4f852..851ec8f1363a 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!pmd) return NULL; - /* We can't do anything sensible if we hit a large mapping. */ + /* Large PMD mapping found */ if (pmd_leaf(*pmd)) { - WARN_ON(1); - return NULL; + /* Clear the PMD if we hit a large mapping from the first round */ + if (late_text) { + set_pmd(pmd, __pmd(0)); + } else { + WARN_ON_ONCE(1); + return NULL; + } } if (pmd_none(*pmd)) { @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void) if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) return; - target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false); if (WARN_ON(!target_pte)) return; @@ -301,7 +306,7 @@ enum pti_clone_level { static void pti_clone_pgtable(unsigned long start, unsigned long end, - enum pti_clone_level level) + enum pti_clone_level level, bool late_text) { unsigned long addr; @@ -374,14 +379,14 @@ pti_clone_pgtable(unsigned long start, unsigned long end, */ *target_pmd = *pmd; - addr += PMD_SIZE; + addr = round_up(addr + 1, PMD_SIZE); } else if (level == PTI_CLONE_PTE) { /* Walk the page-table down to the pte level */ pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); continue; } @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, return; /* Allocate PTE in the user page-table */ - target_pte = pti_user_pagetable_walk_pte(addr); + target_pte = pti_user_pagetable_walk_pte(addr, late_text); if (WARN_ON(!target_pte)) return; @@ -401,7 +406,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, /* Clone the PTE */ *target_pte = *pte; - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); } else { BUG(); @@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void) phys_addr_t pa = per_cpu_ptr_to_phys((void *)va); pte_t *target_pte; - target_pte = pti_user_pagetable_walk_pte(va); + target_pte = pti_user_pagetable_walk_pte(va, false); if (WARN_ON(!target_pte)) return; @@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void) start = CPU_ENTRY_AREA_BASE; end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); - pti_clone_pgtable(start, end, PTI_CLONE_PMD); + pti_clone_pgtable(start, end, PTI_CLONE_PMD, false); } #endif /* CONFIG_X86_64 */ @@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void) /* * Clone the populated PMDs of the entry text and force it RO. */ -static void pti_clone_entry_text(void) +static void pti_clone_entry_text(bool late) { pti_clone_pgtable((unsigned long) __entry_text_start, (unsigned long) __entry_text_end, - PTI_CLONE_PMD); + PTI_LEVEL_KERNEL_IMAGE, late); } /* @@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void) * pti_set_kernel_image_nonglobal() did to clear the * global bit. */ - pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE); + pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false); /* * pti_clone_pgtable() will set the global bit in any PMDs @@ -638,8 +643,15 @@ void __init pti_init(void) /* Undo all global bits from the init pagetables in head_64.S: */ pti_set_kernel_image_nonglobal(); + /* Replace some of the global bits just for shared entry text: */ - pti_clone_entry_text(); + /* + * This is very early in boot. Device and Late initcalls can do + * modprobe before free_initmem() and mark_readonly(). This + * pti_clone_entry_text() allows those user-mode-helpers to function, + * but notably the text is still RW. + */ + pti_clone_entry_text(false); pti_setup_espfix64(); pti_setup_vsyscall(); } @@ -656,10 +668,11 @@ void pti_finalize(void) if (!boot_cpu_has(X86_FEATURE_PTI)) return; /* - * We need to clone everything (again) that maps parts of the - * kernel image. + * This is after free_initmem() (all initcalls are done) and we've done + * mark_readonly(). Text is now NX which might've split some PMDs + * relative to the early clone. */ - pti_clone_entry_text(); + pti_clone_entry_text(true); pti_clone_kernel_text(); debug_checkwx_user(); diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 8c2d4b8de25d..944e0290f2c0 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -75,9 +75,6 @@ static void __init init_pvh_bootparams(bool xen_guest) } else xen_raw_printk("Warning: Can fit ISA range into e820\n"); - if (xen_guest) - xen_reserve_extra_memory(&pvh_bootparams); - pvh_bootparams.hdr.cmd_line_ptr = pvh_start_info.cmdline_paddr; diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 27a2a02ef8fb..728a4366ca85 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -9,6 +9,7 @@ #include <asm/io_apic.h> #include <asm/hypervisor.h> #include <asm/e820/api.h> +#include <asm/setup.h> #include <xen/xen.h> #include <asm/xen/interface.h> @@ -27,54 +28,6 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); -void __init xen_pvh_init(struct boot_params *boot_params) -{ - u32 msr; - u64 pfn; - - xen_pvh = 1; - xen_domain_type = XEN_HVM_DOMAIN; - xen_start_flags = pvh_start_info.flags; - - msr = cpuid_ebx(xen_cpuid_base() + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - - if (xen_initial_domain()) - x86_init.oem.arch_setup = xen_add_preferred_consoles; - x86_init.oem.banner = xen_banner; - - xen_efi_init(boot_params); - - if (xen_initial_domain()) { - struct xen_platform_op op = { - .cmd = XENPF_get_dom0_console, - }; - int ret = HYPERVISOR_platform_op(&op); - - if (ret > 0) - xen_init_vga(&op.u.dom0_console, - min(ret * sizeof(char), - sizeof(op.u.dom0_console)), - &boot_params->screen_info); - } -} - -void __init mem_map_via_hcall(struct boot_params *boot_params_p) -{ - struct xen_memory_map memmap; - int rc; - - memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table); - set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table); - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); - if (rc) { - xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); - BUG(); - } - boot_params_p->e820_entries = memmap.nr_entries; -} - /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * @@ -89,8 +42,9 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p) * hypervisor should notify us which memory ranges are suitable for creating * foreign mappings, but that's not yet implemented. */ -void __init xen_reserve_extra_memory(struct boot_params *bootp) +static void __init pvh_reserve_extra_memory(void) { + struct boot_params *bootp = &boot_params; unsigned int i, ram_pages = 0, extra_pages; for (i = 0; i < bootp->e820_entries; i++) { @@ -141,3 +95,58 @@ void __init xen_reserve_extra_memory(struct boot_params *bootp) xen_add_extra_mem(PFN_UP(e->addr), pages); } } + +static void __init pvh_arch_setup(void) +{ + pvh_reserve_extra_memory(); + + if (xen_initial_domain()) + xen_add_preferred_consoles(); +} + +void __init xen_pvh_init(struct boot_params *boot_params) +{ + u32 msr; + u64 pfn; + + xen_pvh = 1; + xen_domain_type = XEN_HVM_DOMAIN; + xen_start_flags = pvh_start_info.flags; + + msr = cpuid_ebx(xen_cpuid_base() + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + x86_init.oem.arch_setup = pvh_arch_setup; + x86_init.oem.banner = xen_banner; + + xen_efi_init(boot_params); + + if (xen_initial_domain()) { + struct xen_platform_op op = { + .cmd = XENPF_get_dom0_console, + }; + int ret = HYPERVISOR_platform_op(&op); + + if (ret > 0) + xen_init_vga(&op.u.dom0_console, + min(ret * sizeof(char), + sizeof(op.u.dom0_console)), + &boot_params->screen_info); + } +} + +void __init mem_map_via_hcall(struct boot_params *boot_params_p) +{ + struct xen_memory_map memmap; + int rc; + + memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table); + set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table); + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc) { + xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); + BUG(); + } + boot_params_p->e820_entries = memmap.nr_entries; +} diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index d4cefd8a9af4..10c660fae8b3 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,8 +54,9 @@ struct mc_debug_data { static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); static struct mc_debug_data mc_debug_data_early __initdata; -static struct mc_debug_data __percpu *mc_debug_data __refdata = +static DEFINE_PER_CPU(struct mc_debug_data *, mc_debug_data) = &mc_debug_data_early; +static struct mc_debug_data __percpu *mc_debug_data_ptr; DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); static struct static_key mc_debug __ro_after_init; @@ -70,16 +71,20 @@ static int __init xen_parse_mc_debug(char *arg) } early_param("xen_mc_debug", xen_parse_mc_debug); +void mc_percpu_init(unsigned int cpu) +{ + per_cpu(mc_debug_data, cpu) = per_cpu_ptr(mc_debug_data_ptr, cpu); +} + static int __init mc_debug_enable(void) { - struct mc_debug_data __percpu *mcdb; unsigned long flags; if (!mc_debug_enabled) return 0; - mcdb = alloc_percpu(struct mc_debug_data); - if (!mcdb) { + mc_debug_data_ptr = alloc_percpu(struct mc_debug_data); + if (!mc_debug_data_ptr) { pr_err("xen_mc_debug inactive\n"); static_key_slow_dec(&mc_debug); return -ENOMEM; @@ -88,7 +93,7 @@ static int __init mc_debug_enable(void) /* Be careful when switching to percpu debug data. */ local_irq_save(flags); xen_mc_flush(); - mc_debug_data = mcdb; + mc_percpu_init(0); local_irq_restore(flags); pr_info("xen_mc_debug active\n"); @@ -150,7 +155,7 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); if (static_key_false(&mc_debug)) { - mcdb = this_cpu_ptr(mc_debug_data); + mcdb = __this_cpu_read(mc_debug_data); memcpy(mcdb->entries, b->entries, b->mcidx * sizeof(struct multicall_entry)); } @@ -230,7 +235,7 @@ struct multicall_space __xen_mc_entry(size_t args) ret.mc = &b->entries[b->mcidx]; if (static_key_false(&mc_debug)) { - struct mc_debug_data *mcdb = this_cpu_ptr(mc_debug_data); + struct mc_debug_data *mcdb = __this_cpu_read(mc_debug_data); mcdb->caller[b->mcidx] = __builtin_return_address(0); mcdb->argsz[b->mcidx] = args; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a0c3e77e3d5b..806ddb2391d9 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -690,6 +690,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long maxmem_pages; int i; int op; @@ -761,8 +762,8 @@ char * __init xen_memory_setup(void) * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. */ - extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), - extra_pages, max_pages - max_pfn); + maxmem_pages = EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)); + extra_pages = min3(maxmem_pages, extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7ea57f728b89..6863d3da7dec 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -305,6 +305,7 @@ static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) return rc; xen_pmu_init(cpu); + mc_percpu_init(cpu); /* * Why is this a BUG? If the hypercall fails then everything can be diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index e7775dff9452..0cf16fc79e0b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -257,6 +257,9 @@ void xen_mc_callback(void (*fn)(void *), void *data); */ struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); +/* Do percpu data initialization for multicalls. */ +void mc_percpu_init(unsigned int cpu); + extern bool is_xen_pmu; irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cc57e2dd9a0b..2cafcf11ee8b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; + unsigned long flags; struct blk_mq_tags *tags = hctx->tags; /* @@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) return; } - spin_lock_irq(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); users = tags->active_queues + 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); - spin_unlock_irq(&tags->lock); + spin_unlock_irqrestore(&tags->lock, flags); } /* diff --git a/block/blk-throttle.c b/block/blk-throttle.c index dc6140fa3de0..6943ec720f39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -31,14 +31,6 @@ static struct workqueue_struct *kthrotld_workqueue; #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) -/* We measure latency for request size from <= 4k to >= 1M */ -#define LATENCY_BUCKET_SIZE 9 - -struct latency_bucket { - unsigned long total_latency; /* ns / 1024 */ - int samples; -}; - struct throtl_data { /* service tree for active throtl groups */ @@ -116,9 +108,6 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) return tg->iops[rw]; } -#define request_bucket_index(sectors) \ - clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1) - /** * throtl_log - log debug message via blktrace * @sq: the service_queue being reported diff --git a/block/genhd.c b/block/genhd.c index 4dc95a463505..1c05dd4c6980 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -663,12 +663,12 @@ void del_gendisk(struct gendisk *disk) */ if (!test_bit(GD_DEAD, &disk->state)) blk_report_disk_dead(disk, false); - __blk_mark_disk_dead(disk); /* * Drop all partitions now that the disk is marked dead. */ mutex_lock(&disk->open_mutex); + __blk_mark_disk_dead(disk); xa_for_each_start(&disk->part_tbl, idx, part, 1) drop_partition(part); mutex_unlock(&disk->open_mutex); diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 2133085deda7..1c5218b79fc2 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -188,13 +188,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, u8 acpi_ns_is_locked); void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function); -void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, - acpi_adr_space_type space_id); - acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index dc6004daf624..cf53b9535f18 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,6 +20,10 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ +static void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, + acpi_adr_space_type space_id); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -61,6 +65,7 @@ acpi_status acpi_ev_initialize_op_regions(void) acpi_gbl_default_address_spaces [i])) { acpi_ev_execute_reg_methods(acpi_gbl_root_node, + ACPI_UINT32_MAX, acpi_gbl_default_address_spaces [i], ACPI_REG_CONNECT); } @@ -668,6 +673,7 @@ cleanup1: * FUNCTION: acpi_ev_execute_reg_methods * * PARAMETERS: node - Namespace node for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * function - Passed to _REG: On (1) or Off (0) * @@ -679,7 +685,7 @@ cleanup1: ******************************************************************************/ void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function) { struct acpi_reg_walk_info info; @@ -713,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, * regions and _REG methods. (i.e. handlers must be installed for all * regions of this Space ID before we can run any _REG methods) */ - (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, + (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &info, NULL); @@ -814,7 +820,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -void +static void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 624361a5f34d..95f78383bbdb 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -85,7 +85,8 @@ acpi_install_address_space_handler_internal(acpi_handle device, /* Run all _REG methods for this address space */ if (run_reg) { - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id, + ACPI_REG_CONNECT); } unlock_and_exit: @@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * FUNCTION: acpi_execute_reg_methods * * PARAMETERS: device - Handle for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * * RETURN: Status @@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * ******************************************************************************/ acpi_status -acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +acpi_execute_reg_methods(acpi_handle device, u32 max_depth, + acpi_adr_space_type space_id) { struct acpi_namespace_node *node; acpi_status status; @@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, max_depth, space_id, + ACPI_REG_CONNECT); } else { status = AE_BAD_PARAMETER; } @@ -306,57 +310,3 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) - -/******************************************************************************* - * - * FUNCTION: acpi_execute_orphan_reg_method - * - * PARAMETERS: device - Handle for the device - * space_id - The address space ID - * - * RETURN: Status - * - * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI - * device. This is a _REG method that has no corresponding region - * within the device's scope. - * - ******************************************************************************/ -acpi_status -acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) -{ - struct acpi_namespace_node *node; - acpi_status status; - - ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); - - /* Parameter validation */ - - if (!device) { - return_ACPI_STATUS(AE_BAD_PARAMETER); - } - - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Convert and validate the device handle */ - - node = acpi_ns_validate_handle(device); - if (node) { - - /* - * If an "orphan" _REG method is present in the device's scope - * for the given address space ID, run it. - */ - - acpi_ev_execute_orphan_reg_method(node, space_id); - } else { - status = AE_BAD_PARAMETER; - } - - (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - return_ACPI_STATUS(status); -} - -ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 299ec653388c..38d2f6e6b12b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { - acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; acpi_status status; acpi_ec_start(ec, false); if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { + acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; + acpi_ec_enter_noirq(ec); status = acpi_install_address_space_handler_no_reg(scope_handle, ACPI_ADR_SPACE_EC, @@ -1506,10 +1507,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, } if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { - acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); - if (scope_handle != ec->handle) - acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); - + acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } @@ -1724,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_device *device) } } +void acpi_ec_register_opregions(struct acpi_device *adev) +{ + if (first_ec && first_ec->handle != adev->handle) + acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC); +} + static acpi_status ec_parse_io_ports(struct acpi_resource *resource, void *context) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 601b670356e5..aadd4c218b32 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); +void acpi_ec_register_opregions(struct acpi_device *adev); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig index 849c2bd820b9..f33194d1e43f 100644 --- a/drivers/acpi/numa/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config ACPI_NUMA - bool "NUMA support" - depends on NUMA - depends on (X86 || ARM64 || LOONGARCH) - default y if ARM64 + def_bool NUMA && !X86 config ACPI_HMAT bool "ACPI Heterogeneous Memory Attribute Table Support" diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index e3f26e71637a..44f91f2c6c5d 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -167,6 +167,19 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) } } break; + + case ACPI_SRAT_TYPE_RINTC_AFFINITY: + { + struct acpi_srat_rintc_affinity *p = + (struct acpi_srat_rintc_affinity *)header; + pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", + p->acpi_processor_uid, + p->proximity_domain, + (p->flags & ACPI_SRAT_RINTC_ENABLED) ? + "enabled" : "disabled"); + } + break; + default: pr_warn("Found unsupported SRAT entry (type = 0x%x)\n", header->type); @@ -450,6 +463,21 @@ acpi_parse_gi_affinity(union acpi_subtable_headers *header, } #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ +static int __init +acpi_parse_rintc_affinity(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *rintc_affinity; + + rintc_affinity = (struct acpi_srat_rintc_affinity *)header; + acpi_table_print_srat_entry(&header->common); + + /* let architecture-dependent part to do it */ + acpi_numa_rintc_affinity_init(rintc_affinity); + + return 0; +} + static int __init acpi_parse_srat(struct acpi_table_header *table) { struct acpi_table_srat *srat = (struct acpi_table_srat *)table; @@ -485,7 +513,7 @@ int __init acpi_numa_init(void) /* SRAT: System Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { - struct acpi_subtable_proc srat_proc[4]; + struct acpi_subtable_proc srat_proc[5]; memset(srat_proc, 0, sizeof(srat_proc)); srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; @@ -496,6 +524,8 @@ int __init acpi_numa_init(void) srat_proc[2].handler = acpi_parse_gicc_affinity; srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY; srat_proc[3].handler = acpi_parse_gi_affinity; + srat_proc[4].id = ACPI_SRAT_TYPE_RINTC_AFFINITY; + srat_proc[4].handler = acpi_parse_rintc_affinity; acpi_table_parse_entries_array(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 59771412686b..22ae7829a915 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2273,6 +2273,8 @@ static int acpi_bus_attach(struct acpi_device *device, void *first_pass) if (device->handler) goto ok; + acpi_ec_register_opregions(device); + if (!device->flags.initialized) { device->flags.power_manageable = device->power.states[ACPI_STATE_D0].flags.valid; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f26286e3713e..905290c98c3c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1044,13 +1044,13 @@ static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, } /* Find the smallest unused descriptor the "slow way" */ -static u32 slow_desc_lookup_olocked(struct binder_proc *proc) +static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset) { struct binder_ref *ref; struct rb_node *n; u32 desc; - desc = 1; + desc = offset; for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); if (ref->data.desc > desc) @@ -1071,21 +1071,18 @@ static int get_ref_desc_olocked(struct binder_proc *proc, u32 *desc) { struct dbitmap *dmap = &proc->dmap; + unsigned int nbits, offset; unsigned long *new, bit; - unsigned int nbits; /* 0 is reserved for the context manager */ - if (node == proc->context->binder_context_mgr_node) { - *desc = 0; - return 0; - } + offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1; if (!dbitmap_enabled(dmap)) { - *desc = slow_desc_lookup_olocked(proc); + *desc = slow_desc_lookup_olocked(proc, offset); return 0; } - if (dbitmap_acquire_first_zero_bit(dmap, &bit) == 0) { + if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) { *desc = bit; return 0; } diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b00961944ab1..b3acbc4174fb 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -939,9 +939,9 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) __free_page(alloc->pages[i].page_ptr); page_count++; } - kvfree(alloc->pages); } spin_unlock(&alloc->lock); + kvfree(alloc->pages); if (alloc->mm) mmdrop(alloc->mm); diff --git a/drivers/android/dbitmap.h b/drivers/android/dbitmap.h index b8ac7b4764fd..956f1bd087d1 100644 --- a/drivers/android/dbitmap.h +++ b/drivers/android/dbitmap.h @@ -6,8 +6,7 @@ * * Used by the binder driver to optimize the allocation of the smallest * available descriptor ID. Each bit in the bitmap represents the state - * of an ID, with the exception of BIT(0) which is used exclusively to - * reference binder's context manager. + * of an ID. * * A dbitmap can grow or shrink as needed. This part has been designed * considering that users might need to briefly release their locks in @@ -58,11 +57,7 @@ static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap) if (bit < (dmap->nbits >> 2)) return dmap->nbits >> 1; - /* - * Note that find_last_bit() returns dmap->nbits when no bits - * are set. While this is technically not possible here since - * BIT(0) is always set, this check is left for extra safety. - */ + /* find_last_bit() returns dmap->nbits when no bits are set. */ if (bit == dmap->nbits) return NBITS_MIN; @@ -132,16 +127,17 @@ dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) } /* - * Finds and sets the first zero bit in the bitmap. Upon success @bit + * Finds and sets the next zero bit in the bitmap. Upon success @bit * is populated with the index and 0 is returned. Otherwise, -ENOSPC * is returned to indicate that a dbitmap_grow() is needed. */ static inline int -dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) +dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset, + unsigned long *bit) { unsigned long n; - n = find_first_zero_bit(dmap->map, dmap->nbits); + n = find_next_zero_bit(dmap->map, dmap->nbits, offset); if (n == dmap->nbits) return -ENOSPC; @@ -154,9 +150,7 @@ dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) static inline void dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit) { - /* BIT(0) should always set for the context manager */ - if (bit) - clear_bit(bit, dmap->map); + clear_bit(bit, dmap->map); } static inline int dbitmap_init(struct dbitmap *dmap) @@ -168,8 +162,6 @@ static inline int dbitmap_init(struct dbitmap *dmap) } dmap->nbits = NBITS_MIN; - /* BIT(0) is reserved for the context manager */ - set_bit(0, dmap->map); return 0; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d6f5e25e1ed8..473e00a58a8b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -951,8 +951,19 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) &sense_key, &asc, &ascq); ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { - /* ATA PASS-THROUGH INFORMATION AVAILABLE */ - ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D); + /* + * ATA PASS-THROUGH INFORMATION AVAILABLE + * + * Note: we are supposed to call ata_scsi_set_sense(), which + * respects the D_SENSE bit, instead of unconditionally + * generating the sense data in descriptor format. However, + * because hdparm, hddtemp, and udisks incorrectly assume sense + * data in descriptor format, without even looking at the + * RESPONSE CODE field in the returned sense data (to see which + * format the returned sense data is in), we are stuck with + * being bug compatible with older kernels. + */ + scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D); } } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index e7f713cd70d3..a876024d8a05 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) rpp->len += skb->len; if (stat & SAR_RSQE_EPDU) { + unsigned int len, truesize; unsigned char *l1l2; - unsigned int len; l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6); @@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); + truesize = skb->truesize; vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); - if (skb->truesize > SAR_FB_SIZE_3) + if (truesize > SAR_FB_SIZE_3) add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); - else if (skb->truesize > SAR_FB_SIZE_2) + else if (truesize > SAR_FB_SIZE_2) add_rx_skb(card, 2, SAR_FB_SIZE_2, 1); - else if (skb->truesize > SAR_FB_SIZE_1) + else if (truesize > SAR_FB_SIZE_1) add_rx_skb(card, 1, SAR_FB_SIZE_1, 1); else add_rx_skb(card, 0, SAR_FB_SIZE_0, 1); diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 69d2138d7efb..21545ffba065 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -316,7 +316,7 @@ endif # PARPORT_PANEL config PANEL_CHANGE_MESSAGE bool "Change LCD initialization message ?" - depends on CHARLCD + depends on CHARLCD || LINEDISP help This allows you to replace the boot message indicating the kernel version and the driver version with a custom message. This is useful on appliances diff --git a/drivers/auxdisplay/arm-charlcd.c b/drivers/auxdisplay/arm-charlcd.c index 0b1c99cca733..a7eae99a48f7 100644 --- a/drivers/auxdisplay/arm-charlcd.c +++ b/drivers/auxdisplay/arm-charlcd.c @@ -270,7 +270,7 @@ static int __init charlcd_probe(struct platform_device *pdev) struct charlcd *lcd; struct resource *res; - lcd = kzalloc(sizeof(struct charlcd), GFP_KERNEL); + lcd = kzalloc(sizeof(*lcd), GFP_KERNEL); if (!lcd) return -ENOMEM; diff --git a/drivers/auxdisplay/charlcd.h b/drivers/auxdisplay/charlcd.h index eed80063a6d2..4d4287209d04 100644 --- a/drivers/auxdisplay/charlcd.h +++ b/drivers/auxdisplay/charlcd.h @@ -36,6 +36,8 @@ enum charlcd_lines { CHARLCD_LINES_2, }; +struct charlcd_ops; + struct charlcd { const struct charlcd_ops *ops; const unsigned char *char_conv; /* Optional */ diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 7ac0b1b1d548..025dc6855cb2 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -230,7 +230,7 @@ static int hd44780_probe(struct platform_device *pdev) if (!lcd) goto fail1; - hd = kzalloc(sizeof(struct hd44780), GFP_KERNEL); + hd = kzalloc(sizeof(*hd), GFP_KERNEL); if (!hd) goto fail2; diff --git a/drivers/auxdisplay/hd44780_common.c b/drivers/auxdisplay/hd44780_common.c index 7cbf375b0fa5..4ef87c3118c0 100644 --- a/drivers/auxdisplay/hd44780_common.c +++ b/drivers/auxdisplay/hd44780_common.c @@ -366,4 +366,5 @@ struct hd44780_common *hd44780_common_alloc(void) } EXPORT_SYMBOL_GPL(hd44780_common_alloc); +MODULE_DESCRIPTION("Common functions for HD44780 (and compatibles) LCD displays"); MODULE_LICENSE("GPL"); diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index ce987944662c..8a7034b41d50 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -483,6 +483,7 @@ static int ht16k33_led_probe(struct device *dev, struct led_classdev *led, led->max_brightness = MAX_BRIGHTNESS; err = devm_led_classdev_register_ext(dev, led, &init_data); + fwnode_handle_put(init_data.fwnode); if (err) dev_err(dev, "Failed to register LED\n"); diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c index e2b546210f8d..731ffdfafc4e 100644 --- a/drivers/auxdisplay/line-display.c +++ b/drivers/auxdisplay/line-display.c @@ -8,7 +8,9 @@ * Copyright (C) 2021 Glider bv */ +#ifndef CONFIG_PANEL_BOOT_MESSAGE #include <generated/utsrelease.h> +#endif #include <linux/container_of.h> #include <linux/device.h> @@ -312,6 +314,12 @@ static int linedisp_init_map(struct linedisp *linedisp) return 0; } +#ifdef CONFIG_PANEL_BOOT_MESSAGE +#define LINEDISP_INIT_TEXT CONFIG_PANEL_BOOT_MESSAGE +#else +#define LINEDISP_INIT_TEXT "Linux " UTS_RELEASE " " +#endif + /** * linedisp_register - register a character line display * @linedisp: pointer to character line display structure @@ -359,7 +367,7 @@ int linedisp_register(struct linedisp *linedisp, struct device *parent, goto out_del_timer; /* display a default message */ - err = linedisp_display(linedisp, "Linux " UTS_RELEASE " ", -1); + err = linedisp_display(linedisp, LINEDISP_INIT_TEXT, -1); if (err) goto out_del_dev; @@ -388,4 +396,5 @@ void linedisp_unregister(struct linedisp *linedisp) } EXPORT_SYMBOL_NS_GPL(linedisp_unregister, LINEDISP); +MODULE_DESCRIPTION("Character line display core support"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 5b59d133b6af..555aee3ee8e7 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -445,7 +445,7 @@ static int __init arch_acpi_numa_init(void) ret = acpi_numa_init(); if (ret) { - pr_info("Failed to initialise from firmware\n"); + pr_debug("Failed to initialise from firmware\n"); return ret; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 730cae66607c..8c0733d3aad8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -25,6 +25,7 @@ #include <linux/mutex.h> #include <linux/pm_runtime.h> #include <linux/netdevice.h> +#include <linux/rcupdate.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/string_helpers.h> @@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const struct kobject *kobj) static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); + struct device_driver *driver; int retval = 0; /* add device node properties if present */ @@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Synchronize with module_remove_driver() */ + rcu_read_lock(); + driver = READ_ONCE(dev->driver); + if (driver) + add_uevent_var(env, "DRIVER=%s", driver->name); + rcu_read_unlock(); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; diff --git a/drivers/base/module.c b/drivers/base/module.c index 7af224e6914a..f742ad2a21da 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -7,6 +7,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/rcupdate.h> #include "base.h" static char *make_driver_name(const struct device_driver *drv) @@ -97,6 +98,9 @@ void module_remove_driver(const struct device_driver *drv) if (!drv) return; + /* Synchronize with dev_uevent() */ + synchronize_rcu(); + sysfs_remove_link(&drv->p->kobj, "module"); if (drv->owner) diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index f0df2da6d522..2dea9d259c49 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -110,7 +110,8 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, struct maple_tree *mt = map->cache; MA_STATE(mas, mt, min, max); unsigned long *entry, *lower, *upper; - unsigned long lower_index, lower_last; + /* initialized to work around false-positive -Wuninitialized warning */ + unsigned long lower_index = 0, lower_last = 0; unsigned long upper_index, upper_last; int ret = 0; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f92673f05c7a..a9e49b212341 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3422,6 +3422,7 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) /** * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() * @device: DRBD device. + * @peer_device: Peer DRBD device. * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ @@ -3448,6 +3449,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device, /** * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() * @device: DRBD device. + * @peer_device: Peer DRBD device. * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ @@ -3501,6 +3503,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) * @done: callback to be called after the bitmap IO was performed * @why: Descriptive text of the reason for doing the IO * @flags: Bitmap flags + * @peer_device: Peer DRBD device. * * While IO on the bitmap happens we freeze application IO thus we ensure * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be @@ -3549,6 +3552,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * @io_fn: IO callback to be called when bitmap IO is possible * @why: Descriptive text of the reason for doing the IO * @flags: Bitmap flags + * @peer_device: Peer DRBD device. * * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 008e850555f4..9c8b19a22c2a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -362,7 +362,7 @@ enum rbd_watch_state { enum rbd_lock_state { RBD_LOCK_STATE_UNLOCKED, RBD_LOCK_STATE_LOCKED, - RBD_LOCK_STATE_RELEASING, + RBD_LOCK_STATE_QUIESCING, }; /* WatchNotify::ClientId */ @@ -422,7 +422,7 @@ struct rbd_device { struct list_head running_list; struct completion acquire_wait; int acquire_err; - struct completion releasing_wait; + struct completion quiescing_wait; spinlock_t object_map_lock; u8 *object_map; @@ -525,7 +525,7 @@ static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev) lockdep_assert_held(&rbd_dev->lock_rwsem); return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || - rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING; + rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING; } static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) @@ -3457,13 +3457,14 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req) lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); if (!list_empty(&img_req->lock_item)) { + rbd_assert(!list_empty(&rbd_dev->running_list)); list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING && list_empty(&rbd_dev->running_list)); } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) - complete(&rbd_dev->releasing_wait); + complete(&rbd_dev->quiescing_wait); } static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) @@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) if (rbd_lock_add_request(img_req)) return 1; - if (rbd_dev->opts->exclusive) { - WARN_ON(1); /* lock got released? */ - return -EROFS; - } - /* * Note the use of mod_delayed_work() in rbd_acquire_lock() * and cancel_delayed_work() in wake_lock_waiters(). @@ -4181,16 +4177,16 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) /* * Ensure that all in-flight IO is flushed. */ - rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; - rbd_assert(!completion_done(&rbd_dev->releasing_wait)); + rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING; + rbd_assert(!completion_done(&rbd_dev->quiescing_wait)); if (list_empty(&rbd_dev->running_list)) return true; up_write(&rbd_dev->lock_rwsem); - wait_for_completion(&rbd_dev->releasing_wait); + wait_for_completion(&rbd_dev->quiescing_wait); down_write(&rbd_dev->lock_rwsem); - if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) + if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING) return false; rbd_assert(list_empty(&rbd_dev->running_list)); @@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev) rbd_warn(rbd_dev, "failed to update lock cookie: %d", ret); + if (rbd_dev->opts->exclusive) + rbd_warn(rbd_dev, + "temporarily releasing lock on exclusive mapping"); + /* * Lock cookie cannot be updated on older OSDs, so do * a manual release and queue an acquire. @@ -5376,7 +5376,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec) INIT_LIST_HEAD(&rbd_dev->acquiring_list); INIT_LIST_HEAD(&rbd_dev->running_list); init_completion(&rbd_dev->acquire_wait); - init_completion(&rbd_dev->releasing_wait); + init_completion(&rbd_dev->quiescing_wait); spin_lock_init(&rbd_dev->object_map_lock); @@ -6582,11 +6582,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) if (ret) return ret; - /* - * The lock may have been released by now, unless automatic lock - * transitions are disabled. - */ - rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev)); return 0; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6fb799ec0d88..890c08792ba8 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -48,6 +48,9 @@ #define UBLK_MINORS (1U << MINORBITS) +/* private ioctl command mirror */ +#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC) + /* All UBLK_F_* have to be included into UBLK_F_ALL */ #define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \ | UBLK_F_URING_CMD_COMP_IN_TASK \ @@ -2903,7 +2906,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, case UBLK_CMD_DEL_DEV: ret = ublk_ctrl_del_dev(&ub, true); break; - case UBLK_U_CMD_DEL_DEV_ASYNC: + case UBLK_CMD_DEL_DEV_ASYNC: ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 90a94a111e67..769fa288179d 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -413,6 +413,7 @@ config BT_ATH3K config BT_MTKSDIO tristate "MediaTek HCI SDIO driver" depends on MMC + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI SDIO driver. @@ -425,6 +426,7 @@ config BT_MTKSDIO config BT_MTKUART tristate "MediaTek HCI UART driver" depends on SERIAL_DEV_BUS + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI UART driver. diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e7a612504ab1..2ebc970e6573 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -3085,6 +3085,9 @@ static int btintel_setup_combined(struct hci_dev *hdev) btintel_set_dsm_reset_method(hdev, &ver_tlv); err = btintel_bootloader_setup_tlv(hdev, &ver_tlv); + if (err) + goto exit_error; + btintel_register_devcoredump_support(hdev); btintel_print_fseq_info(hdev); break; diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index b7c348687a77..2b7c80043aa2 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -437,6 +437,7 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(btmtk_process_coredump); +#if IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) static void btmtk_usb_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; @@ -1262,7 +1263,8 @@ int btmtk_usb_suspend(struct hci_dev *hdev) struct btmtk_data *btmtk_data = hci_get_priv(hdev); /* Stop urb anchor for iso data transmission */ - usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); + if (test_bit(BTMTK_ISOPKT_RUNNING, &btmtk_data->flags)) + usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); return 0; } @@ -1487,6 +1489,7 @@ int btmtk_usb_shutdown(struct hci_dev *hdev) return 0; } EXPORT_SYMBOL_GPL(btmtk_usb_shutdown); +#endif MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>"); MODULE_AUTHOR("Mark Chen <mark-yw.chen@mediatek.com>"); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca6466676902..45adc1560d94 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2160,7 +2160,7 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); power = qcadev->bt_power; - if (power->pwrseq) { + if (power && power->pwrseq) { pwrseq_power_off(power->pwrseq); set_bit(QCA_BT_OFF, &qca->flags); return; @@ -2187,10 +2187,6 @@ static void qca_power_shutdown(struct hci_uart *hu) } break; - case QCA_QCA6390: - pwrseq_power_off(qcadev->bt_power->pwrseq); - break; - default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2416,11 +2412,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) break; case QCA_QCA6390: - qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, - "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + if (dev_of_node(&serdev->dev)) { + qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, + "bluetooth"); + if (IS_ERR(qcadev->bt_power->pwrseq)) + return PTR_ERR(qcadev->bt_power->pwrseq); + break; + } + fallthrough; default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 8767e04d6c89..2b59ef61dda2 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2291,11 +2291,9 @@ static int sysc_init_idlemode(struct sysc *ddata, u8 *idlemodes, const char *name) { struct device_node *np = ddata->dev->of_node; - struct property *prop; - const __be32 *p; u32 val; - of_property_for_each_u32(np, name, prop, p, val) { + of_property_for_each_u32(np, name, val) { if (val >= SYSC_NR_IDLEMODES) { dev_err(ddata->dev, "invalid idlemode: %i\n", val); return -EINVAL; diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig index 94abd8f632a7..db51386c663a 100644 --- a/drivers/cache/Kconfig +++ b/drivers/cache/Kconfig @@ -18,6 +18,7 @@ config STARFIVE_STARLINK_CACHE bool "StarFive StarLink Cache controller" depends on RISCV depends on ARCH_STARFIVE + depends on 64BIT select RISCV_DMA_NONCOHERENT select RISCV_NONSTANDARD_CACHE_OPS help diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index cf89a9631107..a4f4291b4492 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -421,4 +421,5 @@ static void __exit ds1620_exit(void) module_init(ds1620_init); module_exit(ds1620_exit); +MODULE_DESCRIPTION("Dallas Semiconductor DS1620 thermometer driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index ea378c0ed549..92cee5717237 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -241,6 +241,7 @@ static void __exit nwbutton_exit (void) MODULE_AUTHOR("Alex Holden"); +MODULE_DESCRIPTION("NetWinder button driver"); MODULE_LICENSE("GPL"); module_init(nwbutton_init); diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index 0973c2c2b01a..9f52f0306ef7 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -618,6 +618,7 @@ static void __exit nwflash_exit(void) iounmap((void *)FLASH_BASE); } +MODULE_DESCRIPTION("NetWinder flash memory driver"); MODULE_LICENSE("GPL"); module_param(flashdebug, bool, 0644); diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 5a5afa14ca8c..45771b1a3716 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2"); static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; +static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb @@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kref) * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). - * - * The fact that wakeup_all() and bulk_in_work() are queued on the same - * workqueue makes their concurrent execution very unlikely, however the - * kernel's API doesn't seem to ensure this strictly. */ static void wakeup_all(struct work_struct *work) @@ -627,7 +624,7 @@ static void report_io_error(struct xillyusb_dev *xdev, if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ - queue_work(xdev->workq, &xdev->wakeup_workitem); + queue_work(wakeup_wq, &xdev->wakeup_workitem); } } @@ -1906,6 +1903,13 @@ static const struct file_operations xillyusb_fops = { static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { + struct usb_device *udev = xdev->udev; + + /* Verify that device has the two fundamental bulk in/out endpoints */ + if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || + usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) + return -ENODEV; + xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) @@ -1935,14 +1939,15 @@ static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { - struct xillyusb_channel *chan; + struct usb_device *udev = xdev->udev; + struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - xdev->channels = chan; + new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); @@ -1971,6 +1976,15 @@ static int setup_channels(struct xillyusb_dev *xdev, */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ + if (usb_pipe_type_check(udev, + usb_sndbulkpipe(udev, i + 2))) { + dev_err(xdev->dev, + "Missing BULK OUT endpoint %d\n", + i + 2); + kfree(new_channels); + return -ENODEV; + } + chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); @@ -1980,6 +1994,7 @@ static int setup_channels(struct xillyusb_dev *xdev, } } + xdev->channels = new_channels; return 0; } @@ -2096,9 +2111,11 @@ static int xillyusb_discovery(struct usb_interface *interface) * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the - * workqueue, which rarely does anything. + * device's workqueue and the wakeup work item. This rarely + * does anything. */ flush_workqueue(xdev->workq); + flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; @@ -2258,6 +2275,10 @@ static int __init xillyusb_init(void) { int rc = 0; + wakeup_wq = alloc_workqueue(xillyname, 0, 0); + if (!wakeup_wq) + return -ENOMEM; + if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else @@ -2265,12 +2286,17 @@ static int __init xillyusb_init(void) rc = usb_register(&xillyusb_driver); + if (rc) + destroy_workqueue(wakeup_wq); + return rc; } static void __exit xillyusb_exit(void) { usb_deregister(&xillyusb_driver); + + destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); diff --git a/drivers/clk/clk-conf.c b/drivers/clk/clk-conf.c index 1a4e6340f95c..058420562020 100644 --- a/drivers/clk/clk-conf.c +++ b/drivers/clk/clk-conf.c @@ -81,13 +81,11 @@ err: static int __set_clk_rates(struct device_node *node, bool clk_supplier) { struct of_phandle_args clkspec; - struct property *prop; - const __be32 *cur; int rc, index = 0; struct clk *clk; u32 rate; - of_property_for_each_u32(node, "assigned-clock-rates", prop, cur, rate) { + of_property_for_each_u32(node, "assigned-clock-rates", rate) { if (rate) { rc = of_parse_phandle_with_args(node, "assigned-clocks", "#clock-cells", index, &clkspec); diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c index 4ce83c5265b8..a4c92c5ef3ff 100644 --- a/drivers/clk/clk-si5351.c +++ b/drivers/clk/clk-si5351.c @@ -1175,8 +1175,8 @@ static int si5351_dt_parse(struct i2c_client *client, { struct device_node *child, *np = client->dev.of_node; struct si5351_platform_data *pdata; - struct property *prop; - const __be32 *p; + u32 array[4]; + int sz, i; int num = 0; u32 val; @@ -1191,20 +1191,24 @@ static int si5351_dt_parse(struct i2c_client *client, * property silabs,pll-source : <num src>, [<..>] * allow to selectively set pll source */ - of_property_for_each_u32(np, "silabs,pll-source", prop, p, num) { + sz = of_property_read_variable_u32_array(np, "silabs,pll-source", array, 2, 4); + sz = (sz == -EINVAL) ? 0 : sz; /* Missing property is OK */ + if (sz < 0) + return dev_err_probe(&client->dev, sz, "invalid pll-source\n"); + if (sz % 2) + return dev_err_probe(&client->dev, -EINVAL, + "missing pll-source for pll %d\n", array[sz - 1]); + + for (i = 0; i < sz; i += 2) { + num = array[i]; + val = array[i + 1]; + if (num >= 2) { dev_err(&client->dev, "invalid pll %d on pll-source prop\n", num); return -EINVAL; } - p = of_prop_next_u32(prop, p, &val); - if (!p) { - dev_err(&client->dev, - "missing pll-source for pll %d\n", num); - return -EINVAL; - } - switch (val) { case 0: pdata->pll_src[num] = SI5351_PLL_SRC_XTAL; @@ -1232,19 +1236,24 @@ static int si5351_dt_parse(struct i2c_client *client, pdata->pll_reset[0] = true; pdata->pll_reset[1] = true; - of_property_for_each_u32(np, "silabs,pll-reset-mode", prop, p, num) { + sz = of_property_read_variable_u32_array(np, "silabs,pll-reset-mode", array, 2, 4); + sz = (sz == -EINVAL) ? 0 : sz; /* Missing property is OK */ + if (sz < 0) + return dev_err_probe(&client->dev, sz, "invalid pll-reset-mode\n"); + if (sz % 2) + return dev_err_probe(&client->dev, -EINVAL, + "missing pll-reset-mode for pll %d\n", array[sz - 1]); + + for (i = 0; i < sz; i += 2) { + num = array[i]; + val = array[i + 1]; + if (num >= 2) { dev_err(&client->dev, "invalid pll %d on pll-reset-mode prop\n", num); return -EINVAL; } - p = of_prop_next_u32(prop, p, &val); - if (!p) { - dev_err(&client->dev, - "missing pll-reset-mode for pll %d\n", num); - return -EINVAL; - } switch (val) { case 0: diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8cca52be993f..285ed1ad8a37 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -5364,9 +5364,8 @@ EXPORT_SYMBOL_GPL(of_clk_get_parent_count); const char *of_clk_get_parent_name(const struct device_node *np, int index) { struct of_phandle_args clkspec; - struct property *prop; const char *clk_name; - const __be32 *vp; + bool found = false; u32 pv; int rc; int count; @@ -5383,15 +5382,16 @@ const char *of_clk_get_parent_name(const struct device_node *np, int index) /* if there is an indices property, use it to transfer the index * specified into an array offset for the clock-output-names property. */ - of_property_for_each_u32(clkspec.np, "clock-indices", prop, vp, pv) { + of_property_for_each_u32(clkspec.np, "clock-indices", pv) { if (index == pv) { index = count; + found = true; break; } count++; } /* We went off the end of 'clock-indices' without finding it */ - if (prop && !vp) + if (of_property_present(clkspec.np, "clock-indices") && !found) return NULL; if (of_property_read_string_index(clkspec.np, "clock-output-names", @@ -5504,14 +5504,12 @@ static int parent_ready(struct device_node *np) int of_clk_detect_critical(struct device_node *np, int index, unsigned long *flags) { - struct property *prop; - const __be32 *cur; uint32_t idx; if (!np || !flags) return -EINVAL; - of_property_for_each_u32(np, "clock-critical", prop, cur, idx) + of_property_for_each_u32(np, "clock-critical", idx) if (index == idx) *flags |= CLK_IS_CRITICAL; diff --git a/drivers/clk/davinci/da8xx-cfgchip.c b/drivers/clk/davinci/da8xx-cfgchip.c index ad2d0df43dc6..ec60ecb517f1 100644 --- a/drivers/clk/davinci/da8xx-cfgchip.c +++ b/drivers/clk/davinci/da8xx-cfgchip.c @@ -508,7 +508,7 @@ da8xx_cfgchip_register_usb0_clk48(struct device *dev, const char * const parent_names[] = { "usb_refclkin", "pll0_auxclk" }; struct clk *fck_clk; struct da8xx_usb0_clk48 *usb0; - struct clk_init_data init; + struct clk_init_data init = {}; int ret; fck_clk = devm_clk_get(dev, "fck"); @@ -583,7 +583,7 @@ da8xx_cfgchip_register_usb1_clk48(struct device *dev, { const char * const parent_names[] = { "usb0_clk48", "usb_refclkin" }; struct da8xx_usb1_clk48 *usb1; - struct clk_init_data init; + struct clk_init_data init = {}; int ret; usb1 = devm_kzalloc(dev, sizeof(*usb1), GFP_KERNEL); diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index ea3788ba46f7..33cc1f73c69d 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -227,11 +227,9 @@ EXPORT_SYMBOL_GPL(qcom_cc_register_sleep_clk); static void qcom_cc_drop_protected(struct device *dev, struct qcom_cc *cc) { struct device_node *np = dev->of_node; - struct property *prop; - const __be32 *p; u32 i; - of_property_for_each_u32(np, "protected-clocks", prop, p, i) { + of_property_for_each_u32(np, "protected-clocks", i) { if (i >= cc->num_rclks) continue; diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index a026ccca7315..28945b6b0ee1 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -1040,19 +1040,20 @@ static unsigned long __init exynos4_get_xom(void) static void __init exynos4_clk_register_finpll(struct samsung_clk_provider *ctx) { struct samsung_fixed_rate_clock fclk; - struct clk *clk; - unsigned long finpll_f = 24000000; + unsigned long finpll_f; + unsigned int parent; char *parent_name; unsigned int xom = exynos4_get_xom(); parent_name = xom & 1 ? "xusbxti" : "xxti"; - clk = clk_get(NULL, parent_name); - if (IS_ERR(clk)) { + parent = xom & 1 ? CLK_XUSBXTI : CLK_XXTI; + + finpll_f = clk_hw_get_rate(ctx->clk_data.hws[parent]); + if (!finpll_f) { pr_err("%s: failed to lookup parent clock %s, assuming " "fin_pll clock frequency is 24MHz\n", __func__, parent_name); - } else { - finpll_f = clk_get_rate(clk); + finpll_f = 24000000; } fclk.id = CLK_FIN_PLL; diff --git a/drivers/clk/sophgo/clk-sg2042-pll.c b/drivers/clk/sophgo/clk-sg2042-pll.c index 9695e64fc23b..ff9deeef509b 100644 --- a/drivers/clk/sophgo/clk-sg2042-pll.c +++ b/drivers/clk/sophgo/clk-sg2042-pll.c @@ -387,7 +387,7 @@ static int sg2042_clk_pll_set_rate(struct clk_hw *hw, struct sg2042_pll_clock *pll = to_sg2042_pll_clk(hw); struct sg2042_pll_ctrl pctrl_table; unsigned long flags; - u32 value; + u32 value = 0; int ret; spin_lock_irqsave(pll->lock, flags); diff --git a/drivers/clk/sunxi/clk-simple-gates.c b/drivers/clk/sunxi/clk-simple-gates.c index 0399627c226a..845efc1ec800 100644 --- a/drivers/clk/sunxi/clk-simple-gates.c +++ b/drivers/clk/sunxi/clk-simple-gates.c @@ -21,11 +21,9 @@ static void __init sunxi_simple_gates_setup(struct device_node *node, { struct clk_onecell_data *clk_data; const char *clk_parent, *clk_name; - struct property *prop; struct resource res; void __iomem *clk_reg; void __iomem *reg; - const __be32 *p; int number, i = 0, j; u8 clk_bit; u32 index; @@ -47,7 +45,7 @@ static void __init sunxi_simple_gates_setup(struct device_node *node, if (!clk_data->clks) goto err_free_data; - of_property_for_each_u32(node, "clock-indices", prop, p, index) { + of_property_for_each_u32(node, "clock-indices", index) { of_property_read_string_index(node, "clock-output-names", i, &clk_name); diff --git a/drivers/clk/sunxi/clk-sun8i-bus-gates.c b/drivers/clk/sunxi/clk-sun8i-bus-gates.c index b87f331f63c9..8482ac8e5898 100644 --- a/drivers/clk/sunxi/clk-sun8i-bus-gates.c +++ b/drivers/clk/sunxi/clk-sun8i-bus-gates.c @@ -24,11 +24,9 @@ static void __init sun8i_h3_bus_gates_init(struct device_node *node) const char *parents[PARENT_MAX]; struct clk_onecell_data *clk_data; const char *clk_name; - struct property *prop; struct resource res; void __iomem *clk_reg; void __iomem *reg; - const __be32 *p; int number, i; u8 clk_bit; int index; @@ -58,7 +56,7 @@ static void __init sun8i_h3_bus_gates_init(struct device_node *node) goto err_free_data; i = 0; - of_property_for_each_u32(node, "clock-indices", prop, p, index) { + of_property_for_each_u32(node, "clock-indices", index) { of_property_read_string_index(node, "clock-output-names", i, &clk_name); diff --git a/drivers/clk/thead/Kconfig b/drivers/clk/thead/Kconfig index 1710d50bf9d4..95e0d9eb965e 100644 --- a/drivers/clk/thead/Kconfig +++ b/drivers/clk/thead/Kconfig @@ -3,6 +3,7 @@ config CLK_THEAD_TH1520_AP bool "T-HEAD TH1520 AP clock support" depends on ARCH_THEAD || COMPILE_TEST + depends on 64BIT default ARCH_THEAD select REGMAP_MMIO help diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index cbc176b27c09..17e32ae08720 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -738,7 +738,7 @@ static struct ccu_div vp_axi_clk = { .hw.init = CLK_HW_INIT_PARENTS_HW("vp-axi", video_pll_clk_parent, &ccu_div_ops, - 0), + CLK_IGNORE_UNUSED), }, }; diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 6e46781bc9ac..b9561e3f196c 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -418,8 +418,6 @@ void __init samsung_pwm_clocksource_init(void __iomem *base, static int __init samsung_pwm_alloc(struct device_node *np, const struct samsung_pwm_variant *variant) { - struct property *prop; - const __be32 *cur; u32 val; int i, ret; @@ -427,7 +425,7 @@ static int __init samsung_pwm_alloc(struct device_node *np, for (i = 0; i < SAMSUNG_PWM_NUM; ++i) pwm.irq[i] = irq_of_parse_and_map(np, i); - of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) { + of_property_for_each_u32(np, "samsung,pwm-outputs", val) { if (val >= SAMSUNG_PWM_NUM) { pr_warn("%s: invalid channel index in samsung,pwm-outputs property\n", __func__); continue; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 392a8000b238..c0278d023cfc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3405,6 +3405,7 @@ static const struct x86_cpu_id intel_epp_default[] = { */ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, 179, 64, 16)), X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 571069863c62..82b78e331d8e 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -22,56 +22,42 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); -/* - * Find a targets entry (n) in the host bridge interleave list. - * CXL Specification 3.0 Table 9-22 - */ -static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, - int ig) -{ - int i = 0, n = 0; - u8 eiw; - - /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ - if (iw != 3) { - for (i = 0; i < cximsd->nr_maps; i++) - n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; - } - /* IW: 3,6,12 add a modulo calculation to 'n' */ - if (!is_power_of_2(iw)) { - if (ways_to_eiw(iw, &eiw)) - return -1; - hpa &= GENMASK_ULL(51, eiw + ig); - n |= do_div(hpa, 3) << i; - } - return n; -} -static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) +static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; - struct cxl_decoder *cxld = &cxlsd->cxld; - int ig = cxld->interleave_granularity; - int iw = cxld->interleave_ways; - int n = 0; - u64 hpa; - - if (dev_WARN_ONCE(&cxld->dev, - cxld->interleave_ways != cxlsd->nr_targets, - "misconfigured root decoder\n")) - return NULL; + int hbiw = cxlrd->cxlsd.nr_targets; + u64 val; + int pos; - hpa = cxlrd->res->start + pos * ig; + /* No xormaps for host bridge interleave ways of 1 or 3 */ + if (hbiw == 1 || hbiw == 3) + return hpa; - /* Entry (n) is 0 for no interleave (iw == 1) */ - if (iw != 1) - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); + /* + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore + * the position bit to its value before the xormap was applied at + * HPA->DPA translation. + * + * pos is the lowest set bit in an XORMAP + * val is the XORALLBITS(HPA & XORMAP) + * + * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS + * as an operation that outputs a single bit by XORing all the + * bits in the input (hpa & xormap). Implement XORALLBITS using + * hweight64(). If the hamming weight is even the XOR of those + * bits results in val==0, if odd the XOR result is val==1. + */ - if (n < 0) - return NULL; + for (int i = 0; i < cximsd->nr_maps; i++) { + if (!cximsd->xormaps[i]) + continue; + pos = __ffs(cximsd->xormaps[i]); + val = (hweight64(hpa & cximsd->xormaps[i]) & 1); + hpa = (hpa & ~(1ULL << pos)) | (val << pos); + } - return cxlrd->cxlsd.target[n]; + return hpa; } struct cxl_cxims_context { @@ -361,7 +347,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; - cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; int rc; @@ -389,13 +374,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, if (rc) return rc; - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) - cxl_calc_hb = cxl_hb_modulo; - else - cxl_calc_hb = cxl_hb_xor; - struct cxl_root_decoder *cxlrd __free(put_cxlrd) = - cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); + cxl_root_decoder_alloc(root_port, ways); + if (IS_ERR(cxlrd)) return PTR_ERR(cxlrd); @@ -434,6 +415,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->qos_class = cfmws->qtg_id; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) + cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + rc = cxl_decoder_add(cxld, target_map); if (rc) return rc; @@ -482,6 +466,8 @@ struct cxl_chbs_context { unsigned long long uid; resource_size_t base; u32 cxl_version; + int nr_versions; + u32 saved_version; }; static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg, @@ -490,22 +476,31 @@ static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg, struct cxl_chbs_context *ctx = arg; struct acpi_cedt_chbs *chbs; - if (ctx->base != CXL_RESOURCE_NONE) - return 0; - chbs = (struct acpi_cedt_chbs *) header; - if (ctx->uid != chbs->uid) + if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11 && + chbs->length != CXL_RCRB_SIZE) return 0; - ctx->cxl_version = chbs->cxl_version; if (!chbs->base) return 0; - if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11 && - chbs->length != CXL_RCRB_SIZE) + if (ctx->saved_version != chbs->cxl_version) { + /* + * cxl_version cannot be overwritten before the next two + * checks, then use saved_version + */ + ctx->saved_version = chbs->cxl_version; + ctx->nr_versions++; + } + + if (ctx->base != CXL_RESOURCE_NONE) return 0; + if (ctx->uid != chbs->uid) + return 0; + + ctx->cxl_version = chbs->cxl_version; ctx->base = chbs->base; return 0; @@ -529,10 +524,19 @@ static int cxl_get_chbs(struct device *dev, struct acpi_device *hb, .uid = uid, .base = CXL_RESOURCE_NONE, .cxl_version = UINT_MAX, + .saved_version = UINT_MAX, }; acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs_iter, ctx); + if (ctx->nr_versions > 1) { + /* + * Disclaim eRCD support given some component register may + * only be found via CHBCR + */ + dev_info(dev, "Unsupported platform config, mixed Virtual Host and Restricted CXL Host hierarchy."); + } + return 0; } @@ -921,6 +925,7 @@ static void __exit cxl_acpi_exit(void) /* load before dax_hmem sees 'Soft Reserved' CXL ranges */ subsys_initcall(cxl_acpi_init); module_exit(cxl_acpi_exit); +MODULE_DESCRIPTION("CXL ACPI: Platform Support"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_IMPORT_NS(ACPI); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 625394486459..72a506c9dbd0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -28,12 +28,12 @@ int cxl_region_init(void); void cxl_region_exit(void); int cxl_get_poison_by_endpoint(struct cxl_port *port); struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa); -u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa); +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa); #else -static inline u64 -cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) +static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, + const struct cxl_memdev *cxlmd, u64 dpa) { return ULLONG_MAX; } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2626f3fff201..e5cdeafdf76e 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -875,10 +875,10 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, guard(rwsem_read)(&cxl_region_rwsem); guard(rwsem_read)(&cxl_dpa_rwsem); - dpa = le64_to_cpu(evt->common.phys_addr) & CXL_DPA_MASK; + dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - hpa = cxl_trace_hpa(cxlr, cxlmd, dpa); + hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); if (event_type == CXL_CPER_EVENT_GEN_MEDIA) trace_cxl_general_media(cxlmd, type, cxlr, hpa, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 8567dd11eaac..51132a575b27 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -338,10 +338,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, if (rc) return rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; @@ -368,6 +364,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -834,11 +834,13 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) { struct device *dport_dev = dport->dport_dev; - struct pci_host_bridge *host_bridge; - host_bridge = to_pci_host_bridge(dport_dev); - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); + + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + } dport->reg_map.host = host; cxl_dport_map_regs(dport); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index cb730050d3d4..1d5007e3795a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1733,21 +1733,6 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, return 0; } -struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) -{ - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; - struct cxl_decoder *cxld = &cxlsd->cxld; - int iw; - - iw = cxld->interleave_ways; - if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets, - "misconfigured root decoder\n")) - return NULL; - - return cxlrd->cxlsd.target[pos % iw]; -} -EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL); - static struct lock_class_key cxl_decoder_key; /** @@ -1807,7 +1792,6 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * cxl_root_decoder_alloc - Allocate a root level decoder * @port: owning CXL root of this decoder * @nr_targets: static number of downstream targets - * @calc_hb: which host bridge covers the n'th position by granularity * * Return: A new cxl decoder to be registered by cxl_decoder_add(). A * 'CXL root' decoder is one that decodes from a top-level / static platform @@ -1815,8 +1799,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * topology. */ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets, - cxl_calc_hb_fn calc_hb) + unsigned int nr_targets) { struct cxl_root_decoder *cxlrd; struct cxl_switch_decoder *cxlsd; @@ -1838,7 +1821,6 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, return ERR_PTR(rc); } - cxlrd->calc_hb = calc_hb; mutex_init(&cxlrd->range_lock); cxld = &cxlsd->cxld; @@ -2356,5 +2338,6 @@ static void cxl_core_exit(void) subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); +MODULE_DESCRIPTION("CXL: Core Compute Express Link support"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 538ebd5a64fd..21ad5f242875 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -9,6 +9,7 @@ #include <linux/uuid.h> #include <linux/sort.h> #include <linux/idr.h> +#include <linux/memory-tiers.h> #include <cxlmem.h> #include <cxl.h> #include "core.h" @@ -1632,10 +1633,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr, const struct cxl_dport *dport, int pos) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxl_decoder *cxld = &cxlsd->cxld; + int iw = cxld->interleave_ways; struct cxl_port *iter; int rc; - if (cxlrd->calc_hb(cxlrd, pos) != dport) { + if (dport != cxlrd->cxlsd.target[pos % iw]) { dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), dev_name(&cxlrd->cxlsd.cxld.dev)); @@ -2310,6 +2314,7 @@ static void unregister_region(void *_cxlr) int i; unregister_memory_notifier(&cxlr->memory_notifier); + unregister_mt_adistance_algorithm(&cxlr->adist_notifier); device_del(&cxlr->dev); /* @@ -2386,14 +2391,23 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) return true; } +static int cxl_region_nid(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct resource *res; + + guard(rwsem_read)(&cxl_region_rwsem); + res = p->res; + if (!res) + return NUMA_NO_NODE; + return phys_to_target_node(res->start); +} + static int cxl_region_perf_attrs_callback(struct notifier_block *nb, unsigned long action, void *arg) { struct cxl_region *cxlr = container_of(nb, struct cxl_region, memory_notifier); - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = p->targets[0]; - struct cxl_decoder *cxld = &cxled->cxld; struct memory_notify *mnb = arg; int nid = mnb->status_change_nid; int region_nid; @@ -2401,7 +2415,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid == NUMA_NO_NODE || action != MEM_ONLINE) return NOTIFY_DONE; - region_nid = phys_to_target_node(cxld->hpa_range.start); + region_nid = cxl_region_nid(cxlr); if (nid != region_nid) return NOTIFY_DONE; @@ -2411,6 +2425,27 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, return NOTIFY_OK; } +static int cxl_region_calculate_adistance(struct notifier_block *nb, + unsigned long nid, void *data) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + adist_notifier); + struct access_coordinate *perf; + int *adist = data; + int region_nid; + + region_nid = cxl_region_nid(cxlr); + if (nid != region_nid) + return NOTIFY_OK; + + perf = &cxlr->coord[ACCESS_COORDINATE_CPU]; + + if (mt_perf_to_adistance(perf, adist)) + return NOTIFY_OK; + + return NOTIFY_STOP; +} + /** * devm_cxl_add_region - Adds a region to a decoder * @cxlrd: root decoder @@ -2453,6 +2488,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; register_memory_notifier(&cxlr->memory_notifier); + cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance; + cxlr->adist_notifier.priority = 100; + register_mt_adistance_algorithm(&cxlr->adist_notifier); + rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); if (rc) return ERR_PTR(rc); @@ -2816,20 +2855,13 @@ struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) return ctx.cxlr; } -static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) +static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) { struct cxl_region_params *p = &cxlr->params; int gran = p->interleave_granularity; int ways = p->interleave_ways; u64 offset; - /* Is the hpa within this region at all */ - if (hpa < p->res->start || hpa > p->res->end) { - dev_dbg(&cxlr->dev, - "Addr trans fail: hpa 0x%llx not in region\n", hpa); - return false; - } - /* Is the hpa in an expected chunk for its pos(-ition) */ offset = hpa - p->res->start; offset = do_div(offset, gran * ways); @@ -2842,15 +2874,26 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) return false; } -static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled) +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; struct cxl_region_params *p = &cxlr->params; - int pos = cxled->pos; + struct cxl_endpoint_decoder *cxled = NULL; u16 eig = 0; u8 eiw = 0; + int pos; + + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxlmd == cxled_to_memdev(cxled)) + break; + } + if (!cxled || cxlmd != cxled_to_memdev(cxled)) + return ULLONG_MAX; + pos = cxled->pos; ways_to_eiw(p->interleave_ways, &eiw); granularity_to_eig(p->interleave_granularity, &eig); @@ -2884,27 +2927,21 @@ static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, /* Apply the hpa_offset to the region base address */ hpa = hpa_offset + p->res->start; - if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos)) - return ULLONG_MAX; + /* Root decoder translation overrides typical modulo decode */ + if (cxlrd->hpa_to_spa) + hpa = cxlrd->hpa_to_spa(cxlrd, hpa); - return hpa; -} - -u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa) -{ - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = NULL; - - for (int i = 0; i < p->nr_targets; i++) { - cxled = p->targets[i]; - if (cxlmd == cxled_to_memdev(cxled)) - break; + if (hpa < p->res->start || hpa > p->res->end) { + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in region\n", hpa); + return ULLONG_MAX; } - if (!cxled || cxlmd != cxled_to_memdev(cxled)) + + /* Simple chunk check, by pos & gran, only applies to modulo decodes */ + if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) return ULLONG_MAX; - return cxl_dpa_to_hpa(dpa, cxlr, cxled); + return hpa; } static struct lock_class_key cxl_pmem_region_key; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index ee5cd4eb2f16..9167cfba7f59 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -340,23 +340,23 @@ TRACE_EVENT(cxl_general_media, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr); __entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID; /* General Media */ - __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr); __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; /* Mask after flags have been parsed */ __entry->dpa &= CXL_DPA_MASK; - __entry->descriptor = rec->descriptor; - __entry->type = rec->type; - __entry->transaction_type = rec->transaction_type; - __entry->channel = rec->channel; - __entry->rank = rec->rank; + __entry->descriptor = rec->media_hdr.descriptor; + __entry->type = rec->media_hdr.type; + __entry->transaction_type = rec->media_hdr.transaction_type; + __entry->channel = rec->media_hdr.channel; + __entry->rank = rec->media_hdr.rank; __entry->device = get_unaligned_le24(rec->device); memcpy(__entry->comp_id, &rec->component_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE); - __entry->validity_flags = get_unaligned_le16(&rec->validity_flags); + __entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags); __entry->hpa = hpa; if (cxlr) { __assign_str(region_name); @@ -440,19 +440,19 @@ TRACE_EVENT(cxl_dram, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr); __entry->hdr_uuid = CXL_EVENT_DRAM_UUID; /* DRAM */ - __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr); __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; __entry->dpa &= CXL_DPA_MASK; - __entry->descriptor = rec->descriptor; - __entry->type = rec->type; - __entry->transaction_type = rec->transaction_type; - __entry->validity_flags = get_unaligned_le16(rec->validity_flags); - __entry->channel = rec->channel; - __entry->rank = rec->rank; + __entry->descriptor = rec->media_hdr.descriptor; + __entry->type = rec->media_hdr.type; + __entry->transaction_type = rec->media_hdr.transaction_type; + __entry->validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags); + __entry->channel = rec->media_hdr.channel; + __entry->rank = rec->media_hdr.rank; __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); __entry->bank_group = rec->bank_group; __entry->bank = rec->bank; @@ -704,8 +704,8 @@ TRACE_EVENT(cxl_poison, if (cxlr) { __assign_str(region); memcpy(__entry->uuid, &cxlr->params.uuid, 16); - __entry->hpa = cxl_trace_hpa(cxlr, cxlmd, - __entry->dpa); + __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, + __entry->dpa); } else { __assign_str(region); memset(__entry->uuid, 0, 16); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 720aa07976b0..9afb407d438f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -434,14 +434,13 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, - int pos); +typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @region_id: region id for next region provisioning event - * @calc_hb: which host bridge covers the n'th position by granularity + * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie @@ -450,7 +449,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, struct cxl_root_decoder { struct resource *res; atomic_t region_id; - cxl_calc_hb_fn calc_hb; + cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; @@ -524,6 +523,7 @@ struct cxl_region_params { * @params: active + config params for the region * @coord: QoS access coordinates for the region * @memory_notifier: notifier for setting the access coordinates to node + * @adist_notifier: notifier for calculating the abstract distance of node */ struct cxl_region { struct device dev; @@ -536,6 +536,7 @@ struct cxl_region { struct cxl_region_params params; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; struct notifier_block memory_notifier; + struct notifier_block adist_notifier; }; struct cxl_nvdimm_bridge { @@ -774,9 +775,7 @@ bool is_root_decoder(struct device *dev); bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets, - cxl_calc_hb_fn calc_hb); -struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos); + unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index af8169ccdbc0..afb53d058d62 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -161,7 +161,7 @@ struct cxl_mbox_cmd { C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ C(PADDR, -EFAULT, "physical address specified is invalid"), \ - C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(POISONLMT, -EBUSY, "poison injection limit has been reached"), \ C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ C(ABORT, -ENXIO, "background cmd was aborted by device"), \ C(SECURITY, -ENXIO, "not valid in the current security state"), \ @@ -563,7 +563,7 @@ enum cxl_opcode { 0x3b, 0x3f, 0x17) #define DEFINE_CXL_VENDOR_DEBUG_UUID \ - UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \ + UUID_INIT(0x5e1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \ 0x40, 0x3d, 0x86) struct cxl_mbox_get_supported_logs { diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 2f1b49bfe162..7de232eaeb17 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -253,6 +253,7 @@ static struct cxl_driver cxl_mem_driver = { module_cxl_driver(cxl_mem_driver); +MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index e53646e9f2fb..4be35dc22202 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1066,5 +1066,6 @@ static void __exit cxl_pci_driver_exit(void) module_init(cxl_pci_driver_init); module_exit(cxl_pci_driver_exit); +MODULE_DESCRIPTION("CXL: PCI manageability"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 2ecdaee63021..4ef93da22335 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -453,6 +453,7 @@ static __exit void cxl_pmem_exit(void) cxl_driver_unregister(&cxl_nvdimm_bridge_driver); } +MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support"); MODULE_LICENSE("GPL v2"); module_init(cxl_pmem_init); module_exit(cxl_pmem_exit); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 97c21566677a..d7d5d982ce69 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -209,6 +209,7 @@ static struct cxl_driver cxl_port_driver = { }; module_cxl_driver(cxl_port_driver); +MODULE_DESCRIPTION("CXL: Port enumeration and services"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_PORT); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index cbc92d3683e6..e5c05a876947 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -109,8 +109,8 @@ static const u32 knl_interleave_list[] = { 0x104, 0x10c, 0x114, 0x11c, /* 20-23 */ }; #define MAX_INTERLEAVE \ - (max_t(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ - max_t(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ + (MAX_T(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ + MAX_T(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ ARRAY_SIZE(knl_interleave_list)))) struct interleave_pkg { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 11faf1db4fa4..473421ba7a18 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -45,7 +45,6 @@ #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 1f32d6cf98d6..ed4e8ddbe76a 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -27,8 +27,10 @@ cflags-$(CONFIG_ARM64) += -fpie $(DISABLE_STACKLEAK_PLUGIN) \ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ - $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax + $(call cc-option,-mno-single-pic-base) \ + $(DISABLE_STACKLEAK_PLUGIN) +cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax \ + $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt @@ -56,6 +58,10 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) # disable LTO KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) +# The .data section would be renamed to .data.efistub, therefore, remove +# `-fdata-sections` flag from KBUILD_CFLAGS_KERNEL +KBUILD_CFLAGS_KERNEL := $(filter-out -fdata-sections, $(KBUILD_CFLAGS_KERNEL)) + lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ file.o mem.o random.o randomalloc.o pci.o \ skip_spaces.o lib-cmdline.o lib-ctype.o \ diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 46ac5a8beab7..e2e1e9df6115 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -1444,5 +1444,6 @@ static void fsi_exit(void) } module_exit(fsi_exit); module_param(discard_errors, int, 0664); +MODULE_DESCRIPTION("FSI core driver"); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(discard_errors, "Don't invoke error handling on bus accesses"); diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index b0b624c3717b..6f5e1bdf7e40 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -670,4 +670,5 @@ static struct platform_driver fsi_master_aspeed_driver = { }; module_platform_driver(fsi_master_aspeed_driver); +MODULE_DESCRIPTION("FSI master driver for AST2600"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-ast-cf.c b/drivers/fsi/fsi-master-ast-cf.c index f8c776ce1b56..a4c37ff8edd6 100644 --- a/drivers/fsi/fsi-master-ast-cf.c +++ b/drivers/fsi/fsi-master-ast-cf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // Copyright 2018 IBM Corp /* - * A FSI master controller, using a simple GPIO bit-banging interface + * A FSI master based on Aspeed ColdFire coprocessor */ #include <linux/crc4.h> @@ -1438,5 +1438,6 @@ static struct platform_driver fsi_master_acf = { }; module_platform_driver(fsi_master_acf); +MODULE_DESCRIPTION("A FSI master based on Aspeed ColdFire coprocessor"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME); diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c index 10fc344b6b22..f761344f4873 100644 --- a/drivers/fsi/fsi-master-gpio.c +++ b/drivers/fsi/fsi-master-gpio.c @@ -892,4 +892,5 @@ static struct platform_driver fsi_master_gpio_driver = { }; module_platform_driver(fsi_master_gpio_driver); +MODULE_DESCRIPTION("A FSI master controller, using a simple GPIO bit-banging interface"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-hub.c b/drivers/fsi/fsi-master-hub.c index 6d8b6e8854e5..6568fed7db3c 100644 --- a/drivers/fsi/fsi-master-hub.c +++ b/drivers/fsi/fsi-master-hub.c @@ -295,4 +295,5 @@ static struct fsi_driver hub_master_driver = { }; module_fsi_driver(hub_master_driver); +MODULE_DESCRIPTION("FSI hub master driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index 61dbda9dbe2b..411ddc018cd8 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -625,4 +625,5 @@ static void scom_exit(void) module_init(scom_init); module_exit(scom_exit); +MODULE_DESCRIPTION("SCOM FSI Client device driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 8dce78ea7139..5762e517338e 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -591,8 +591,6 @@ static int brcmstb_gpio_probe(struct platform_device *pdev) void __iomem *reg_base; struct brcmstb_gpio_priv *priv; struct resource *res; - struct property *prop; - const __be32 *p; u32 bank_width; int num_banks = 0; int num_gpios = 0; @@ -636,8 +634,7 @@ static int brcmstb_gpio_probe(struct platform_device *pdev) flags = BGPIOF_BIG_ENDIAN_BYTE_ORDER; #endif - of_property_for_each_u32(np, "brcm,gpio-bank-widths", prop, p, - bank_width) { + of_property_for_each_u32(np, "brcm,gpio-bank-widths", bank_width) { struct brcmstb_gpio_bank *bank; struct gpio_chip *gc; diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index d5906d419b0a..10ea71273c89 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -39,6 +39,8 @@ #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 +#define MLXBF_GPIO_CLR_ALL_INTS GENMASK(31, 0) + struct mlxbf3_gpio_context { struct gpio_chip gc; @@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); val &= ~BIT(offset); writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + + writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); gpiochip_disable_irq(gc, offset); @@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) return 0; } +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) +{ + struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev); + + /* Disable and clear all interrupts */ + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); +} + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { { "MLNXBF33", 0 }, {} @@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = { .acpi_match_table = mlxbf3_gpio_acpi_match, }, .probe = mlxbf3_gpio_probe, + .shutdown = mlxbf3_gpio_shutdown, }; module_platform_driver(mlxbf3_gpio_driver); diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 0e0d55da4f01..ccc47ea0b3e1 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -805,7 +805,7 @@ static int gpio_virtuser_dbgfs_init_line_attrs(struct device *dev, return -ENOMEM; data->ad.desc = desc; - sprintf(data->consumer, id); + strscpy(data->consumer, id); atomic_set(&data->irq, 0); atomic_set(&data->irq_count, 0); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index fd0749c0c630..6b2c6b91f962 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -268,6 +268,7 @@ config DRM_EXEC config DRM_GPUVM tristate depends on DRM + select DRM_EXEC help GPU-VM representation providing helpers to manage a GPUs virtual address space diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9dd8294032ef..38408e4e158e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -106,7 +106,8 @@ amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ df_v4_3.o \ - df_v4_6_2.o + df_v4_6_2.o \ + df_v4_15.o # add GMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 916b6b8cf7d9..6dfdff58bffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); @@ -1778,7 +1781,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - int r; + int i, r; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1793,13 +1796,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; - if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; - } + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 1538b2dbfff1..eb605e79ae0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -33,6 +33,7 @@ struct amdgpu_df_hash_status { struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); void (*sw_fini)(struct amdgpu_device *adev); + void (*hw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b241f61fe9c9..ac108fca64fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -37,6 +37,7 @@ #include "df_v3_6.h" #include "df_v4_3.h" #include "df_v4_6_2.h" +#include "df_v4_15.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -2803,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 6, 2): adev->df.funcs = &df_v4_6_2_funcs; break; + case IP_VERSION(4, 15, 0): + case IP_VERSION(4, 15, 1): + adev->df.funcs = &df_v4_15_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 78089f2f79f5..094498a0964b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -116,9 +116,10 @@ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ + * - 3.58.0 - Add GFX12 DCC support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 57 +#define KMS_DRIVER_MINOR 58 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..c770cb201e64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..b49b3650fd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..4d951a1baefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65..908e13455152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e499d6ba306b..1cb1ec7beefe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return r; } - memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); return 0; @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; @@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -226,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); @@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && @@ -1573,7 +1592,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + mem, adev->mes.event_log_size, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e11051271f71..0bc837dab578 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ -#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -83,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -113,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; @@ -135,8 +134,9 @@ struct amdgpu_mes { unsigned long *doorbell_bitmap; /* MES event log buffer */ - struct amdgpu_bo *event_log_gpu_obj; - uint64_t event_log_gpu_addr; + uint32_t event_log_size; + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; void *event_log_cpu_addr; /* ip specific functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 800cc7a148b2..189574d53ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1591,6 +1591,66 @@ static void psp_ras_ta_check_status(struct psp_context *psp) } } +static int psp_ras_send_cmd(struct psp_context *psp, + enum ras_command cmd_id, void *in, void *out) +{ + struct ta_ras_shared_memory *ras_cmd; + uint32_t cmd = cmd_id; + int ret = 0; + + if (!in) + return -EINVAL; + + mutex_lock(&psp->ras_context.mutex); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + switch (cmd) { + case TA_RAS_COMMAND__ENABLE_FEATURES: + case TA_RAS_COMMAND__DISABLE_FEATURES: + memcpy(&ras_cmd->ras_in_message, + in, sizeof(ras_cmd->ras_in_message)); + break; + case TA_RAS_COMMAND__TRIGGER_ERROR: + memcpy(&ras_cmd->ras_in_message.trigger_error, + in, sizeof(ras_cmd->ras_in_message.trigger_error)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + memcpy(&ras_cmd->ras_in_message.address, + in, sizeof(ras_cmd->ras_in_message.address)); + break; + default: + dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd); + ret = -EINVAL; + goto err_out; + } + + ras_cmd->cmd_id = cmd; + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + + switch (cmd) { + case TA_RAS_COMMAND__TRIGGER_ERROR: + if (!ret && out) + memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + ret = -EINVAL; + else if (out) + memcpy(out, + &ras_cmd->ras_out_message.address, + sizeof(ras_cmd->ras_out_message.address)); + break; + default: + break; + } + +err_out: + mutex_unlock(&psp->ras_context.mutex); + + return ret; +} + int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { struct ta_ras_shared_memory *ras_cmd; @@ -1632,23 +1692,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { - struct ta_ras_shared_memory *ras_cmd; + enum ras_command cmd_id; int ret; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - if (enable) - ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; - else - ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; - - ras_cmd->ras_in_message = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + cmd_id = enable ? + TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES; + ret = psp_ras_send_cmd(psp, cmd_id, info, NULL); if (ret) return -EINVAL; @@ -1672,6 +1724,8 @@ int psp_ras_terminate(struct psp_context *psp) psp->ras_context.context.initialized = false; + mutex_destroy(&psp->ras_context.mutex); + return ret; } @@ -1756,9 +1810,10 @@ int psp_ras_initialize(struct psp_context *psp) ret = psp_ta_load(psp, &psp->ras_context.context); - if (!ret && !ras_cmd->ras_status) + if (!ret && !ras_cmd->ras_status) { psp->ras_context.context.initialized = true; - else { + mutex_init(&psp->ras_context.mutex); + } else { if (ras_cmd->ras_status) dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); @@ -1772,12 +1827,12 @@ int psp_ras_initialize(struct psp_context *psp) int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask) { - struct ta_ras_shared_memory *ras_cmd; struct amdgpu_device *adev = psp->adev; int ret; uint32_t dev_mask; + uint32_t ras_status = 0; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; switch (info->block_id) { @@ -1801,13 +1856,8 @@ int psp_ras_trigger_error(struct psp_context *psp, dev_mask &= AMDGPU_RAS_INST_MASK; info->sub_block_index |= dev_mask; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; - ras_cmd->ras_in_message.trigger_error = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status); if (ret) return -EINVAL; @@ -1817,9 +1867,9 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) + if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) return -EACCES; - else if (ras_cmd->ras_status) + else if (ras_status) return -EINVAL; return 0; @@ -1829,25 +1879,16 @@ int psp_ras_query_address(struct psp_context *psp, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out) { - struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras_context.context.initialized) - return -EINVAL; - - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; - ras_cmd->ras_in_message.address = *addr_in; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); - if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + if (!psp->ras_context.context.initialized || + !addr_in || !addr_out) return -EINVAL; - *addr_out = ras_cmd->ras_out_message.address; + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out); - return 0; + return ret; } // ras end diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3635303e6548..74a96516c913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -200,6 +200,7 @@ struct psp_xgmi_context { struct psp_ras_context { struct ta_context context; struct amdgpu_ras *ras; + struct mutex mutex; }; #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 8e8afbd237bc..0c856005df6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -348,6 +348,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size context->session_id = ta_id; + mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) goto err_free_shared_buf; @@ -366,6 +367,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret = -EFAULT; err_free_shared_buf: + mutex_unlock(&psp->ras_context.mutex); kfree(shared_buf); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index eae0a555df3c..aab8077e5098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1011,6 +1011,9 @@ Out: uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) { + /* get available eeprom table version first before eeprom table init */ + amdgpu_ras_set_eeprom_table_version(control); + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..e6344a6b0a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 8d65b096db90..43f44cc201cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -147,6 +147,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } + /* from vcn4 and above, only unified queue is used */ + adev->vcn.using_unified_queue = + amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); @@ -275,18 +279,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } -/* from vcn4 and above, only unified queue is used */ -static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - bool ret = false; - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) - ret = true; - - return ret; -} - bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -397,7 +389,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) for (i = 0; i < adev->vcn.num_enc_rings; ++i) fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (fence[j] || @@ -443,7 +437,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { @@ -469,8 +465,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + !adev->vcn.using_unified_queue) atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); atomic_dec(&ring->adev->vcn.total_submission_cnt); @@ -724,12 +724,11 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); - bool sq = amdgpu_vcn_using_unified_queue(ring); uint32_t *ib_checksum; uint32_t ib_pack_in_dw; int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -742,7 +741,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw = 0; /* single queue headers */ - if (sq) { + if (adev->vcn.using_unified_queue) { ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + 4 + 2; /* engine info + decoding ib in dw */ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); @@ -761,7 +760,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); r = amdgpu_job_submit_direct(job, ring, &f); @@ -851,15 +850,15 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -873,7 +872,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -895,7 +894,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -918,15 +917,15 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -940,7 +939,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -962,7 +961,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 9f06def236fd..c87d68d4be53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -329,6 +329,7 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool using_unified_queue; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -460,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b287a82e6177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..a060c28f0877 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += lower_32_bits(vm->generation); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; - if (drm_sched_entity_error(&vm->delayed)) { - ++vm->generation; + if (vm->generation != new_vm_generation) { + vm->generation = new_vm_generation; amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_fini_entities(vm); r = amdgpu_vm_init_entities(adev, vm); @@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = amdgpu_vm_generation(adev, NULL); mutex_init(&vm->eviction_lock); vm->evicting = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..7d26a962f811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,19 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +534,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +569,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c new file mode 100644 index 000000000000..2a573e33908b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_15.h" + +#include "df/df_4_15_offset.h" +#include "df/df_4_15_sh_mask.h" + +static void df_v4_15_hw_init(struct amdgpu_device *adev) +{ + if (adev->have_atomics_support) { + uint32_t tmp; + uint32_t dis_lcl_proc = (1 << 1 | + 1 << 2 | + 1 << 13); + + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); + WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp); + } +} + +const struct amdgpu_df_funcs df_v4_15_funcs = { + .hw_init = df_v4_15_hw_init +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h new file mode 100644 index 000000000000..dddf2422112a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h @@ -0,0 +1,30 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V4_15_H__ +#define __DF_V4_15_H__ + +extern const struct amdgpu_df_funcs df_v4_15_funcs; + +#endif /* __DF_V4_15_H__ */ + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800..2c611b8577a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3432,6 +3438,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3472,6 +3496,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) @@ -3519,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3553,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 04d8966423de..6ae5a784e187 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -32,6 +33,9 @@ #include "vcn/vcn_4_0_3_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#define NORMALIZE_JPEG_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, @@ -621,6 +625,13 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } +static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through JPEG ring. + */ +} + /** * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer * @@ -772,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -817,7 +832,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -858,7 +879,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1062,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1072,6 +1100,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_jpeg_dec_ring_test_ring, .test_ib = amdgpu_jpeg_dec_ring_test_ib, .insert_nop = jpeg_v4_0_3_dec_ring_nop, @@ -1225,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..71c54b294e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..f4daff90c770 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8ce51b9236c1..2ea8223eb969 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1015,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1029,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1071,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1124,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1163,6 +1173,8 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; @@ -1198,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1214,12 +1223,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1330,9 +1339,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1350,7 +1359,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1395,7 +1404,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index c9f74231ad59..e39a58d262c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -551,10 +591,12 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -732,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -755,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -772,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -788,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1083,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1099,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1118,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1144,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1176,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1198,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1237,14 +1267,13 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1252,18 +1281,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1272,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1282,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } + } - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1337,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1358,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1388,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1401,9 +1435,15 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1416,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1439,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; @@ -1468,7 +1508,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; @@ -1511,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caab..0fbc3be81f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index cc9e961f0078..af1e90159ce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,6 +176,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -1647,6 +1655,10 @@ static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) * but it shouldn't hurt for other parts since * this GFXOFF will be disallowed anyway when SDMA is * active, this just makes it explicit. + * sdma_v5_2_ring_set_wptr() takes advantage of this + * to update the wptr because sometimes SDMA seems to miss + * doorbells when entering PG. If you remove this, update + * sdma_v5_2_ring_set_wptr() as well! */ amdgpu_gfx_off_ctrl(adev, false); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dc..ecee9e7d7e4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1575,8 +1575,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1590,6 +1589,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** @@ -1616,7 +1617,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index 04c797d54511..0af648931df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..e74e1983da53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index d27fb4ea6612..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | @@ -484,6 +486,10 @@ static int soc24_common_hw_init(void *handle) */ if (adev->nbio.funcs->remap_hdp_registers) adev->nbio.funcs->remap_hdp_registers(adev); + + if (adev->df.funcs->hw_init) + adev->df.funcs->hw_init(adev); + /* enable the doorbell aperture */ soc24_enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index f6d96a44d75f..776c539bfdda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1045,6 +1045,9 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1498,6 +1501,9 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f53054e39ebb..9bae95538b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define NORMALIZE_VCN_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); @@ -1375,6 +1378,50 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, + lower_32_bits(pd_addr), 0xffffffff); +} + +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* VCN engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through VCN ring. + */ +} + /** * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer * @@ -1414,7 +1461,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -1422,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index f45495de6875..8d75061f9f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -958,6 +958,9 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1162,6 +1165,9 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 070b56610c7d..68c97fcd539b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -721,6 +721,9 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -898,6 +901,9 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae4..983a977632ff 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 5fd1b6b44577..2d7755e2b6c3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -137,6 +137,13 @@ struct vblank_control_work { bool enable; }; +/** + * struct idle_workqueue - Work data for periodic action in idle + * @work: Kernel work data for the work event + * @dm: amdgpu display manager device + * @enable: true if idle worker is enabled + * @running: true if idle worker is running + */ struct idle_workqueue { struct work_struct work; struct amdgpu_display_manager *dm; @@ -502,6 +509,12 @@ struct amdgpu_display_manager { * Deferred work for vblank control events. */ struct workqueue_struct *vblank_control_workqueue; + + /** + * @idle_workqueue: + * + * Periodic work for idle events. + */ struct idle_workqueue *idle_workqueue; struct drm_atomic_state *cached_state; @@ -587,7 +600,9 @@ struct amdgpu_display_manager { */ struct mutex dpia_aux_lock; - /* + /** + * @bb_from_dmub: + * * Bounding box data read from dmub during early initialization for DCN4+ */ struct dml2_soc_bb *bb_from_dmub; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1270,6 +1285,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index be2638c763d7..9a406d74c0dd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -35,8 +35,10 @@ #include "dc_stream_priv.h" #define DC_LOGGER dc->ctx->logger +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(x, y) ((x > y) ? x : y) +#endif /******************************************************************************* * Private functions diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 067f6555cfdf..ccbb15f1638c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -143,7 +143,8 @@ const struct dc_plane_status *dc_plane_get_status( if (pipe_ctx->plane_state != plane_state) continue; - pipe_ctx->plane_state->status.is_flip_pending = false; + if (pipe_ctx->plane_state) + pipe_ctx->plane_state->status.is_flip_pending = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f..4d960dc5ce89 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -185,8 +185,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1..46f9c05de16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index defe13436a2c..e73579f1a88e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -64,8 +64,6 @@ double math_ceil(const double arg) double math_ceil2(const double arg, const double significance) { - ASSERT(significance != 0); - return ((int)(arg / significance + 0.99999)) * significance; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267b..14a902ff3b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1402,6 +1402,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -3587,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { @@ -3680,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c1026..f115c7a285e7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 9ac7fc717a92..0150f2581ee4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -147,16 +147,28 @@ struct cnv_color_keyer_params { int color_keyer_blue_high; }; -/* new for dcn2: set the 8bit alpha values based on the 2 bit alpha - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0 default: 0b00000000 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1 default: 0b01010101 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2 default: 0b10101010 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3 default: 0b11111111 +/** + * struct cnv_alpha_2bit_lut - Set the 8bit alpha values based on the 2 bit alpha */ struct cnv_alpha_2bit_lut { + /** + * @lut0: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0. Default: 0b00000000 + */ int lut0; + + /** + * @lut1: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1. Default: 0b01010101 + */ int lut1; + + /** + * @lut2: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2. Default: 0b10101010 + */ int lut2; + + /** + * @lut3: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3. Default: 0b11111111 + */ int lut3; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 40a9b3471208..3a89cc0cffc1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -1039,6 +1039,20 @@ struct mpc_funcs { */ void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_ID id, const enum MCM_LUT_XABLE xable, bool lut_bank_a, int mpcc_id); + /** + * @program_3dlut_size: + * + * Program 3D LUT size. + * + * Parameters: + * - [in/out] mpc - MPC context. + * - [in] is_17x17x17 - is 3dlut 17x17x17 + * - [in] mpcc_id + * + * Return: + * + * void + */ void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h index 127fb1a51654..747679cb4944 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h @@ -205,9 +205,24 @@ struct gamma_coefficients { struct fixed31_32 user_brightness; }; +/** + * struct pwl_float_data - Fixed point RGB color + */ struct pwl_float_data { + /** + * @r: Component Red. + */ struct fixed31_32 r; + + /** + * @g: Component Green. + */ + struct fixed31_32 g; + + /** + * @b: Component Blue. + */ struct fixed31_32 b; }; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index 50459d7a0f85..b76737b7b9e4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -26,6 +26,16 @@ #include "core_types.h" #include "link_enc_cfg.h" +/** + * DOC: overview + * + * Display Input Output (DIO), is the display input and output unit in DCN. It + * includes output encoders to support different display output, like + * DisplayPort, HDMI, DVI interface, and others. It also includes the control + * and status channels for these interfaces. + */ + + void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size) { @@ -254,12 +264,31 @@ static const struct link_hwss dio_link_hwss = { }, }; +/** + * can_use_dio_link_hwss - Check if the link_hwss is accessible + * + * @link: Reference a link struct containing one or more sinks and the + * connective status. + * @link_res: Mappable hardware resource used to enable a link. + * + * Returns: + * Return true if the link encoder is accessible from link. + */ bool can_use_dio_link_hwss(const struct dc_link *link, const struct link_resource *link_res) { return link->link_enc != NULL; } +/** + * get_dio_link_hwss - Return link_hwss reference + * + * This function behaves like a get function to return the link_hwss populated + * in the link_hwss_dio.c file. + * + * Returns: + * Return the reference to the filled struct of link_hwss. + */ const struct link_hwss *get_dio_link_hwss(void) { return &dio_link_hwss; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h index a1f72fe378ee..45f0e091fcb0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h @@ -23,15 +23,6 @@ * */ -/** - * DOC: overview - * - * Display Input Output (DIO), is the display input and output unit in DCN. It - * includes output encoders to support different display output, like - * DisplayPort, HDMI, DVI interface, and others. It also includes the control - * and status channels for these interfaces. - */ - #ifndef __LINK_HWSS_DIO_H__ #define __LINK_HWSS_DIO_H__ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 336488c0574e..94427875bcdd 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -945,19 +945,10 @@ void optc1_set_drr( OTG_FORCE_LOCK_ON_EVENT, 0, OTG_SET_V_TOTAL_MIN_MASK_EN, 0, OTG_SET_V_TOTAL_MIN_MASK, 0); - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); - - } else { - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_SET_V_TOTAL_MIN_MASK, 0, - OTG_V_TOTAL_MIN_SEL, 0, - OTG_V_TOTAL_MAX_SEL, 0, - OTG_FORCE_LOCK_ON_EVENT, 0); - - optc->funcs->set_vtotal_min_max(optc, 0, 0); } + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); } void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c index 43417cff2c9b..b4694985a40a 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c @@ -453,6 +453,16 @@ void optc2_setup_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + /* Set the min/max selectors unconditionally so that + * DMCUB fw may change OTG timings when necessary + * TODO: Remove the w/a after fixing the issue in DMCUB firmware + */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + REG_SET_8(OTG_TRIGA_CNTL, 0, OTG_TRIGA_SOURCE_SELECT, 21, OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..8e0588b1cf30 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; /* read VBIOS LTTPR caps */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e..34b02147881d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41..bb46f30d11d0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c index 7ecf76aea950..6e064e6ae949 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c @@ -25,7 +25,9 @@ #include "hdcp.h" +#ifndef MIN #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif #define HDCP_I2C_ADDR 0x3a /* 0x74 >> 1*/ #define KSV_READ_SIZE 0xf /* 0x6803b - 0x6802c */ #define HDCP_MAX_AUX_TRANSACTION_SIZE 16 diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h new file mode 100644 index 000000000000..c2b009752f60 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_OFFSET_HEADER +#define _df_4_15_OFFSET_HEADER + +#define regNCSConfigurationRegister1 0x0901 +#define regNCSConfigurationRegister1_BASE_IDX 4 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h new file mode 100644 index 000000000000..9868a9c32795 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_SH_MASK_HEADER +#define _df_4_15_SH_MASK_HEADER + +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT 0x3 +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing_MASK 0x0003FFF8L + +#endif diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b72d5d362251..21ceafce1f9b 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 1 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 + /* Driver submits one API(cmd) as a single Frame and this command size is same * for all API to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index ffd67c6ed9b3..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 0x14 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000 + /* Driver submits one API(cmd) as a single Frame and this command size is same for all API * to ease the debugging and parsing of ring buffer. */ @@ -94,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -239,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index a1b8a82d77cf..8b7d6ed7e2ed 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; int r = 0; - if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU) + if (!pp_funcs || !pp_funcs->load_firmware || + (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU))) return 0; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h index 6f54c410c2f9..409aeec6baa9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h @@ -22,12 +22,18 @@ */ #include <asm/div64.h> -#define SHIFT_AMOUNT 16 /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */ +enum ppevvmath_constants { + /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */ + SHIFT_AMOUNT = 16, -#define PRECISION 5 /* Change this value to change the number of decimal places in the final output - 5 is a good default */ + /* Change this value to change the number of decimal places in the final output - 5 is a good default */ + PRECISION = 5, -#define SHIFTED_2 (2 << SHIFT_AMOUNT) -#define MAX (1 << (SHIFT_AMOUNT - 1)) - 1 /* 32767 - Might change in the future */ + SHIFTED_2 = (2 << SHIFT_AMOUNT), + + /* 32767 - Might change in the future */ + MAX = (1 << (SHIFT_AMOUNT - 1)) - 1, +}; /* ------------------------------------------------------------------------------- * NEW TYPE - fINT diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb8643d25d1b..9d7454b3c314 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1924,20 +1924,12 @@ static int smu_disable_dpms(struct smu_context *smu) } /* - * For SMU 13.0.4/11 and 14.0.0, PMFW will handle the features disablement properly + * For GFX11 and subsequent APUs, PMFW will handle the features disablement properly * for gpu reset and S0i3 cases. Driver involvement is unnecessary. */ - if (amdgpu_in_reset(adev) || adev->in_s0ix) { - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 4): - case IP_VERSION(13, 0, 11): - case IP_VERSION(14, 0, 0): - case IP_VERSION(14, 0, 1): - return 0; - default: - break; - } - } + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) >= 11 && + smu->is_apu && (amdgpu_in_reset(adev) || adev->in_s0ix)) + return 0; /* * For gpu reset, runpm and hibernation through BACO, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed7..75c921e87360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 5d47d58944f6..8798ebfcea83 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -69,6 +69,9 @@ #define SMU_14_0_0_UMD_PSTATE_SOCCLK 678 #define SMU_14_0_0_UMD_PSTATE_FCLK 1800 +#define SMU_14_0_4_UMD_PSTATE_GFXCLK 938 +#define SMU_14_0_4_UMD_PSTATE_SOCCLK 938 + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ @@ -1296,19 +1299,28 @@ static int smu_v14_0_common_get_dpm_profile_freq(struct smu_context *smu, switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: - clk_limit = SMU_14_0_0_UMD_PSTATE_GFXCLK; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) + clk_limit = SMU_14_0_4_UMD_PSTATE_GFXCLK; + else + clk_limit = SMU_14_0_0_UMD_PSTATE_GFXCLK; if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &clk_limit); else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &clk_limit, NULL); break; case SMU_SOCCLK: - clk_limit = SMU_14_0_0_UMD_PSTATE_SOCCLK; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) + clk_limit = SMU_14_0_4_UMD_PSTATE_SOCCLK; + else + clk_limit = SMU_14_0_0_UMD_PSTATE_SOCCLK; if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &clk_limit); break; case SMU_FCLK: - clk_limit = SMU_14_0_0_UMD_PSTATE_FCLK; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &clk_limit); + else + clk_limit = SMU_14_0_0_UMD_PSTATE_FCLK; if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &clk_limit); else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98ea58d792ca..e1a27903c80a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -66,6 +66,7 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 #define DEBUGSMC_MSG_Mode1Reset 2 +#define LINK_SPEED_MAX 3 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -221,7 +222,6 @@ static struct cmn2asic_mapping smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT), }; -#if 0 static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT), [THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT), @@ -241,7 +241,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT), [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT), }; -#endif static int smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu, @@ -1869,6 +1868,88 @@ static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, return ret; } +static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v1_3 *gpu_metrics = + (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; + SmuMetricsExternal_t metrics_ext; + SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, + &metrics_ext, + true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); + + gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE]; + gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT]; + gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM]; + gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX]; + gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC]; + gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0], + metrics->AvgTemperature[TEMP_VR_MEM1]); + + gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + + gpu_metrics->average_socket_power = metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + else + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; + + if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; + else + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; + + gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0]; + + gpu_metrics->throttle_status = + smu_v14_0_2_get_throttler_status(metrics); + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, + smu_v14_0_2_throttler_map); + + gpu_metrics->current_fan_speed = metrics->AvgFanRpm; + + gpu_metrics->pcie_link_width = metrics->PcieWidth; + if ((metrics->PcieRate - 1) > LINK_SPEED_MAX) + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1); + else + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX]; + gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC]; + gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM]; + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v1_3); +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1905,6 +1986,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_thermal_alert = smu_v14_0_enable_thermal_alert, .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, + .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 88eefef05fae..91ad434bcdae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -794,7 +794,7 @@ static const char *smu_get_feature_name(struct smu_context *smu, size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu, char *buf) { - int8_t sort_feature[max(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)]; + int8_t sort_feature[MAX(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)]; uint64_t feature_mask; int i, feature_index; uint32_t count = 0; diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 1e9259416980..e6c7f0d64e99 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -158,7 +158,14 @@ void ast_dp_launch(struct drm_device *dev) ASTDP_HOST_EDID_READ_DONE); } +bool ast_dp_power_is_on(struct ast_device *ast) +{ + u8 vgacre3; + + vgacre3 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xe3); + return !(vgacre3 & AST_DP_PHY_SLEEP); +} void ast_dp_power_on_off(struct drm_device *dev, bool on) { diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index aae019e79bda..225817087b4d 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -391,6 +391,11 @@ static int ast_drm_freeze(struct drm_device *dev) static int ast_drm_thaw(struct drm_device *dev) { + struct ast_device *ast = to_ast_device(dev); + + ast_enable_vga(ast->ioregs); + ast_open_key(ast->ioregs); + ast_enable_mmio(dev->dev, ast->ioregs); ast_post_gpu(dev); return drm_mode_config_helper_resume(dev); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index ba3d86973995..47bab5596c16 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -472,6 +472,7 @@ void ast_init_3rdtx(struct drm_device *dev); bool ast_astdp_is_connected(struct ast_device *ast); int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata); void ast_dp_launch(struct drm_device *dev); +bool ast_dp_power_is_on(struct ast_device *ast); void ast_dp_power_on_off(struct drm_device *dev, bool no); void ast_dp_set_on_off(struct drm_device *dev, bool no); void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index dc8f639e82fd..049ee1477c33 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -28,6 +28,7 @@ * Authors: Dave Airlie <airlied@redhat.com> */ +#include <linux/delay.h> #include <linux/export.h> #include <linux/pci.h> @@ -1687,11 +1688,35 @@ static int ast_astdp_connector_helper_detect_ctx(struct drm_connector *connector struct drm_modeset_acquire_ctx *ctx, bool force) { + struct drm_device *dev = connector->dev; struct ast_device *ast = to_ast_device(connector->dev); + enum drm_connector_status status = connector_status_disconnected; + struct drm_connector_state *connector_state = connector->state; + bool is_active = false; + + mutex_lock(&ast->modeset_lock); + + if (connector_state && connector_state->crtc) { + struct drm_crtc_state *crtc_state = connector_state->crtc->state; + + if (crtc_state && crtc_state->active) + is_active = true; + } + + if (!is_active && !ast_dp_power_is_on(ast)) { + ast_dp_power_on_off(dev, true); + msleep(50); + } if (ast_astdp_is_connected(ast)) - return connector_status_connected; - return connector_status_disconnected; + status = connector_status_connected; + + if (!is_active && status == connector_status_disconnected) + ast_dp_power_on_off(dev, false); + + mutex_unlock(&ast->modeset_lock); + + return status; } static const struct drm_connector_helper_funcs ast_astdp_connector_helper_funcs = { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 22bbb2d83e30..7936c2023955 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1070,21 +1070,17 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } - if (async_flip && prop != config->prop_fb_id) { + if (async_flip && + (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || + (prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } - if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { - drm_dbg_atomic(prop->dev, - "[OBJECT:%d] Only primary planes can be changed during async flip\n", - obj->id); - ret = -EINVAL; - break; - } - ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 0869b663f17e..a4fbf1eb7ac5 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -443,10 +443,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, panel_bridge = bridge; } - if (connector_type == DRM_MODE_CONNECTOR_Unknown) { - kfree(bridge_connector); + if (connector_type == DRM_MODE_CONNECTOR_Unknown) return ERR_PTR(-EINVAL); - } if (bridge_connector->bridge_hdmi) ret = drmm_connector_hdmi_init(drm, connector, @@ -461,10 +459,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, connector_type, ddc); - if (ret) { - kfree(bridge_connector); + if (ret) return ERR_PTR(ret); - } drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 2803ac111bbd..bfedcbf516db 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -355,7 +355,7 @@ int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer, err_drm_gem_vmap_unlocked: drm_gem_unlock(gem); - return 0; + return ret; } EXPORT_SYMBOL(drm_client_buffer_vmap_local); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 31af5cf37a09..cee5eafbfb81 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index d021497841b8..3969dc548cff 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -532,7 +532,7 @@ int drm_plane_create_color_properties(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct drm_property *prop; - struct drm_prop_enum_list enum_list[max_t(int, DRM_COLOR_ENCODING_MAX, + struct drm_prop_enum_list enum_list[MAX_T(int, DRM_COLOR_ENCODING_MAX, DRM_COLOR_RANGE_MAX)]; int i, len; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 18565ec68451..56ac37ea2f27 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -624,6 +624,17 @@ static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { + /* + * This function may be invoked by panic() to flush the frame + * buffer, where all CPUs except the panic CPU are stopped. + * During the following schedule_work(), the panic CPU needs + * the worker_pool lock, which might be held by a stopped CPU, + * causing schedule_work() and panic() to block. Return early on + * oops_in_progress to prevent this blocking. + */ + if (oops_in_progress) + return; + drm_fb_helper_add_damage_clip(helper, x, y, width, height); schedule_work(&helper->damage_work); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3f84d7527793..0830cae9a4d0 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), @@ -414,6 +426,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), }, .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* OrangePi Neo */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "OrangePi"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "NEO-01"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d1..6c3333136737 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3903f6ead6e6..59f11af3b0a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5314,6 +5314,8 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); + intel_dp->link_trained = false; + intel_dp_check_frl_training(intel_dp); intel_dp_pcon_dsc_configure(intel_dp, crtc_state); intel_dp_start_link_train(NULL, intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 1bc4ef84ff3b..d044c8e36bb3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -117,10 +117,24 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; } -static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) +{ + return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] == + DP_PHY_REPEATER_MODE_TRANSPARENT; +} + +/* + * Read the LTTPR common capabilities and switch the LTTPR PHYs to + * non-transparent mode if this is supported. Preserve the + * transparent/non-transparent mode on an active link. + * + * Return the number of detected LTTPRs in non-transparent mode or 0 if the + * LTTPRs are in transparent mode or the detection failed. + */ +static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { int lttpr_count; - int i; if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd)) return 0; @@ -135,6 +149,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI return 0; /* + * Don't change the mode on an active link, to prevent a loss of link + * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR + * resetting its internal state when the mode is changed from + * non-transparent to transparent. + */ + if (intel_dp->link_trained) { + if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp)) + goto out_reset_lttpr_count; + + return lttpr_count; + } + + /* * See DP Standard v2.0 3.6.6.1. about the explicit disabling of * non-transparent mode and the disable->enable non-transparent mode * sequence. @@ -154,11 +181,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI "Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n"); intel_dp_set_lttpr_transparent_mode(intel_dp, true); - intel_dp_reset_lttpr_count(intel_dp); - return 0; + goto out_reset_lttpr_count; } + return lttpr_count; + +out_reset_lttpr_count: + intel_dp_reset_lttpr_count(intel_dp); + + return 0; +} + +static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + int lttpr_count; + int i; + + lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd); + for (i = 0; i < lttpr_count; i++) intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i)); @@ -1482,10 +1523,10 @@ void intel_dp_start_link_train(struct intel_atomic_state *state, struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; bool passed; - /* - * TODO: Reiniting LTTPRs here won't be needed once proper connector - * HW state readout is added. + * Reinit the LTTPRs here to ensure that they are switched to + * non-transparent mode. During an earlier LTTPR detection this + * could've been prevented by an active link. */ int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 90998b037349..292d163036b1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params, } static int -skl_ddi_calculate_wrpll(int clock /* in Hz */, +skl_ddi_calculate_wrpll(int clock, int ref_clock, struct skl_wrpll_params *wrpll_params) { @@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, }; unsigned int dco, d, i; unsigned int p0, p1, p2; - u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */ for (d = 0; d < ARRAY_SIZE(dividers); d++) { for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { @@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) struct skl_wrpll_params wrpll_params = {}; int ret; - ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000, + ret = skl_ddi_calculate_wrpll(crtc_state->port_clock, i915->display.dpll.ref_clks.nssc, &wrpll_params); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index a568a457e532..f590d7f48ba7 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -251,7 +251,7 @@ #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ - PIPE_HDCP2_STREAM_STATUS(pipe)) + PIPE_HDCP2_STREAM_STATUS(port)) #define _PORTA_HDCP2_AUTH_STREAM 0x66F00 #define _PORTB_HDCP2_AUTH_STREAM 0x66F04 diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba86..7ce926241e83 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625..cac6d4184506 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -290,6 +290,41 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + unsigned long *start_vaddr, + unsigned long *end_vaddr) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +441,14 @@ retry: if (ret) goto err_unpin; + set_address_limits(area, vma, obj_offset, &start, &end); + + pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + pfn += (start - area->vm_start) >> PAGE_SHIFT; + pfn += obj_offset - vma->gtt_view.partial.offset; + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; @@ -1084,6 +1125,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0f..5c72462d1f57 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { @@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 21829439e686..72090f52fb85 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3315,11 +3315,7 @@ static void remove_from_engine(struct i915_request *rq) static bool can_preempt(struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) > 8) - return true; - - /* GPGPU on bdw requires extra w/a; not implemented */ - return engine->class != RENDER_CLASS; + return GRAPHICS_VER(engine->i915) > 8; } static void kick_execlists(const struct i915_request *rq, int prio) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b1cd4c7a525..025a79fe5920 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2749,26 +2749,6 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, } static int -gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config, - struct i915_active *active) -{ - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE), - CTX_R_PWR_CLK_STATE, - }, - }; - - if (stream->engine->class != RENDER_CLASS) - return 0; - - return oa_configure_all_contexts(stream, - regs, ARRAY_SIZE(regs), - active); -} - -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config, struct i915_active *active) @@ -2874,7 +2854,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; - struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; u32 sqcnt1; @@ -2919,15 +2898,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); /* - * Update all contexts prior writing the mux configurations as we need - * to make sure all slices/subslices are ON before writing to NOA - * registers. - */ - ret = gen12_configure_all_contexts(stream, oa_config, active); - if (ret) - return ret; - - /* * For Gen12, performance counters are context * saved/restored. Only enable it for the context that * requested this. @@ -2980,9 +2950,6 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING)); } - /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL, NULL); - /* disable the context save/restore or OAR counters */ if (stream->ctx) gen12_configure_oar_context(stream, NULL); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 172dfa7c3588..d40ee1b42110 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -368,8 +368,10 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) goto out_cleanup; } - mem->id = i; - i915->mm.regions[i] = mem; + if (mem) { /* Skip on non-fatal errors */ + mem->id = i; + i915->mm.regions[i] = mem; + } } for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e..77b50c56c124 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0712d0b15170..70fb003a6666 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -898,7 +898,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, * Without this the operation can timeout and we'll fallback to a * software copy, which might take several minutes to finish. */ - nouveau_fence_wait(fence, false); + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, new_reg); nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 66fca95c10c7..7c97b2886807 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) ret = nouveau_fence_new(&fence, chan); if (!ret) { - ret = nouveau_fence_wait(fence, false); + ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 6719353e2e13..6fb65b01d778 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { if (fence) { - nouveau_fence_wait(*fence, false); + nouveau_fence_wait(*fence, true, false); nouveau_fence_unref(fence); } else { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ba469767a20f..93f08f9479d8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -311,11 +311,39 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) return timeout - t; } +static int +nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) +{ + int ret = 0; + + while (!nouveau_fence_done(fence)) { + if (time_after_eq(jiffies, fence->timeout)) { + ret = -EBUSY; + break; + } + + __set_current_state(intr ? + TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + + __set_current_state(TASK_RUNNING); + return ret; +} + int -nouveau_fence_wait(struct nouveau_fence *fence, bool intr) +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) { long ret; + if (!lazy) + return nouveau_fence_wait_busy(fence, intr); + ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 1b63197b744a..8bc065acfe35 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -int nouveau_fence_wait(struct nouveau_fence *, bool intr); +int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); struct nouveau_fence_chan { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2e535caa7d6e..5a887d67dc0e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -928,7 +928,7 @@ revalidate: } if (sync) { - if (!(ret = nouveau_fence_wait(fence, false))) { + if (!(ret = nouveau_fence_wait(fence, false, false))) { if ((ret = dma_fence_get_status(&fence->base)) == 1) ret = 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index b58ab595faf8..cd95446d6851 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -64,7 +64,8 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, * to the caller, instead of a normal nouveau_bo ttm reference. */ ret = drm_gem_object_init(dev, &nvbo->bo.base, size); if (ret) { - nouveau_bo_ref(NULL, &nvbo); + drm_gem_object_release(&nvbo->bo.base); + kfree(nvbo); obj = ERR_PTR(-ENOMEM); goto unlock; } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 9402fa320a7e..48f105239f42 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1803,6 +1803,7 @@ nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0); return nouveau_bo_validate(nvbo, true, false); } diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index 3f7139e211d2..64e440a2649b 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_OMAP tristate "OMAP DRM" + depends on MMU depends on DRM && OF depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB) select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 1fe6e0d883c7..e5577d2a19ef 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -33,8 +33,10 @@ #include "evergreen_reg_safe.h" #include "cayman_reg_safe.h" +#ifndef MIN #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif #define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 2241e53a2946..dec6913cec5b 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index c3758faa1b83..d8d0e4d1682f 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -102,6 +102,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test) sg_init_one(sgt->sgl, buf, TEST_SIZE); + /* + * Set the DMA mask to 64-bits and map the sgtables + * otherwise drm_gem_shmem_free will cause a warning + * on debug kernels. + */ + ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64)); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + /* Init a mock DMA-BUF */ buf_mock.size = TEST_SIZE; attach_mock.dmabuf = &buf_mock; diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index a47f00b443d3..5982941d933b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -265,7 +265,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) struct v3d_dev *v3d; int ret; u32 mmu_debug; - u32 ident1; + u32 ident1, ident3; u64 mask; v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm); @@ -298,6 +298,9 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ + ident3 = V3D_READ(V3D_HUB_IDENT3); + v3d->rev = V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV); + if (v3d->ver >= 71) v3d->max_counters = V3D_V71_NUM_PERFCOUNTERS; else if (v3d->ver >= 42) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 099b962bdfde..a0febdb6f214 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -98,10 +98,12 @@ struct v3d_perfmon { struct v3d_dev { struct drm_device drm; - /* Short representation (e.g. 33, 41) of the V3D tech version - * and revision. - */ + /* Short representation (e.g. 33, 41) of the V3D tech version */ int ver; + + /* Short representation (e.g. 5, 6) of the V3D tech revision */ + int rev; + bool single_irq_line; /* Different revisions of V3D have different total number of performance @@ -563,6 +565,10 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); /* v3d_sched.c */ +void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count); +void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count); void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue); int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 03df37a3acf5..b8682818bafa 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -73,24 +73,44 @@ v3d_sched_job_free(struct drm_sched_job *sched_job) v3d_job_cleanup(job); } +void +v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + +void +v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + static void v3d_cpu_job_free(struct drm_sched_job *sched_job) { struct v3d_cpu_job *job = to_cpu_job(sched_job); - struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; - struct v3d_performance_query_info *performance_query = &job->performance_query; - if (timestamp_query->queries) { - for (int i = 0; i < timestamp_query->count; i++) - drm_syncobj_put(timestamp_query->queries[i].syncobj); - kvfree(timestamp_query->queries); - } + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); - if (performance_query->queries) { - for (int i = 0; i < performance_query->count; i++) - drm_syncobj_put(performance_query->queries[i].syncobj); - kvfree(performance_query->queries); - } + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); v3d_job_cleanup(&job->base); } @@ -295,7 +315,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -315,9 +335,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); @@ -331,7 +359,8 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); struct drm_v3d_submit_csd *args = &indirect_csd->job->args; - u32 *wg_counts; + struct v3d_dev *v3d = job->base.v3d; + u32 num_batches, *wg_counts; v3d_get_bo_vaddr(bo); v3d_get_bo_vaddr(indirect); @@ -344,8 +373,17 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; + + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * + (wg_counts[0] * wg_counts[1] * wg_counts[2]); + + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) + args->cfg[4] = num_batches - 1; + else + args->cfg[4] = num_batches; + + WARN_ON(args->cfg[4] == ~0); for (int i = 0; i < 3; i++) { /* 0xffffffff indicates that the uniform rewrite is not needed */ diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 88f63d526b22..4cdfabbf4964 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -452,6 +452,8 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_timestamp_query timestamp; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -480,26 +482,34 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, offsets = u64_to_user_ptr(timestamp.offsets); syncs = u64_to_user_ptr(timestamp.syncs); - for (int i = 0; i < timestamp.count; i++) { + for (i = 0; i < timestamp.count; i++) { u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = timestamp.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -509,6 +519,8 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, { u32 __user *syncs; struct drm_v3d_reset_timestamp_query reset; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -533,21 +545,29 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; job->timestamp_query.queries[i].offset = reset.offset + 8 * i; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = reset.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } /* Get data for the copy timestamp query results job submission. */ @@ -558,7 +578,8 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_copy_timestamp_query copy; - int i; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -591,18 +612,22 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = copy.count; @@ -613,6 +638,10 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -623,6 +652,8 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_reset_performance_query reset; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -637,6 +668,9 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, if (copy_from_user(&reset, ext, sizeof(reset))) return -EFAULT; + if (reset.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(reset.count, @@ -648,39 +682,47 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < reset.nperfmons; j++) { + for (j = 0; j < reset.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = reset.count; job->performance_query.nperfmons = reset.nperfmons; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } static int @@ -691,6 +733,8 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_copy_performance_query copy; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -708,6 +752,9 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, if (copy.pad) return -EINVAL; + if (copy.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(copy.count, @@ -719,34 +766,38 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(copy.syncs); kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); - for (int i = 0; i < copy.count; i++) { + for (i = 0; i < copy.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < copy.nperfmons; j++) { + for (j = 0; j < copy.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = copy.count; job->performance_query.nperfmons = copy.nperfmons; @@ -759,6 +810,10 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 1c7c7f61a222..7d34cf83f5f2 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -48,7 +48,7 @@ struct virtio_gpu_submit { static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit, struct dma_fence *in_fence) { - u32 context = submit->fence_ctx + submit->ring_idx; + u64 context = submit->fence_ctx + submit->ring_idx; if (dma_fence_match_context(in_fence, context)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h index b0d87c5f58d8..1ac3cb151b11 100644 --- a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h +++ b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT + * + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -31,6 +33,10 @@ #include <drm/vmwgfx_drm.h> +#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) ((svga3d_flags) >> 32) +#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ + ((svga3d_flags) & ((uint64_t)U32_MAX)) + static inline u32 clamped_umul32(u32 a, u32 b) { uint64_t tmp = (uint64_t) a*b; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 00144632c600..f42ebc4a7c22 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright © 2011-2023 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -28,15 +28,39 @@ #include "vmwgfx_bo.h" #include "vmwgfx_drv.h" - +#include "vmwgfx_resource_priv.h" #include <drm/ttm/ttm_placement.h> static void vmw_bo_release(struct vmw_bo *vbo) { + struct vmw_resource *res; + WARN_ON(vbo->tbo.base.funcs && kref_read(&vbo->tbo.base.refcount) != 0); vmw_bo_unmap(vbo); + + xa_destroy(&vbo->detached_resources); + WARN_ON(vbo->is_dumb && !vbo->dumb_surface); + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + WARN_ON(vbo != res->guest_memory_bo); + WARN_ON(!res->guest_memory_bo); + if (res->guest_memory_bo) { + /* Reserve and switch the backing mob. */ + mutex_lock(&res->dev_priv->cmdbuf_mutex); + (void)vmw_resource_reserve(res, false, true); + vmw_resource_mob_detach(res); + if (res->coherent) + vmw_bo_dirty_release(res->guest_memory_bo); + res->guest_memory_bo = NULL; + res->guest_memory_offset = 0; + vmw_resource_unreserve(res, false, false, false, NULL, + 0); + mutex_unlock(&res->dev_priv->cmdbuf_mutex); + } + vmw_surface_unreference(&vbo->dumb_surface); + } drm_gem_object_release(&vbo->tbo.base); } @@ -326,6 +350,11 @@ void vmw_bo_pin_reserved(struct vmw_bo *vbo, bool pin) */ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) { + return vmw_bo_map_and_cache_size(vbo, vbo->tbo.base.size); +} + +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) +{ struct ttm_buffer_object *bo = &vbo->tbo; bool not_used; void *virtual; @@ -335,9 +364,10 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) if (virtual) return virtual; - ret = ttm_bo_kmap(bo, 0, PFN_UP(bo->base.size), &vbo->map); + ret = ttm_bo_kmap(bo, 0, PFN_UP(size), &vbo->map); if (ret) - DRM_ERROR("Buffer object map failed: %d.\n", ret); + DRM_ERROR("Buffer object map failed: %d (size: bo = %zu, map = %zu).\n", + ret, bo->base.size, size); return ttm_kmap_obj_virtual(&vbo->map, ¬_used); } @@ -390,6 +420,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; + xa_init(&vmw_bo->detached_resources); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); @@ -654,52 +685,6 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, dma_fence_put(&fence->base); } - -/** - * vmw_dumb_create - Create a dumb kms buffer - * - * @file_priv: Pointer to a struct drm_file identifying the caller. - * @dev: Pointer to the drm device. - * @args: Pointer to a struct drm_mode_create_dumb structure - * Return: Zero on success, negative error code on failure. - * - * This is a driver callback for the core drm create_dumb functionality. - * Note that this is very similar to the vmw_bo_alloc ioctl, except - * that the arguments have a different format. - */ -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_bo *vbo; - int cpp = DIV_ROUND_UP(args->bpp, 8); - int ret; - - switch (cpp) { - case 1: /* DRM_FORMAT_C8 */ - case 2: /* DRM_FORMAT_RGB565 */ - case 4: /* DRM_FORMAT_XRGB8888 */ - break; - default: - /* - * Dumb buffers don't allow anything else. - * This is tested via IGT's dumb_buffers - */ - return -EINVAL; - } - - args->pitch = args->width * cpp; - args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); - - ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, - args->size, &args->handle, - &vbo); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put(&vbo->tbo.base); - return ret; -} - /** * vmw_bo_swap_notify - swapout notify callback. * @@ -853,3 +838,43 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } + +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); +} + +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_erase(&vbo->detached_resources, (unsigned long)res); +} + +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo) +{ + unsigned long index; + struct vmw_resource *res = NULL; + struct vmw_surface *surf = NULL; + struct rb_node *rb_itr = vbo->res_tree.rb_node; + + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + goto out; + } + + xa_for_each(&vbo->detached_resources, index, res) { + if (res->func->res_type == vmw_res_surface) + goto out; + } + + for (rb_itr = rb_first(&vbo->res_tree); rb_itr; + rb_itr = rb_next(rb_itr)) { + res = rb_entry(rb_itr, struct vmw_resource, mob_node); + if (res->func->res_type == vmw_res_surface) + goto out; + } + +out: + if (res) + surf = vmw_res_to_srf(res); + return surf; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index f349642e6190..62b4342d5f7c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2023-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ #include <linux/rbtree_types.h> #include <linux/types.h> +#include <linux/xarray.h> struct vmw_bo_dirty; struct vmw_fence_obj; struct vmw_private; struct vmw_resource; +struct vmw_surface; enum vmw_bo_domain { VMW_BO_DOMAIN_SYS = BIT(0), @@ -85,11 +88,15 @@ struct vmw_bo { struct rb_root res_tree; u32 res_prios[TTM_MAX_BO_PRIORITY]; + struct xarray detached_resources; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; struct vmw_bo_dirty *dirty; + + bool is_dumb; + struct vmw_surface *dumb_surface; }; void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain); @@ -124,15 +131,21 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence); void *vmw_bo_map_and_cache(struct vmw_bo *vbo); +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size); void vmw_bo_unmap(struct vmw_bo *vbo); void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); + int vmw_user_bo_lookup(struct drm_file *filp, u32 handle, struct vmw_bo **out); + /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority * according to attached resources diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a1ce41e1c468..32f50e595809 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -763,6 +764,26 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv, extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); /** + * User handles + */ +struct vmw_user_object { + struct vmw_surface *surface; + struct vmw_bo *buffer; +}; + +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, + u32 handle, struct vmw_user_object *uo); +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo); +void vmw_user_object_unref(struct vmw_user_object *uo); +bool vmw_user_object_is_null(struct vmw_user_object *uo); +struct vmw_surface *vmw_user_object_surface(struct vmw_user_object *uo); +struct vmw_bo *vmw_user_object_buffer(struct vmw_user_object *uo); +void *vmw_user_object_map(struct vmw_user_object *uo); +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size); +void vmw_user_object_unmap(struct vmw_user_object *uo); +bool vmw_user_object_is_mapped(struct vmw_user_object *uo); + +/** * Resource utilities - vmwgfx_resource.c */ struct vmw_user_resource_conv; @@ -776,11 +797,6 @@ extern int vmw_resource_validate(struct vmw_resource *res, bool intr, extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); -extern int vmw_user_lookup_handle(struct vmw_private *dev_priv, - struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf); extern int vmw_user_resource_lookup_handle( struct vmw_private *dev_priv, struct ttm_object_file *tfile, @@ -1057,9 +1073,6 @@ int vmw_kms_suspend(struct drm_device *dev); int vmw_kms_resume(struct drm_device *dev); void vmw_kms_lost_device(struct drm_device *dev); -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); @@ -1176,6 +1189,15 @@ extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev, int vmw_gb_surface_define(struct vmw_private *dev_priv, const struct vmw_surface_metadata *req, struct vmw_surface **srf_out); +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); /* * Shader management - vmwgfx_shader.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 5efc6a766f64..588d50ababf6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -32,7 +32,6 @@ #define VMW_FENCE_WRAP (1 << 31) struct vmw_fence_manager { - int num_fence_objects; struct vmw_private *dev_priv; spinlock_t lock; struct list_head fence_list; @@ -124,13 +123,13 @@ static void vmw_fence_obj_destroy(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); - struct vmw_fence_manager *fman = fman_from_fence(fence); - spin_lock(&fman->lock); - list_del_init(&fence->head); - --fman->num_fence_objects; - spin_unlock(&fman->lock); + if (!list_empty(&fence->head)) { + spin_lock(&fman->lock); + list_del_init(&fence->head); + spin_unlock(&fman->lock); + } fence->destroy(fence); } @@ -257,7 +256,6 @@ static const struct dma_fence_ops vmw_fence_ops = { .release = vmw_fence_obj_destroy, }; - /* * Execute signal actions on fences recently signaled. * This is done from a workqueue so we don't have to execute @@ -355,7 +353,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, goto out_unlock; } list_add_tail(&fence->head, &fman->fence_list); - ++fman->num_fence_objects; out_unlock: spin_unlock(&fman->lock); @@ -403,7 +400,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, u32 passed_seqno) { u32 goal_seqno; - struct vmw_fence_obj *fence; + struct vmw_fence_obj *fence, *next_fence; if (likely(!fman->seqno_valid)) return false; @@ -413,7 +410,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, return false; fman->seqno_valid = false; - list_for_each_entry(fence, &fman->fence_list, head) { + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; vmw_fence_goal_write(fman->dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 07185c108218..b9857f37ca1a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* - * Copyright 2021-2023 VMware, Inc. + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -78,6 +79,59 @@ static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj) return drm_prime_pages_to_sg(obj->dev, vmw_tt->dma_ttm.pages, vmw_tt->dma_ttm.num_pages); } +static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + int ret; + + if (obj->import_attach) { + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); + if (!ret) { + if (drm_WARN_ON(obj->dev, map->is_iomem)) { + dma_buf_vunmap(obj->import_attach->dmabuf, map); + return -EIO; + } + } + } else { + ret = ttm_bo_vmap(bo, map); + } + + return ret; +} + +static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + if (obj->import_attach) + dma_buf_vunmap(obj->import_attach->dmabuf, map); + else + drm_gem_ttm_vunmap(obj, map); +} + +static int vmw_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + if (obj->import_attach) { + /* + * Reset both vm_ops and vm_private_data, so we don't end up with + * vm_ops pointing to our implementation if the dma-buf backend + * doesn't set those fields. + */ + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + + ret = dma_buf_mmap(obj->dma_buf, vma, 0); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + if (!ret) + drm_gem_object_put(obj); + + return ret; + } + + return drm_gem_ttm_mmap(obj, vma); +} + static const struct vm_operations_struct vmw_vm_ops = { .pfn_mkwrite = vmw_bo_vm_mkwrite, .page_mkwrite = vmw_bo_vm_mkwrite, @@ -94,9 +148,9 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = { .pin = vmw_gem_object_pin, .unpin = vmw_gem_object_unpin, .get_sg_table = vmw_gem_object_get_sg_table, - .vmap = drm_gem_ttm_vmap, - .vunmap = drm_gem_ttm_vunmap, - .mmap = drm_gem_ttm_mmap, + .vmap = vmw_gem_vmap, + .vunmap = vmw_gem_vunmap, + .mmap = vmw_gem_mmap, .vm_ops = &vmw_vm_ops, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 00c4ff684130..288ed0bb75cb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -193,13 +194,16 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h) */ static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps) { - if (vps->surf) { - if (vps->surf_mapped) - return vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - return vps->surf->snooper.image; - } else if (vps->bo) - return vmw_bo_map_and_cache(vps->bo); - return NULL; + struct vmw_surface *surf; + + if (vmw_user_object_is_null(&vps->uo)) + return NULL; + + surf = vmw_user_object_surface(&vps->uo); + if (surf && !vmw_user_object_is_mapped(&vps->uo)) + return surf->snooper.image; + + return vmw_user_object_map(&vps->uo); } static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, @@ -536,22 +540,16 @@ void vmw_du_primary_plane_destroy(struct drm_plane *plane) * vmw_du_plane_unpin_surf - unpins resource associated with a framebuffer surface * * @vps: plane state associated with the display surface - * @unreference: true if we also want to unreference the display. */ -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference) +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps) { - if (vps->surf) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + + if (surf) { if (vps->pinned) { - vmw_resource_unpin(&vps->surf->res); + vmw_resource_unpin(&surf->res); vps->pinned--; } - - if (unreference) { - if (vps->pinned) - DRM_ERROR("Surface still pinned\n"); - vmw_surface_unreference(&vps->surf); - } } } @@ -572,7 +570,7 @@ vmw_du_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - vmw_du_plane_unpin_surf(vps, false); + vmw_du_plane_unpin_surf(vps); } @@ -661,25 +659,14 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane, struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } + if (!vmw_user_object_is_null(&vps->uo)) + vmw_user_object_unmap(&vps->uo); vmw_du_cursor_plane_unmap_cm(vps); vmw_du_put_cursor_mob(vcp, vps); - vmw_du_plane_unpin_surf(vps, false); - - if (vps->surf) { - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; - } + vmw_du_plane_unpin_surf(vps); + vmw_user_object_unref(&vps->uo); } @@ -698,64 +685,48 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane, struct drm_framebuffer *fb = new_state->fb; struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); + struct vmw_bo *bo = NULL; int ret = 0; - if (vps->surf) { - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; + if (!vmw_user_object_is_null(&vps->uo)) { + vmw_user_object_unmap(&vps->uo); + vmw_user_object_unref(&vps->uo); } if (fb) { if (vmw_framebuffer_to_vfb(fb)->bo) { - vps->bo = vmw_framebuffer_to_vfbd(fb)->buffer; - vmw_bo_reference(vps->bo); + vps->uo.buffer = vmw_framebuffer_to_vfbd(fb)->buffer; + vps->uo.surface = NULL; } else { - vps->surf = vmw_framebuffer_to_vfbs(fb)->surface; - vmw_surface_reference(vps->surf); + memcpy(&vps->uo, &vmw_framebuffer_to_vfbs(fb)->uo, sizeof(vps->uo)); } + vmw_user_object_ref(&vps->uo); } - if (!vps->surf && vps->bo) { - const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { + struct ttm_operation_ctx ctx = {false, false}; - /* - * Not using vmw_bo_map_and_cache() helper here as we need to - * reserve the ttm_buffer_object first which - * vmw_bo_map_and_cache() omits. - */ - ret = ttm_bo_reserve(&vps->bo->tbo, true, false, NULL); - - if (unlikely(ret != 0)) + ret = ttm_bo_reserve(&bo->tbo, true, false, NULL); + if (ret != 0) return -ENOMEM; - ret = ttm_bo_kmap(&vps->bo->tbo, 0, PFN_UP(size), &vps->bo->map); - - ttm_bo_unreserve(&vps->bo->tbo); - - if (unlikely(ret != 0)) + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret != 0) return -ENOMEM; - } else if (vps->surf && !vps->bo && vps->surf->res.guest_memory_bo) { - WARN_ON(vps->surf->snooper.image); - ret = ttm_bo_reserve(&vps->surf->res.guest_memory_bo->tbo, true, false, - NULL); - if (unlikely(ret != 0)) - return -ENOMEM; - vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - ttm_bo_unreserve(&vps->surf->res.guest_memory_bo->tbo); - vps->surf_mapped = true; + vmw_bo_pin_reserved(bo, true); + if (vmw_framebuffer_to_vfb(fb)->bo) { + const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + + (void)vmw_bo_map_and_cache_size(bo, size); + } else { + vmw_bo_map_and_cache(bo); + } + ttm_bo_unreserve(&bo->tbo); } - if (vps->surf || vps->bo) { + if (!vmw_user_object_is_null(&vps->uo)) { vmw_du_get_cursor_mob(vcp, vps); vmw_du_cursor_plane_map_cm(vps); } @@ -777,14 +748,17 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); + struct vmw_bo *old_bo = NULL; + struct vmw_bo *new_bo = NULL; s32 hotspot_x, hotspot_y; + int ret; hotspot_x = du->hotspot_x + new_state->hotspot_x; hotspot_y = du->hotspot_y + new_state->hotspot_y; - du->cursor_surface = vps->surf; + du->cursor_surface = vmw_user_object_surface(&vps->uo); - if (!vps->surf && !vps->bo) { + if (vmw_user_object_is_null(&vps->uo)) { vmw_cursor_update_position(dev_priv, false, 0, 0); return; } @@ -792,10 +766,26 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, vps->cursor.hotspot_x = hotspot_x; vps->cursor.hotspot_y = hotspot_y; - if (vps->surf) { + if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + + if (!vmw_user_object_is_null(&old_vps->uo)) { + old_bo = vmw_user_object_buffer(&old_vps->uo); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + if (ret != 0) + return; } + if (!vmw_user_object_is_null(&vps->uo)) { + new_bo = vmw_user_object_buffer(&vps->uo); + if (old_bo != new_bo) { + ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); + if (ret != 0) + return; + } else { + new_bo = NULL; + } + } if (!vmw_du_cursor_plane_has_changed(old_vps, vps)) { /* * If it hasn't changed, avoid making the device do extra @@ -813,6 +803,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + if (new_bo) + ttm_bo_unreserve(&new_bo->tbo); + du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; @@ -913,7 +908,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, } if (!vmw_framebuffer_to_vfb(fb)->bo) { - surface = vmw_framebuffer_to_vfbs(fb)->surface; + surface = vmw_user_object_surface(&vmw_framebuffer_to_vfbs(fb)->uo); WARN_ON(!surface); @@ -1074,12 +1069,7 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) memset(&vps->cursor, 0, sizeof(vps->cursor)); /* Each ref counted resource needs to be acquired again */ - if (vps->surf) - (void) vmw_surface_reference(vps->surf); - - if (vps->bo) - (void) vmw_bo_reference(vps->bo); - + vmw_user_object_ref(&vps->uo); state = &vps->base; __drm_atomic_helper_plane_duplicate_state(plane, state); @@ -1128,11 +1118,7 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(state); /* Should have been freed by cleanup_fb */ - if (vps->surf) - vmw_surface_unreference(&vps->surf); - - if (vps->bo) - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); drm_atomic_helper_plane_destroy_state(plane, state); } @@ -1227,7 +1213,7 @@ static void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer) vmw_framebuffer_to_vfbs(framebuffer); drm_framebuffer_cleanup(framebuffer); - vmw_surface_unreference(&vfbs->surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); } @@ -1272,29 +1258,41 @@ int vmw_kms_readback(struct vmw_private *dev_priv, return -ENOSYS; } +static int vmw_framebuffer_surface_create_handle(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) +{ + struct vmw_framebuffer_surface *vfbs = vmw_framebuffer_to_vfbs(fb); + struct vmw_bo *bo = vmw_user_object_buffer(&vfbs->uo); + + return drm_gem_handle_create(file_priv, &bo->tbo.base, handle); +} static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { + .create_handle = vmw_framebuffer_surface_create_handle, .destroy = vmw_framebuffer_surface_destroy, .dirty = drm_atomic_helper_dirtyfb, }; static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, - struct vmw_surface *surface, + struct vmw_user_object *uo, struct vmw_framebuffer **out, const struct drm_mode_fb_cmd2 - *mode_cmd, - bool is_bo_proxy) + *mode_cmd) { struct drm_device *dev = &dev_priv->drm; struct vmw_framebuffer_surface *vfbs; enum SVGA3dSurfaceFormat format; + struct vmw_surface *surface; int ret; /* 3D is only supported on HWv8 and newer hosts */ if (dev_priv->active_display_unit == vmw_du_legacy) return -ENOSYS; + surface = vmw_user_object_surface(uo); + /* * Sanity checks. */ @@ -1357,8 +1355,8 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, } drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd); - vfbs->surface = vmw_surface_reference(surface); - vfbs->is_bo_proxy = is_bo_proxy; + memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); + vmw_user_object_ref(&vfbs->uo); *out = &vfbs->base; @@ -1370,7 +1368,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, return 0; out_err2: - vmw_surface_unreference(&surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); out_err1: return ret; @@ -1386,7 +1384,6 @@ static int vmw_framebuffer_bo_create_handle(struct drm_framebuffer *fb, { struct vmw_framebuffer_bo *vfbd = vmw_framebuffer_to_vfbd(fb); - return drm_gem_handle_create(file_priv, &vfbd->buffer->tbo.base, handle); } @@ -1407,86 +1404,6 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { .dirty = drm_atomic_helper_dirtyfb, }; -/** - * vmw_create_bo_proxy - create a proxy surface for the buffer object - * - * @dev: DRM device - * @mode_cmd: parameters for the new surface - * @bo_mob: MOB backing the buffer object - * @srf_out: newly created surface - * - * When the content FB is a buffer object, we create a surface as a proxy to the - * same buffer. This way we can do a surface copy rather than a surface DMA. - * This is a more efficient approach - * - * RETURNS: - * 0 on success, error code otherwise - */ -static int vmw_create_bo_proxy(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct vmw_bo *bo_mob, - struct vmw_surface **srf_out) -{ - struct vmw_surface_metadata metadata = {0}; - uint32_t format; - struct vmw_resource *res; - unsigned int bytes_pp; - int ret; - - switch (mode_cmd->pixel_format) { - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - format = SVGA3D_X8R8G8B8; - bytes_pp = 4; - break; - - case DRM_FORMAT_RGB565: - case DRM_FORMAT_XRGB1555: - format = SVGA3D_R5G6B5; - bytes_pp = 2; - break; - - case 8: - format = SVGA3D_P8; - bytes_pp = 1; - break; - - default: - DRM_ERROR("Invalid framebuffer format %p4cc\n", - &mode_cmd->pixel_format); - return -EINVAL; - } - - metadata.format = format; - metadata.mip_levels[0] = 1; - metadata.num_sizes = 1; - metadata.base_size.width = mode_cmd->pitches[0] / bytes_pp; - metadata.base_size.height = mode_cmd->height; - metadata.base_size.depth = 1; - metadata.scanout = true; - - ret = vmw_gb_surface_define(vmw_priv(dev), &metadata, srf_out); - if (ret) { - DRM_ERROR("Failed to allocate proxy content buffer\n"); - return ret; - } - - res = &(*srf_out)->res; - - /* Reserve and switch the backing mob. */ - mutex_lock(&res->dev_priv->cmdbuf_mutex); - (void) vmw_resource_reserve(res, false, true); - vmw_user_bo_unref(&res->guest_memory_bo); - res->guest_memory_bo = vmw_user_bo_ref(bo_mob); - res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, 0); - mutex_unlock(&res->dev_priv->cmdbuf_mutex); - - return 0; -} - - - static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, struct vmw_bo *bo, struct vmw_framebuffer **out, @@ -1565,55 +1482,24 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height) * vmw_kms_new_framebuffer - Create a new framebuffer. * * @dev_priv: Pointer to device private struct. - * @bo: Pointer to buffer object to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @surface: Pointer to a surface to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @only_2d: No presents will occur to this buffer object based framebuffer. - * This helps the code to do some important optimizations. + * @uo: Pointer to user object to wrap the kms framebuffer around. + * Either the buffer or surface inside the user object must be NULL. * @mode_cmd: Frame-buffer metadata. */ struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_framebuffer *vfb = NULL; - bool is_bo_proxy = false; int ret; - /* - * We cannot use the SurfaceDMA command in an non-accelerated VM, - * therefore, wrap the buffer object in a surface so we can use the - * SurfaceCopy command. - */ - if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height) && - bo && only_2d && - mode_cmd->width > 64 && /* Don't create a proxy for cursor */ - dev_priv->active_display_unit == vmw_du_screen_target) { - ret = vmw_create_bo_proxy(&dev_priv->drm, mode_cmd, - bo, &surface); - if (ret) - return ERR_PTR(ret); - - is_bo_proxy = true; - } - /* Create the new framebuffer depending one what we have */ - if (surface) { - ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, - mode_cmd, - is_bo_proxy); - /* - * vmw_create_bo_proxy() adds a reference that is no longer - * needed - */ - if (is_bo_proxy) - vmw_surface_unreference(&surface); - } else if (bo) { - ret = vmw_kms_new_framebuffer_bo(dev_priv, bo, &vfb, + if (vmw_user_object_surface(uo)) { + ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb, + mode_cmd); + } else if (uo->buffer) { + ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb, mode_cmd); } else { BUG(); @@ -1635,14 +1521,12 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, { struct vmw_private *dev_priv = vmw_priv(dev); struct vmw_framebuffer *vfb = NULL; - struct vmw_surface *surface = NULL; - struct vmw_bo *bo = NULL; + struct vmw_user_object uo = {0}; int ret; /* returns either a bo or surface */ - ret = vmw_user_lookup_handle(dev_priv, file_priv, - mode_cmd->handles[0], - &surface, &bo); + ret = vmw_user_object_lookup(dev_priv, file_priv, mode_cmd->handles[0], + &uo); if (ret) { DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", mode_cmd->handles[0], mode_cmd->handles[0]); @@ -1650,7 +1534,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - if (!bo && + if (vmw_user_object_surface(&uo) && !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { DRM_ERROR("Surface size cannot exceed %dx%d\n", dev_priv->texture_max_width, @@ -1659,20 +1543,15 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - vfb = vmw_kms_new_framebuffer(dev_priv, bo, surface, - !(dev_priv->capabilities & SVGA_CAP_3D), - mode_cmd); + vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd); if (IS_ERR(vfb)) { ret = PTR_ERR(vfb); goto err_out; } err_out: - /* vmw_user_lookup_handle takes one ref so does new_fb */ - if (bo) - vmw_user_bo_unref(&bo); - if (surface) - vmw_surface_unreference(&surface); + /* vmw_user_object_lookup takes one ref so does new_fb */ + vmw_user_object_unref(&uo); if (ret) { DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); @@ -2585,72 +2464,6 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, } /** - * vmw_kms_update_proxy - Helper function to update a proxy surface from - * its backing MOB. - * - * @res: Pointer to the surface resource - * @clips: Clip rects in framebuffer (surface) space. - * @num_clips: Number of clips in @clips. - * @increment: Integer with which to increment the clip counter when looping. - * Used to skip a predetermined number of clip rects. - * - * This function makes sure the proxy surface is updated from its backing MOB - * using the region given by @clips. The surface resource @res and its backing - * MOB needs to be reserved and validated on call. - */ -int vmw_kms_update_proxy(struct vmw_resource *res, - const struct drm_clip_rect *clips, - unsigned num_clips, - int increment) -{ - struct vmw_private *dev_priv = res->dev_priv; - struct drm_vmw_size *size = &vmw_res_to_srf(res)->metadata.base_size; - struct { - SVGA3dCmdHeader header; - SVGA3dCmdUpdateGBImage body; - } *cmd; - SVGA3dBox *box; - size_t copy_size = 0; - int i; - - if (!clips) - return 0; - - cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd) * num_clips); - if (!cmd) - return -ENOMEM; - - for (i = 0; i < num_clips; ++i, clips += increment, ++cmd) { - box = &cmd->body.box; - - cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd->header.size = sizeof(cmd->body); - cmd->body.image.sid = res->id; - cmd->body.image.face = 0; - cmd->body.image.mipmap = 0; - - if (clips->x1 > size->width || clips->x2 > size->width || - clips->y1 > size->height || clips->y2 > size->height) { - DRM_ERROR("Invalid clips outsize of framebuffer.\n"); - return -EINVAL; - } - - box->x = clips->x1; - box->y = clips->y1; - box->z = 0; - box->w = clips->x2 - clips->x1; - box->h = clips->y2 - clips->y1; - box->d = 1; - - copy_size += sizeof(*cmd); - } - - vmw_cmd_commit(dev_priv, copy_size); - - return 0; -} - -/** * vmw_kms_create_implicit_placement_property - Set up the implicit placement * property. * @@ -2784,8 +2597,9 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) } else { struct vmw_framebuffer_surface *vfbs = container_of(update->vfb, typeof(*vfbs), base); + struct vmw_surface *surf = vmw_user_object_surface(&vfbs->uo); - ret = vmw_validation_add_resource(&val_ctx, &vfbs->surface->res, + ret = vmw_validation_add_resource(&val_ctx, &surf->res, 0, VMW_RES_DIRTY_NONE, NULL, NULL); } @@ -2941,3 +2755,93 @@ int vmw_connector_get_modes(struct drm_connector *connector) return num_modes; } + +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_ref(uo->buffer); + else if (uo->surface) + vmw_surface_reference(uo->surface); + return uo; +} + +void vmw_user_object_unref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_unref(&uo->buffer); + else if (uo->surface) + vmw_surface_unreference(&uo->surface); +} + +struct vmw_bo * +vmw_user_object_buffer(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer; + else if (uo->surface) + return uo->surface->res.guest_memory_bo; + return NULL; +} + +struct vmw_surface * +vmw_user_object_surface(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer->dumb_surface; + return uo->surface; +} + +void *vmw_user_object_map(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache(bo); +} + +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache_size(bo, size); +} + +void vmw_user_object_unmap(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + int ret; + + WARN_ON(!bo); + + /* Fence the mob creation so we are guarateed to have the mob */ + ret = ttm_bo_reserve(&bo->tbo, false, false, NULL); + if (ret != 0) + return; + + vmw_bo_unmap(bo); + vmw_bo_pin_reserved(bo, false); + + ttm_bo_unreserve(&bo->tbo); +} + +bool vmw_user_object_is_mapped(struct vmw_user_object *uo) +{ + struct vmw_bo *bo; + + if (!uo || vmw_user_object_is_null(uo)) + return false; + + bo = vmw_user_object_buffer(uo); + + if (WARN_ON(!bo)) + return false; + + WARN_ON(bo->map.bo && !bo->map.virtual); + return bo->map.virtual; +} + +bool vmw_user_object_is_null(struct vmw_user_object *uo) +{ + return !uo->buffer && !uo->surface; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index bf24f2f0dcfc..6141fadf81ef 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -221,11 +222,9 @@ struct vmw_framebuffer { struct vmw_framebuffer_surface { struct vmw_framebuffer base; - struct vmw_surface *surface; - bool is_bo_proxy; /* true if this is proxy surface for DMA buf */ + struct vmw_user_object uo; }; - struct vmw_framebuffer_bo { struct vmw_framebuffer base; struct vmw_bo *buffer; @@ -277,8 +276,7 @@ struct vmw_cursor_plane_state { */ struct vmw_plane_state { struct drm_plane_state base; - struct vmw_surface *surf; - struct vmw_bo *bo; + struct vmw_user_object uo; int content_fb_type; unsigned long bo_size; @@ -457,9 +455,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, uint32_t num_clips); struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd); void vmw_guess_mode_timing(struct drm_display_mode *mode); void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv); @@ -486,8 +482,7 @@ void vmw_du_plane_reset(struct drm_plane *plane); struct drm_plane_state *vmw_du_plane_duplicate_state(struct drm_plane *plane); void vmw_du_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state); -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference); +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps); int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 5befc2719a49..39949e0a493f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -147,8 +148,9 @@ static int vmw_ldu_fb_pin(struct vmw_framebuffer *vfb) struct vmw_bo *buf; int ret; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); if (!buf) return 0; @@ -169,8 +171,10 @@ static int vmw_ldu_fb_unpin(struct vmw_framebuffer *vfb) struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); struct vmw_bo *buf; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); + if (WARN_ON(!buf)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index c45b4724e414..e20f64b67b26 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -92,7 +92,7 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv, { struct vmw_escape_video_flush *flush; size_t fifo_size; - bool have_so = (dev_priv->active_display_unit == vmw_du_screen_object); + bool have_so = (dev_priv->active_display_unit != vmw_du_legacy); int i, num_items; SVGAGuestPtr ptr; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index c99cad444991..598b90ac7590 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2013 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2013-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -31,6 +32,7 @@ */ #include "vmwgfx_drv.h" +#include "vmwgfx_bo.h" #include "ttm_object.h" #include <linux/dma-buf.h> @@ -88,13 +90,35 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, uint32_t handle, uint32_t flags, int *prime_fd) { + struct vmw_private *vmw = vmw_priv(dev); struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo; int ret; + int surf_handle; - if (handle > VMWGFX_NUM_MOB) + if (handle > VMWGFX_NUM_MOB) { ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); - else - ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); + } else { + ret = vmw_user_bo_lookup(file_priv, handle, &vbo); + if (ret) + return ret; + if (vbo && vbo->is_dumb) { + ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, + flags, prime_fd); + } else { + surf_handle = vmw_lookup_surface_handle_for_buffer(vmw, + vbo, + handle); + if (surf_handle > 0) + ret = ttm_prime_handle_to_fd(tfile, surf_handle, + flags, prime_fd); + else + ret = drm_gem_prime_handle_to_fd(dev, file_priv, + handle, flags, + prime_fd); + } + vmw_user_bo_unref(&vbo); + } return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 848dba09981b..a73af8a355fb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -58,6 +59,7 @@ void vmw_resource_mob_attach(struct vmw_resource *res) rb_link_node(&res->mob_node, parent, new); rb_insert_color(&res->mob_node, &gbo->res_tree); + vmw_bo_del_detached_resource(gbo, res); vmw_bo_prio_add(gbo, res->used_prio); } @@ -287,28 +289,35 @@ out_bad_resource: * * The pointer this pointed at by out_surf and out_buf needs to be null. */ -int vmw_user_lookup_handle(struct vmw_private *dev_priv, +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf) + u32 handle, + struct vmw_user_object *uo) { struct ttm_object_file *tfile = vmw_fpriv(filp)->tfile; struct vmw_resource *res; int ret; - BUG_ON(*out_surf || *out_buf); + WARN_ON(uo->surface || uo->buffer); ret = vmw_user_resource_lookup_handle(dev_priv, tfile, handle, user_surface_converter, &res); if (!ret) { - *out_surf = vmw_res_to_srf(res); + uo->surface = vmw_res_to_srf(res); return 0; } - *out_surf = NULL; - ret = vmw_user_bo_lookup(filp, handle, out_buf); + uo->surface = NULL; + ret = vmw_user_bo_lookup(filp, handle, &uo->buffer); + if (!ret && !uo->buffer->is_dumb) { + uo->surface = vmw_lookup_surface_for_buffer(dev_priv, + uo->buffer, + handle); + if (uo->surface) + vmw_user_bo_unref(&uo->buffer); + } + return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index df0039a8ef29..0f4bfd98480a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2011-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -240,7 +241,7 @@ static void vmw_sou_crtc_mode_set_nofb(struct drm_crtc *crtc) struct vmw_connector_state *vmw_conn_state; int x, y; - sou->buffer = vps->bo; + sou->buffer = vmw_user_object_buffer(&vps->uo); conn_state = sou->base.connector.state; vmw_conn_state = vmw_connector_state_to_vcs(conn_state); @@ -376,10 +377,11 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); struct drm_crtc *crtc = plane->state->crtc ? plane->state->crtc : old_state->crtc; + struct vmw_bo *bo = vmw_user_object_buffer(&vps->uo); - if (vps->bo) - vmw_bo_unpin(vmw_priv(crtc->dev), vps->bo, false); - vmw_bo_unreference(&vps->bo); + if (bo) + vmw_bo_unpin(vmw_priv(crtc->dev), bo, false); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; vmw_du_plane_cleanup_fb(plane, old_state); @@ -411,9 +413,10 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, .bo_type = ttm_bo_type_device, .pin = true }; + struct vmw_bo *bo = NULL; if (!new_fb) { - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; return 0; @@ -422,17 +425,17 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, bo_params.size = new_state->crtc_w * new_state->crtc_h * 4; dev_priv = vmw_priv(crtc->dev); - if (vps->bo) { + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { if (vps->bo_size == bo_params.size) { /* * Note that this might temporarily up the pin-count * to 2, until cleanup_fb() is called. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, - true); + return vmw_bo_pin_in_vram(dev_priv, bo, true); } - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; } @@ -442,7 +445,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * resume the overlays, this is preferred to failing to alloc. */ vmw_overlay_pause_all(dev_priv); - ret = vmw_bo_create(dev_priv, &bo_params, &vps->bo); + ret = vmw_gem_object_create(dev_priv, &bo_params, &vps->uo.buffer); vmw_overlay_resume_all(dev_priv); if (ret) return ret; @@ -453,7 +456,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * TTM already thinks the buffer is pinned, but make sure the * pin_count is upped. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, true); + return vmw_bo_pin_in_vram(dev_priv, vps->uo.buffer, true); } static uint32_t vmw_sou_bo_fifo_size(struct vmw_du_update_plane *update, @@ -580,6 +583,7 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, { struct vmw_kms_sou_dirty_cmd *blit = cmd; struct vmw_framebuffer_surface *vfbs; + struct vmw_surface *surf = NULL; vfbs = container_of(update->vfb, typeof(*vfbs), base); @@ -587,7 +591,8 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, blit->header.size = sizeof(blit->body) + sizeof(SVGASignedRect) * num_hits; - blit->body.srcImage.sid = vfbs->surface->res.id; + surf = vmw_user_object_surface(&vfbs->uo); + blit->body.srcImage.sid = surf->res.id; blit->body.destScreenId = update->du->unit; /* Update the source and destination bounding box later in post_clip */ @@ -1104,7 +1109,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index a04e0736318d..5453f7cf0e2d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /****************************************************************************** * - * COPYRIGHT (C) 2014-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2014-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -29,6 +30,7 @@ #include "vmwgfx_kms.h" #include "vmwgfx_vkms.h" #include "vmw_surface_cache.h" +#include <linux/fsnotify.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -735,7 +737,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); @@ -746,12 +748,6 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, if (ret) goto out_unref; - if (vfbs->is_bo_proxy) { - ret = vmw_kms_update_proxy(srf, clips, num_clips, inc); - if (ret) - goto out_finish; - } - sdirty.base.fifo_commit = vmw_kms_stdu_surface_fifo_commit; sdirty.base.clip = vmw_kms_stdu_surface_clip; sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) + @@ -765,7 +761,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips, dest_x, dest_y, num_clips, inc, &sdirty.base); -out_finish: + vmw_kms_helper_validation_finish(dev_priv, NULL, &val_ctx, out_fence, NULL); @@ -877,6 +873,32 @@ vmw_stdu_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } +/* + * Trigger a modeset if the X,Y position of the Screen Target changes. + * This is needed when multi-mon is cycled. The original Screen Target will have + * the same mode but its relative X,Y position in the topology will change. + */ +static int vmw_stdu_connector_atomic_check(struct drm_connector *conn, + struct drm_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct vmw_screen_target_display_unit *du; + struct drm_crtc_state *new_crtc_state; + + conn_state = drm_atomic_get_connector_state(state, conn); + du = vmw_connector_to_stdu(conn); + + if (!conn_state->crtc) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (du->base.gui_x != du->base.set_gui_x || + du->base.gui_y != du->base.set_gui_y) + new_crtc_state->mode_changed = true; + + return 0; +} + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, @@ -891,7 +913,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { static const struct drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { .get_modes = vmw_connector_get_modes, - .mode_valid = vmw_stdu_connector_mode_valid + .mode_valid = vmw_stdu_connector_mode_valid, + .atomic_check = vmw_stdu_connector_atomic_check, }; @@ -918,9 +941,8 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf) + if (vmw_user_object_surface(&vps->uo)) WARN_ON(!vps->pinned); - vmw_du_plane_cleanup_fb(plane, old_state); vps->content_fb_type = SAME_AS_DISPLAY; @@ -928,7 +950,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, } - /** * vmw_stdu_primary_plane_prepare_fb - Readies the display surface * @@ -952,13 +973,15 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, enum stdu_content_type new_content_type; struct vmw_framebuffer_surface *new_vfbs; uint32_t hdisplay = new_state->crtc_w, vdisplay = new_state->crtc_h; + struct drm_plane_state *old_state = plane->state; + struct drm_rect rect; int ret; /* No FB to prepare */ if (!new_fb) { - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } return 0; @@ -968,8 +991,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, new_vfbs = (vfb->bo) ? NULL : vmw_framebuffer_to_vfbs(new_fb); if (new_vfbs && - new_vfbs->surface->metadata.base_size.width == hdisplay && - new_vfbs->surface->metadata.base_size.height == vdisplay) + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.width == hdisplay && + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.height == vdisplay) new_content_type = SAME_AS_DISPLAY; else if (vfb->bo) new_content_type = SEPARATE_BO; @@ -1007,29 +1030,29 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, metadata.num_sizes = 1; metadata.scanout = true; } else { - metadata = new_vfbs->surface->metadata; + metadata = vmw_user_object_surface(&new_vfbs->uo)->metadata; } metadata.base_size.width = hdisplay; metadata.base_size.height = vdisplay; metadata.base_size.depth = 1; - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { struct drm_vmw_size cur_base_size = - vps->surf->metadata.base_size; + vmw_user_object_surface(&vps->uo)->metadata.base_size; if (cur_base_size.width != metadata.base_size.width || cur_base_size.height != metadata.base_size.height || - vps->surf->metadata.format != metadata.format) { + vmw_user_object_surface(&vps->uo)->metadata.format != metadata.format) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } } - if (!vps->surf) { + if (!vmw_user_object_surface(&vps->uo)) { ret = vmw_gb_surface_define(dev_priv, &metadata, - &vps->surf); + &vps->uo.surface); if (ret != 0) { DRM_ERROR("Couldn't allocate STDU surface.\n"); return ret; @@ -1042,18 +1065,19 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, * The only time we add a reference in prepare_fb is if the * state object doesn't have a reference to begin with */ - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } - vps->surf = vmw_surface_reference(new_vfbs->surface); + memcpy(&vps->uo, &new_vfbs->uo, sizeof(vps->uo)); + vmw_user_object_ref(&vps->uo); } - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { /* Pin new surface before flipping */ - ret = vmw_resource_pin(&vps->surf->res, false); + ret = vmw_resource_pin(&vmw_user_object_surface(&vps->uo)->res, false); if (ret) goto out_srf_unref; @@ -1063,6 +1087,34 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, vps->content_fb_type = new_content_type; /* + * The drm fb code will do blit's via the vmap interface, which doesn't + * trigger vmw_bo page dirty tracking due to being kernel side (and thus + * doesn't require mmap'ing) so we have to update the surface's dirty + * regions by hand but we want to be careful to not overwrite the + * resource if it has been written to by the gpu (res_dirty). + */ + if (vps->uo.buffer && vps->uo.buffer->is_dumb) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + struct vmw_resource *res = &surf->res; + + if (!res->res_dirty && drm_atomic_helper_damage_merged(old_state, + new_state, + &rect)) { + /* + * At some point it might be useful to actually translate + * (rect.x1, rect.y1) => start, and (rect.x2, rect.y2) => end, + * but currently the fb code will just report the entire fb + * dirty so in practice it doesn't matter. + */ + pgoff_t start = res->guest_memory_offset >> PAGE_SHIFT; + pgoff_t end = __KERNEL_DIV_ROUND_UP(res->guest_memory_offset + + res->guest_memory_size, + PAGE_SIZE); + vmw_resource_dirty_update(res, start, end); + } + } + + /* * This should only happen if the buffer object is too large to create a * proxy surface for. */ @@ -1072,7 +1124,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, return 0; out_srf_unref: - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); return ret; } @@ -1214,14 +1266,8 @@ static uint32_t vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_update); return size; @@ -1230,14 +1276,8 @@ vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_surface_copy) + sizeof(SVGA3dCopyBox) * num_hits + sizeof(struct vmw_stdu_update); @@ -1245,47 +1285,6 @@ static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update, } static uint32_t -vmw_stdu_surface_update_proxy(struct vmw_du_update_plane *update, void *cmd) -{ - struct vmw_framebuffer_surface *vfbs; - struct drm_plane_state *state = update->plane->state; - struct drm_plane_state *old_state = update->old_state; - struct vmw_stdu_update_gb_image *cmd_update = cmd; - struct drm_atomic_helper_damage_iter iter; - struct drm_rect clip; - uint32_t copy_size = 0; - - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - /* - * proxy surface is special where a buffer object type fb is wrapped - * in a surface and need an update gb image command to sync with device. - */ - drm_atomic_helper_damage_iter_init(&iter, old_state, state); - drm_atomic_for_each_plane_damage(&iter, &clip) { - SVGA3dBox *box = &cmd_update->body.box; - - cmd_update->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd_update->header.size = sizeof(cmd_update->body); - cmd_update->body.image.sid = vfbs->surface->res.id; - cmd_update->body.image.face = 0; - cmd_update->body.image.mipmap = 0; - - box->x = clip.x1; - box->y = clip.y1; - box->z = 0; - box->w = drm_rect_width(&clip); - box->h = drm_rect_height(&clip); - box->d = 1; - - copy_size += sizeof(*cmd_update); - cmd_update++; - } - - return copy_size; -} - -static uint32_t vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, uint32_t num_hits) { @@ -1299,7 +1298,7 @@ vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, cmd_copy->header.id = SVGA_3D_CMD_SURFACE_COPY; cmd_copy->header.size = sizeof(cmd_copy->body) + sizeof(SVGA3dCopyBox) * num_hits; - cmd_copy->body.src.sid = vfbs->surface->res.id; + cmd_copy->body.src.sid = vmw_user_object_surface(&vfbs->uo)->res.id; cmd_copy->body.dest.sid = stdu->display_srf->res.id; return sizeof(*cmd_copy); @@ -1370,10 +1369,7 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv, srf_update.mutex = &dev_priv->cmdbuf_mutex; srf_update.intr = true; - if (vfbs->is_bo_proxy) - srf_update.post_prepare = vmw_stdu_surface_update_proxy; - - if (vfbs->surface->res.id != stdu->display_srf->res.id) { + if (vmw_user_object_surface(&vfbs->uo)->res.id != stdu->display_srf->res.id) { srf_update.calc_fifo_size = vmw_stdu_surface_fifo_size; srf_update.pre_clip = vmw_stdu_surface_populate_copy; srf_update.clip = vmw_stdu_surface_populate_clip; @@ -1417,7 +1413,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu = vmw_crtc_to_stdu(crtc); dev_priv = vmw_priv(crtc->dev); - stdu->display_srf = vps->surf; + stdu->display_srf = vmw_user_object_surface(&vps->uo); stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index e7a744dfcecf..8ae6a761c900 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -36,9 +37,6 @@ #include <drm/ttm/ttm_placement.h> #define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32) -#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) (svga3d_flags >> 32) -#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ - (svga3d_flags & ((uint64_t)U32_MAX)) /** * struct vmw_user_surface - User-space visible surface resource @@ -686,6 +684,14 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + + /* + * Dumb buffers own the resource and they'll unref the + * resource themselves + */ + if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) + return; + vmw_resource_unreference(&res); } @@ -812,7 +818,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } } res->guest_memory_size = cur_bo_offset; - if (metadata->scanout && + if (!file_priv->atomic && + metadata->scanout && metadata->num_sizes == 1 && metadata->sizes[0].width == VMW_CURSOR_SNOOP_WIDTH && metadata->sizes[0].height == VMW_CURSOR_SNOOP_HEIGHT && @@ -864,6 +871,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } + vmw_bo_add_detached_resource(res->guest_memory_bo, res); } tmp = vmw_resource_reference(&srf->res); @@ -892,6 +900,113 @@ out_unlock: return ret; } +static struct vmw_user_surface * +vmw_lookup_user_surface_for_buffer(struct vmw_private *vmw, struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = NULL; + struct vmw_surface *surf; + struct ttm_base_object *base; + + surf = vmw_bo_surface(bo); + if (surf) { + rcu_read_lock(); + user_srf = container_of(surf, struct vmw_user_surface, srf); + base = &user_srf->prime.base; + if (base && !kref_get_unless_zero(&base->refcount)) { + drm_dbg_driver(&vmw->drm, + "%s: referencing a stale surface handle %d\n", + __func__, handle); + base = NULL; + user_srf = NULL; + } + rcu_read_unlock(); + } + + return user_srf; +} + +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + struct vmw_surface *surf = NULL; + struct ttm_base_object *base; + + if (user_srf) { + surf = vmw_surface_reference(&user_srf->srf); + base = &user_srf->prime.base; + ttm_base_object_unref(&base); + } + return surf; +} + +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + int surf_handle = 0; + struct ttm_base_object *base; + + if (user_srf) { + base = &user_srf->prime.base; + surf_handle = (u32)base->handle; + ttm_base_object_unref(&base); + } + return surf_handle; +} + +static int vmw_buffer_prime_to_surface_base(struct vmw_private *dev_priv, + struct drm_file *file_priv, + u32 fd, u32 *handle, + struct ttm_base_object **base_p) +{ + struct ttm_base_object *base; + struct vmw_bo *bo; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_user_surface *user_srf; + int ret; + + ret = drm_gem_prime_fd_to_handle(&dev_priv->drm, file_priv, fd, handle); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to find user buffer for fd = %u.\n", fd); + return ret; + } + + ret = vmw_user_bo_lookup(file_priv, *handle, &bo); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to lookup user buffer for handle = %u.\n", *handle); + return ret; + } + + user_srf = vmw_lookup_user_surface_for_buffer(dev_priv, bo, *handle); + if (WARN_ON(!user_srf)) { + drm_warn(&dev_priv->drm, + "User surface fd %d (handle %d) is null.\n", fd, *handle); + ret = -EINVAL; + goto out; + } + + base = &user_srf->prime.base; + ret = ttm_ref_object_add(tfile, base, NULL, false); + if (ret) { + drm_warn(&dev_priv->drm, + "Couldn't add an object ref for the buffer (%d).\n", *handle); + goto out; + } + + *base_p = base; +out: + vmw_user_bo_unref(&bo); + + return ret; +} static int vmw_surface_handle_reference(struct vmw_private *dev_priv, @@ -901,15 +1016,19 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, struct ttm_base_object **base_p) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct vmw_user_surface *user_srf; + struct vmw_user_surface *user_srf = NULL; uint32_t handle; struct ttm_base_object *base; int ret; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); - if (unlikely(ret != 0)) - return ret; + if (ret) + return vmw_buffer_prime_to_surface_base(dev_priv, + file_priv, + u_handle, + &handle, + base_p); } else { handle = u_handle; } @@ -1503,7 +1622,12 @@ vmw_gb_surface_define_internal(struct drm_device *dev, ret = vmw_user_bo_lookup(file_priv, req->base.buffer_handle, &res->guest_memory_bo); if (ret == 0) { - if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { + if (res->guest_memory_bo->is_dumb) { + VMW_DEBUG_USER("Can't backup surface with a dumb buffer.\n"); + vmw_user_bo_unref(&res->guest_memory_bo); + ret = -EINVAL; + goto out_unlock; + } else if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); vmw_user_bo_unref(&res->guest_memory_bo); ret = -EINVAL; @@ -1560,6 +1684,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { + vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; @@ -2100,3 +2225,140 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, out_unlock: return ret; } + +static SVGA3dSurfaceFormat vmw_format_bpp_to_svga(struct vmw_private *vmw, + int bpp) +{ + switch (bpp) { + case 8: /* DRM_FORMAT_C8 */ + return SVGA3D_P8; + case 16: /* DRM_FORMAT_RGB565 */ + return SVGA3D_R5G6B5; + case 32: /* DRM_FORMAT_XRGB8888 */ + if (has_sm4_context(vmw)) + return SVGA3D_B8G8R8X8_UNORM; + return SVGA3D_X8R8G8B8; + default: + drm_warn(&vmw->drm, "Unsupported format bpp: %d\n", bpp); + return SVGA3D_X8R8G8B8; + } +} + +/** + * vmw_dumb_create - Create a dumb kms buffer + * + * @file_priv: Pointer to a struct drm_file identifying the caller. + * @dev: Pointer to the drm device. + * @args: Pointer to a struct drm_mode_create_dumb structure + * Return: Zero on success, negative error code on failure. + * + * This is a driver callback for the core drm create_dumb functionality. + * Note that this is very similar to the vmw_bo_alloc ioctl, except + * that the arguments have a different format. + */ +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo = NULL; + struct vmw_resource *res = NULL; + union drm_vmw_gb_surface_create_ext_arg arg = { 0 }; + struct drm_vmw_gb_surface_create_ext_req *req = &arg.req; + int ret; + struct drm_vmw_size drm_size = { + .width = args->width, + .height = args->height, + .depth = 1, + }; + SVGA3dSurfaceFormat format = vmw_format_bpp_to_svga(dev_priv, args->bpp); + const struct SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format); + SVGA3dSurfaceAllFlags flags = SVGA3D_SURFACE_HINT_TEXTURE | + SVGA3D_SURFACE_HINT_RENDERTARGET | + SVGA3D_SURFACE_SCREENTARGET | + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | + SVGA3D_SURFACE_BIND_RENDER_TARGET; + + /* + * Without mob support we're just going to use raw memory buffer + * because we wouldn't be able to support full surface coherency + * without mobs + */ + if (!dev_priv->has_mob) { + int cpp = DIV_ROUND_UP(args->bpp, 8); + + switch (cpp) { + case 1: /* DRM_FORMAT_C8 */ + case 2: /* DRM_FORMAT_RGB565 */ + case 4: /* DRM_FORMAT_XRGB8888 */ + break; + default: + /* + * Dumb buffers don't allow anything else. + * This is tested via IGT's dumb_buffers + */ + return -EINVAL; + } + + args->pitch = args->width * cpp; + args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); + + ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, + args->size, &args->handle, + &vbo); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->tbo.base); + return ret; + } + + req->version = drm_vmw_gb_surface_v1; + req->multisample_pattern = SVGA3D_MS_PATTERN_NONE; + req->quality_level = SVGA3D_MS_QUALITY_NONE; + req->buffer_byte_stride = 0; + req->must_be_zero = 0; + req->base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(flags); + req->svga3d_flags_upper_32_bits = SVGA3D_FLAGS_UPPER_32(flags); + req->base.format = (uint32_t)format; + req->base.drm_surface_flags = drm_vmw_surface_flag_scanout; + req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable; + req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer; + req->base.drm_surface_flags |= drm_vmw_surface_flag_coherent; + req->base.base_size.width = args->width; + req->base.base_size.height = args->height; + req->base.base_size.depth = 1; + req->base.array_size = 0; + req->base.mip_levels = 1; + req->base.multisample_count = 0; + req->base.buffer_handle = SVGA3D_INVALID_ID; + req->base.autogen_filter = SVGA3D_TEX_FILTER_NONE; + ret = vmw_gb_surface_define_ext_ioctl(dev, &arg, file_priv); + if (ret) { + drm_warn(dev, "Unable to create a dumb buffer\n"); + return ret; + } + + args->handle = arg.rep.buffer_handle; + args->size = arg.rep.buffer_size; + args->pitch = vmw_surface_calculate_pitch(desc, &drm_size); + + ret = vmw_user_resource_lookup_handle(dev_priv, tfile, arg.rep.handle, + user_surface_converter, + &res); + if (ret) { + drm_err(dev, "Created resource handle doesn't exist!\n"); + goto err; + } + + vbo = res->guest_memory_bo; + vbo->is_dumb = true; + vbo->dumb_surface = vmw_res_to_srf(res); + +err: + if (res) + vmw_resource_unreference(&res); + if (ret) + ttm_ref_object_base_unref(tfile, arg.rep.handle); + + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c index 3bfcf671fcd5..8651b788e98b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c @@ -75,7 +75,7 @@ done: return ret; } -static int +static void compute_crc(struct drm_crtc *crtc, struct vmw_surface *surf, u32 *crc) @@ -101,8 +101,6 @@ compute_crc(struct drm_crtc *crtc, } vmw_bo_unmap(bo); - - return 0; } static void @@ -116,7 +114,6 @@ crc_generate_worker(struct work_struct *work) u64 frame_start, frame_end; u32 crc32 = 0; struct vmw_surface *surf = 0; - int ret; spin_lock_irq(&du->vkms.crc_state_lock); crc_pending = du->vkms.crc_pending; @@ -130,22 +127,24 @@ crc_generate_worker(struct work_struct *work) return; spin_lock_irq(&du->vkms.crc_state_lock); - surf = du->vkms.surface; + surf = vmw_surface_reference(du->vkms.surface); spin_unlock_irq(&du->vkms.crc_state_lock); - if (vmw_surface_sync(vmw, surf)) { - drm_warn(crtc->dev, "CRC worker wasn't able to sync the crc surface!\n"); - return; - } + if (surf) { + if (vmw_surface_sync(vmw, surf)) { + drm_warn( + crtc->dev, + "CRC worker wasn't able to sync the crc surface!\n"); + return; + } - ret = compute_crc(crtc, surf, &crc32); - if (ret) - return; + compute_crc(crtc, surf, &crc32); + vmw_surface_unreference(&surf); + } spin_lock_irq(&du->vkms.crc_state_lock); frame_start = du->vkms.frame_start; frame_end = du->vkms.frame_end; - crc_pending = du->vkms.crc_pending; du->vkms.frame_start = 0; du->vkms.frame_end = 0; du->vkms.crc_pending = false; @@ -164,7 +163,7 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) struct vmw_display_unit *du = container_of(timer, struct vmw_display_unit, vkms.timer); struct drm_crtc *crtc = &du->crtc; struct vmw_private *vmw = vmw_priv(crtc->dev); - struct vmw_surface *surf = NULL; + bool has_surface = false; u64 ret_overrun; bool locked, ret; @@ -179,10 +178,10 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) WARN_ON(!ret); if (!locked) return HRTIMER_RESTART; - surf = du->vkms.surface; + has_surface = du->vkms.surface != NULL; vmw_vkms_unlock(crtc); - if (du->vkms.crc_enabled && surf) { + if (du->vkms.crc_enabled && has_surface) { u64 frame = drm_crtc_accurate_vblank_count(crtc); spin_lock(&du->vkms.crc_state_lock); @@ -336,6 +335,8 @@ vmw_vkms_crtc_cleanup(struct drm_crtc *crtc) { struct vmw_display_unit *du = vmw_crtc_to_du(crtc); + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); WARN_ON(work_pending(&du->vkms.crc_generator_work)); hrtimer_cancel(&du->vkms.timer); } @@ -497,9 +498,12 @@ vmw_vkms_set_crc_surface(struct drm_crtc *crtc, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_private *vmw = vmw_priv(crtc->dev); - if (vmw->vkms_enabled) { + if (vmw->vkms_enabled && du->vkms.surface != surf) { WARN_ON(atomic_read(&du->vkms.atomic_lock) != VMW_VKMS_LOCK_MODESET); - du->vkms.surface = surf; + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); + if (surf) + du->vkms.surface = vmw_surface_reference(surf); } } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 03492fbcb8fb..f2f1d8ddb221 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -87,9 +87,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) spin_unlock(&xe->clients.lock); file->driver_priv = xef; + kref_init(&xef->refcount); + return 0; } +static void xe_file_destroy(struct kref *ref) +{ + struct xe_file *xef = container_of(ref, struct xe_file, refcount); + struct xe_device *xe = xef->xe; + + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + +/** + * xe_file_get() - Take a reference to the xe file object + * @xef: Pointer to the xe file + * + * Anyone with a pointer to xef must take a reference to the xe file + * object using this call. + * + * Return: xe file pointer + */ +struct xe_file *xe_file_get(struct xe_file *xef) +{ + kref_get(&xef->refcount); + return xef; +} + +/** + * xe_file_put() - Drop a reference to the xe file object + * @xef: Pointer to the xe file + * + * Used to drop reference to the xef object + */ +void xe_file_put(struct xe_file *xef) +{ + kref_put(&xef->refcount, xe_file_destroy); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -98,6 +144,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) struct xe_exec_queue *q; unsigned long idx; + xe_pm_runtime_get(xe); + /* * No need for exec_queue.lock here as there is no contention for it * when FD is closing as IOCTLs presumably can't be modifying the @@ -108,21 +156,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - xa_destroy(&xef->exec_queue.xa); - mutex_destroy(&xef->exec_queue.lock); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xa_destroy(&xef->vm.xa); - mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); + xe_file_put(xef); - xe_drm_client_put(xef->client); - kfree(xef); + xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { @@ -854,6 +895,13 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } +static void xe_device_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + xe_pm_runtime_put(xe); +} + /** * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance @@ -870,11 +918,21 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) */ void xe_device_declare_wedged(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + if (xe->wedged.mode == 0) { drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n"); return; } + if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { + drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); + return; + } + + xe_pm_runtime_get_noresume(xe); + if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; drm_err(&xe->drm, @@ -883,4 +941,7 @@ void xe_device_declare_wedged(struct xe_device *xe) "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); } + + for_each_gt(gt, xe, id) + xe_gt_declare_wedged(gt); } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bb07f5669dbb..b3952718b3c1 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -170,4 +170,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) void xe_device_declare_wedged(struct xe_device *xe); +struct xe_file *xe_file_get(struct xe_file *xef); +void xe_file_put(struct xe_file *xef); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744..cbc582bcc90a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -566,6 +566,9 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + + /** @refcount: ref count of this xe file */ + struct kref refcount; }; #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6a26923fa10e..7ddd59908334 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { + xa_for_each(&xef->exec_queue.xa, i, q) xe_exec_queue_update_run_ticks(q); - xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks; - q->old_run_ticks = q->run_ticks; - } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 2d72cdec3a0b..f36980aa26e6 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -118,7 +118,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0, num_ufence = 0; + u32 i, num_syncs, num_ufence = 0; struct xe_sched_job *job; struct xe_vm *vm; bool write_locked, skip_retry = false; @@ -156,15 +156,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) vm = q->vm; - for (i = 0; i < args->num_syncs; i++) { - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], - &syncs_user[i], SYNC_PARSE_FLAG_EXEC | + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; - if (xe_sync_is_ufence(&syncs[i])) + if (xe_sync_is_ufence(&syncs[num_syncs])) num_ufence++; } @@ -325,8 +325,8 @@ err_unlock_list: if (err == -EAGAIN && !skip_retry) goto retry; err_syncs: - for (i = 0; i < num_syncs; i++) - xe_sync_entry_cleanup(&syncs[i]); + while (num_syncs--) + xe_sync_entry_cleanup(&syncs[num_syncs]); kfree(syncs); err_exec_queue: xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ba37835849b..a39384bb9553 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) { if (q->vm) xe_vm_put(q->vm); + + if (q->xef) + xe_file_put(q->xef); + kfree(q); } @@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; args->exec_queue_id = id; + q->xef = xe_file_get(xef); return 0; @@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { + struct xe_file *xef; struct xe_lrc *lrc; u32 old_ts, new_ts; @@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) if (!q->vm || !q->vm->xef) return; + xef = q->vm->xef; + /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - q->run_ticks += (new_ts - old_ts) * q->width; + xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } void xe_exec_queue_kill(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 201588ec33c3..a35ce24c9798 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -38,6 +38,9 @@ enum xe_exec_queue_priority { * a kernel object. */ struct xe_exec_queue { + /** @xef: Back pointer to xe file if this is user created exec queue */ + struct xe_file *xef; + /** @gt: graphics tile this exec queue can submit to */ struct xe_gt *gt; /** @@ -139,10 +142,6 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; - /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */ - u64 old_run_ticks; - /** @run_ticks: hw engine class run time in ticks for this exec queue */ - u64 run_ticks; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0ba2e2d0289b..31b2e64c70c6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -904,3 +904,18 @@ struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) return NULL; } + +/** + * xe_gt_declare_wedged() - Declare GT wedged + * @gt: the GT object + * + * Wedge the GT which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_gt_declare_wedged(struct xe_gt *gt) +{ + xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); + + xe_uc_declare_wedged(>->uc); + xe_gt_tlb_invalidation_reset(gt); +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 1123fdfc4ebc..8b1a5027dcf2 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -37,6 +37,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); +void xe_gt_declare_wedged(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index db6c213da847..b6f0a7299c03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1543,6 +1543,7 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); + fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); @@ -1926,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1934,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 41e46a00c01e..8892d6c2291e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key), + return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), vf_runtime_reg_cmp); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d9359976ab8b..481d83d07367 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,10 +13,13 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + /* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue @@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + xe_gt_tlb_invalidation_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(xe, fence); +} static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); - list_del(&fence->link); fence->base.error = -ETIME; - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, @@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; - struct xe_guc *guc = >->uc.guc; int pending_seqno; /* @@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) else pending_seqno = gt->tlb_invalidation.seqno - 1; WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) @@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, int seqno; int ret; + xe_gt_assert(gt, fence); + /* * XXX: The seqno algorithm relies on TLB invalidation being processed * in order which they currently are, if that changes the algorithm will @@ -173,10 +177,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; - if (fence) { - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - } + fence->seqno = seqno; + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); @@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = seqno; } mutex_unlock(&guc->ct.lock); @@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * caller can use the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: 0 on success, negative error code on error */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; - return send_tlb_invalidation(>->uc.guc, NULL, action, + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); } @@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (xe_guc_ct_enabled(>->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - if (seqno <= 0) - return seqno; + struct xe_gt_tlb_invalidation_fence fence; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + ret = xe_gt_tlb_invalidation_guc(gt, &fence); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence); + return ret; + } - xe_gt_tlb_invalidation_wait(gt, seqno); + xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { if (IS_SRIOV_VF(xe)) return 0; @@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL + * completion * @start: start address * @end: end address * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; + xe_gt_assert(gt, fence); + /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(xe, fence); - + __invalidation_fence_signal(xe, fence); return 0; } @@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, * @vma: VMA to invalidate * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -401,43 +404,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, } /** - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete - * @gt: graphics tile - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation - * - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. - * - * Return: 0 on success, -ETIME on TLB invalidation timeout - */ -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_guc *guc = >->uc.guc; - int ret; - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) - return 0; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - tlb_timeout_jiffies(gt)); - if (!ret) { - struct drm_printer p = xe_gt_err_printer(gt); - - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); - xe_guc_ct_print(&guc->ct, &p, true); - return -ETIME; - } - - return 0; -} - -/** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc * @msg: message indicating TLB invalidation done @@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } - /* - * wake_up_all() and wait_event_timeout() already have the correct - * barriers. - */ WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +/** + * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence + * @gt: GT + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini + * must be called if fence is not signaled. + */ +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&fence->base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(>->tlb_invalidation.lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->gt = gt; +} + +/** + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence + * @fence: TLB invalidation fence to finalize + * + * Drop PM ref which fence took durinig init. + */ +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + xe_pm_runtime_put(gt_to_xe(fence->gt)); +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index bf3bebd9f985..a84065fa324c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack); +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); + +static inline void +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index 934c828efe31..de6e825e0851 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -8,6 +8,8 @@ #include <linux/dma-fence.h> +struct xe_gt; + /** * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence * @@ -17,6 +19,8 @@ struct xe_gt_tlb_invalidation_fence { /** @base: dma fence base */ struct dma_fence base; + /** @gt: GT which fence belong to */ + struct xe_gt *gt; /** @link: link into list of pending tlb fences */ struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index eb655cee19f7..de0fe9e65746 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1178,3 +1178,19 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_guc_ct_print(&guc->ct, p, false); xe_guc_submit_print(guc, p); } + +/** + * xe_guc_declare_wedged() - Declare GuC wedged + * @guc: the GuC object + * + * Wedge the GuC which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_guc_declare_wedged(struct xe_guc *guc) +{ + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + + xe_guc_reset_prepare(guc); + xe_guc_ct_stop(&guc->ct); + xe_guc_submit_wedge(guc); +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index af59c9545753..e0bbf98f849d 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -37,6 +37,7 @@ void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); +void xe_guc_declare_wedged(struct xe_guc *guc); static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d2e937da1d8..64afc90ad2c5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED); + if (ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; ct->state = state; @@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) || + (g2h_len && num_g2h)); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); + if (!ct->g2h_outstanding) + xe_pm_runtime_get_noresume(ct_to_xe(ct)); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; } @@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; - --ct->g2h_outstanding; + if (!--ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); } static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 373447758a60..6398629e6b4e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -861,29 +861,27 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } -static bool guc_submit_hint_wedged(struct xe_guc *guc) +/** + * xe_guc_submit_wedge() - Wedge GuC submission + * @guc: the GuC object + * + * Save exec queue's registered with GuC state by taking a ref to each queue. + * Register a DRMM handler to drop refs upon driver unload. + */ +void xe_guc_submit_wedge(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; unsigned long index; int err; - if (xe->wedged.mode != 2) - return false; - - if (xe_device_wedged(xe)) - return true; - - xe_device_declare_wedged(xe); - - xe_guc_submit_reset_prepare(guc); - xe_guc_ct_stop(&guc->ct); + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); - return true; /* Device is wedged anyway */ + return; } mutex_lock(&guc->submission_state.lock); @@ -891,6 +889,19 @@ static bool guc_submit_hint_wedged(struct xe_guc *guc) if (xe_exec_queue_get_unless_zero(q)) set_exec_queue_wedged(q); mutex_unlock(&guc->submission_state.lock); +} + +static bool guc_submit_hint_wedged(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + + if (xe->wedged.mode != 2) + return false; + + if (xe_device_wedged(xe)) + return true; + + xe_device_declare_wedged(xe); return true; } @@ -1382,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1471,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; msg->private_data = q; @@ -1677,7 +1692,8 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) void xe_guc_submit_reset_wait(struct xe_guc *guc) { - wait_event(guc->ct.wq, !guc_read_stopped(guc)); + wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || + !guc_read_stopped(guc)); } void xe_guc_submit_stop(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 4ad5f4c1b084..bdf8c9f3d24a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -18,6 +18,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); +void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025..832ea81faeee 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95e..58121821f081 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ade9e7a3a0ad..31a751a5de3f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1115,23 +1115,6 @@ struct invalidation_fence { u32 asid; }; -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - static void invalidation_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - - INIT_LIST_HEAD(&ifence->base.link); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; ifence->start = start; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 02e28274282f..5efe83cc82ab 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -231,7 +231,7 @@ static void rtp_mark_active(struct xe_device *xe, if (first == last) bitmap_set(ctx->active_entries, first, 1); else - bitmap_set(ctx->active_entries, first, last - first + 2); + bitmap_set(ctx->active_entries, first, last - first + 1); } /** diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2883d9aca404..e8d31e010860 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, u64 value) { struct xe_user_fence *ufence; + u64 __user *ptr = u64_to_user_ptr(addr); + + if (!access_ok(ptr, sizeof(ptr))) + return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) - return NULL; + return ERR_PTR(-ENOMEM); ufence->xe = xe; kref_init(&ufence->refcount); - ufence->addr = u64_to_user_ptr(addr); + ufence->addr = ptr; ufence->value = value; ufence->mm = current->mm; mmgrab(ufence->mm); @@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_DBG(xe, !sync->ufence)) - return -ENOMEM; + if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) + return PTR_ERR(sync->ufence); } break; @@ -263,7 +267,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0f240534fb72..0d073a9987c2 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -300,3 +300,17 @@ void xe_uc_remove(struct xe_uc *uc) { xe_gsc_remove(&uc->gsc); } + +/** + * xe_uc_declare_wedged() - Declare UC wedged + * @uc: the UC object + * + * Wedge the UC which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_uc_declare_wedged(struct xe_uc *uc) +{ + xe_gt_assert(uc_to_gt(uc), uc_to_xe(uc)->wedged.mode); + + xe_guc_declare_wedged(&uc->guc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 11856f24e6f9..506517c11333 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -21,5 +21,6 @@ int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_remove(struct xe_uc *uc); +void xe_uc_declare_wedged(struct xe_uc *uc); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5b166fa03684..c7561a56abaf 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + + if (vm->xef) + xe_file_put(vm->xef); + kfree(vm); } @@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; - vm->xef = xef; + vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) @@ -3337,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; u32 tile_needs_invalidate = 0; - int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; - int ret; + int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); @@ -3365,29 +3369,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (xe_pt_zap_ptes(tile, vma)) { - tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[id], true); + /* * FIXME: We potentially need to invalidate multiple * GTs within the tile */ - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); - if (seqno[id] < 0) - return seqno[id]; - } - } + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, + &fence[id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[id]); + goto wait; + } - for_each_tile(tile, xe, id) { - if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); - if (ret < 0) - return ret; + tile_needs_invalidate |= BIT(id); } } +wait: + for_each_tile(tile, xe, id) + if (tile_needs_invalidate & BIT(id)) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + vma->tile_invalidated = vma->tile_mask; - return 0; + return ret; } struct xe_vm_snapshot { diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index bdb578e0899f..4b59687ff5d8 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -288,12 +288,22 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) mp2_ops->start(privdata, info); cl_data->sensor_sts[i] = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); + + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + cl_data->is_any_sensor_enabled = true; + } + + if (!cl_data->is_any_sensor_enabled || + (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { + dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", + cl_data->is_any_sensor_enabled); + rc = -EOPNOTSUPP; + goto cleanup; } for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->cur_hid_dev = i; if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { - cl_data->is_any_sensor_enabled = true; rc = amdtp_hid_probe(i, cl_data); if (rc) goto cleanup; @@ -305,12 +315,6 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_sts[i]); } - if (!cl_data->is_any_sensor_enabled || - (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { - dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); - rc = -EOPNOTSUPP; - goto cleanup; - } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b52337068..81f3024b7b1b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ err_hid_data: void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } diff --git a/drivers/hid/bpf/Kconfig b/drivers/hid/bpf/Kconfig index 83214bae6768..d65482e02a6c 100644 --- a/drivers/hid/bpf/Kconfig +++ b/drivers/hid/bpf/Kconfig @@ -3,7 +3,7 @@ menu "HID-BPF support" config HID_BPF bool "HID-BPF support" - depends on BPF + depends on BPF_JIT depends on BPF_SYSCALL depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS help diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c index f59cce6e437f..cd696c59ba0f 100644 --- a/drivers/hid/bpf/hid_bpf_struct_ops.c +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -183,6 +183,10 @@ static int hid_bpf_reg(void *kdata, struct bpf_link *link) struct hid_device *hdev; int count, err = 0; + /* prevent multiple attach of the same struct_ops */ + if (ops->hdev) + return -EINVAL; + hdev = hid_get_device(ops->hid_id); if (IS_ERR(hdev)) return PTR_ERR(hdev); @@ -248,6 +252,7 @@ static void hid_bpf_unreg(void *kdata, struct bpf_link *link) list_del_rcu(&ops->list); synchronize_srcu(&hdev->bpf.srcu); + ops->hdev = NULL; reconnect = hdev->bpf.rdesc_ops == ops; if (reconnect) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 37e6d25593c2..a282388b7aa5 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1249,6 +1249,9 @@ static const struct hid_device_id asus_devices[] = { USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b..0fa785f52707 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 72d56ee7ce1b..781c5aa29859 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 @@ -520,6 +521,8 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 +#define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 #define USB_VENDOR_ID_GOODTOUCH 0x1aad diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 56fc78841f24..99812c0f830b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1441,6 +1441,30 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, return 0; } +static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *size) +{ + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( + &hdev->dev, + "GT7868Q report descriptor fixup is applied.\n"); + } else { + dev_info( + &hdev->dev, + "The byte is not expected for fixing the report descriptor. \ +It's possible that the touchpad firmware is not suitable for applying the fix. \ +got: %x\n", + rdesc[607]); + } + } + + return rdesc; +} + static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); @@ -2035,6 +2059,14 @@ static const struct hid_device_id mt_devices[] = { MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, + /* Goodix GT7868Q devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, @@ -2270,6 +2302,7 @@ static struct hid_driver mt_driver = { .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, + .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index a44367aef621..2541fa2e0fa3 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -692,78 +692,28 @@ static bool wacom_is_art_pen(int tool_id) static int wacom_intuos_get_tool_type(int tool_id) { - int tool_type = BTN_TOOL_PEN; - - if (wacom_is_art_pen(tool_id)) - return tool_type; - switch (tool_id) { case 0x812: /* Inking pen */ case 0x801: /* Intuos3 Inking pen */ case 0x12802: /* Intuos4/5 Inking Pen */ case 0x012: - tool_type = BTN_TOOL_PENCIL; - break; - - case 0x822: /* Pen */ - case 0x842: - case 0x852: - case 0x823: /* Intuos3 Grip Pen */ - case 0x813: /* Intuos3 Classic Pen */ - case 0x802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x8e2: /* IntuosHT2 pen */ - case 0x022: - case 0x200: /* Pro Pen 3 */ - case 0x04200: /* Pro Pen 3 */ - case 0x10842: /* MobileStudio Pro Pro Pen slim */ - case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */ - case 0x16802: /* Cintiq 13HD Pro Pen */ - case 0x18802: /* DTH2242 Pen */ - case 0x10802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x80842: /* Intuos Pro and Cintiq Pro 3D Pen */ - tool_type = BTN_TOOL_PEN; - break; + return BTN_TOOL_PENCIL; case 0x832: /* Stroke pen */ case 0x032: - tool_type = BTN_TOOL_BRUSH; - break; + return BTN_TOOL_BRUSH; case 0x007: /* Mouse 4D and 2D */ case 0x09c: case 0x094: case 0x017: /* Intuos3 2D Mouse */ case 0x806: /* Intuos4 Mouse */ - tool_type = BTN_TOOL_MOUSE; - break; + return BTN_TOOL_MOUSE; case 0x096: /* Lens cursor */ case 0x097: /* Intuos3 Lens cursor */ case 0x006: /* Intuos4 Lens cursor */ - tool_type = BTN_TOOL_LENS; - break; - - case 0x82a: /* Eraser */ - case 0x84a: - case 0x85a: - case 0x91a: - case 0xd1a: - case 0x0fa: - case 0x82b: /* Intuos3 Grip Pen Eraser */ - case 0x81b: /* Intuos3 Classic Pen Eraser */ - case 0x91b: /* Intuos3 Airbrush Eraser */ - case 0x80c: /* Intuos4/5 13HD/24HD Marker Pen Eraser */ - case 0x80a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - case 0x90a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1480a: /* Intuos4/5 13HD/24HD Classic Pen Eraser */ - case 0x1090a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1080c: /* Intuos4/5 13HD/24HD Art Pen Eraser */ - case 0x1084a: /* MobileStudio Pro Pro Pen slim Eraser */ - case 0x1680a: /* Cintiq 13HD Pro Pen Eraser */ - case 0x1880a: /* DTH2242 Eraser */ - case 0x1080a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - tool_type = BTN_TOOL_RUBBER; - break; + return BTN_TOOL_LENS; case 0xd12: case 0x912: @@ -771,10 +721,13 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x913: /* Intuos3 Airbrush */ case 0x902: /* Intuos4/5 13HD/24HD Airbrush */ case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */ - tool_type = BTN_TOOL_AIRBRUSH; - break; + return BTN_TOOL_AIRBRUSH; + + default: + if (tool_id & 0x0008) + return BTN_TOOL_RUBBER; + return BTN_TOOL_PEN; } - return tool_type; } static void wacom_exit_report(struct wacom_wac *wacom) @@ -1925,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; - int resolution = hidinput_calc_abs_res(field, resolution_code); + int resolution; if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; } + resolution = hidinput_calc_abs_res(field, resolution_code); + if (equivalent_usage == HID_GD_X) { fmin += features->offset_left; fmax -= features->offset_right; diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index bc186c61a2c0..382a2bb9168a 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -22,23 +22,23 @@ #include <linux/util_macros.h> /* Indexes for the sysfs hooks */ - -#define INPUT 0 -#define MIN 1 -#define MAX 2 -#define CONTROL 3 -#define OFFSET 3 -#define AUTOMIN 4 -#define THERM 5 -#define HYSTERSIS 6 - +enum adt_sysfs_id { + INPUT = 0, + MIN = 1, + MAX = 2, + CONTROL = 3, + OFFSET = 3, // Dup + AUTOMIN = 4, + THERM = 5, + HYSTERSIS = 6, /* * These are unique identifiers for the sysfs functions - unlike the * numbers above, these are not also indexes into an array */ + ALARM = 9, + FAULT = 10, +}; -#define ALARM 9 -#define FAULT 10 /* 7475 Common Registers */ diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 0a8b95ce35f7..06e836e3e877 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -986,12 +986,17 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = clk_prepare_enable(gi2c->core_clk); - if (ret) + if (ret) { + geni_icc_disable(&gi2c->se); return ret; + } ret = geni_se_resources_on(&gi2c->se); - if (ret) + if (ret) { + clk_disable_unprepare(gi2c->core_clk); + geni_icc_disable(&gi2c->se); return ret; + } enable_irq(gi2c->irq); gi2c->suspended = 0; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85b31edc558d..1df5b4204142 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) * domain. * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't - * be used for atomic transfers. + * be used for atomic transfers. ACPI device is not IRQ safe also. */ - if (!IS_VI(i2c_dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index 4e03b75f9ad7..4c550306f3ec 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -18,7 +18,7 @@ enum testunit_cmds { TU_CMD_READ_BYTES = 1, /* save 0 for ABORT, RESET or similar */ - TU_CMD_HOST_NOTIFY, + TU_CMD_SMBUS_HOST_NOTIFY, TU_CMD_SMBUS_BLOCK_PROC_CALL, TU_NUM_CMDS }; @@ -60,7 +60,7 @@ static void i2c_slave_testunit_work(struct work_struct *work) msg.len = tu->regs[TU_REG_DATAH]; break; - case TU_CMD_HOST_NOTIFY: + case TU_CMD_SMBUS_HOST_NOTIFY: msg.addr = 0x08; msg.flags = 0; msg.len = 3; diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 7e4203df83ed..8256f7aed0cf 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -34,6 +34,7 @@ static int smbus_do_alert(struct device *dev, void *addrp) struct i2c_client *client = i2c_verify_client(dev); struct alert_data *data = addrp; struct i2c_driver *driver; + int ret; if (!client || client->addr != data->addr) return 0; @@ -47,16 +48,47 @@ static int smbus_do_alert(struct device *dev, void *addrp) device_lock(dev); if (client->dev.driver) { driver = to_i2c_driver(client->dev.driver); - if (driver->alert) + if (driver->alert) { + /* Stop iterating after we find the device */ driver->alert(client, data->type, data->data); - else + ret = -EBUSY; + } else { dev_warn(&client->dev, "no driver alert()!\n"); - } else + ret = -EOPNOTSUPP; + } + } else { dev_dbg(&client->dev, "alert with no driver\n"); + ret = -ENODEV; + } + device_unlock(dev); + + return ret; +} + +/* Same as above, but call back all drivers with alert handler */ + +static int smbus_do_alert_force(struct device *dev, void *addrp) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct alert_data *data = addrp; + struct i2c_driver *driver; + + if (!client || (client->flags & I2C_CLIENT_TEN)) + return 0; + + /* + * Drivers should either disable alerts, or provide at least + * a minimal handler. Lock so the driver won't change. + */ + device_lock(dev); + if (client->dev.driver) { + driver = to_i2c_driver(client->dev.driver); + if (driver->alert) + driver->alert(client, data->type, data->data); + } device_unlock(dev); - /* Stop iterating after we find the device */ - return -EBUSY; + return 0; } /* @@ -67,6 +99,7 @@ static irqreturn_t smbus_alert(int irq, void *d) { struct i2c_smbus_alert *alert = d; struct i2c_client *ara; + unsigned short prev_addr = I2C_CLIENT_END; /* Not a valid address */ ara = alert->ara; @@ -94,8 +127,25 @@ static irqreturn_t smbus_alert(int irq, void *d) data.addr, data.data); /* Notify driver for the device which issued the alert */ - device_for_each_child(&ara->adapter->dev, &data, - smbus_do_alert); + status = device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert); + /* + * If we read the same address more than once, and the alert + * was not handled by a driver, it won't do any good to repeat + * the loop because it will never terminate. Try again, this + * time calling the alert handlers of all devices connected to + * the bus, and abort the loop afterwards. If this helps, we + * are all set. If it doesn't, there is nothing else we can do, + * so we might as well abort the loop. + * Note: This assumes that a driver with alert handler handles + * the alert properly and clears it if necessary. + */ + if (data.addr == prev_addr && status != -EBUSY) { + device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert_force); + break; + } + prev_addr = data.addr; } return IRQ_HANDLED; diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 4d99a3524171..433f6088b7ce 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -10,8 +10,6 @@ #include <linux/i3c/master.h> -extern const struct bus_type i3c_bus_type; - void i3c_bus_normaluse_lock(struct i3c_bus *bus); void i3c_bus_normaluse_unlock(struct i3c_bus *bus); diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 00a3e9d01547..7028f03c2c42 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -342,6 +342,7 @@ const struct bus_type i3c_bus_type = { .probe = i3c_device_probe, .remove = i3c_device_remove, }; +EXPORT_SYMBOL_GPL(i3c_bus_type); static enum i3c_addr_slot_status i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr) diff --git a/drivers/i3c/master/ast2600-i3c-master.c b/drivers/i3c/master/ast2600-i3c-master.c index 01a47d3dd499..84942dbb6f80 100644 --- a/drivers/i3c/master/ast2600-i3c-master.c +++ b/drivers/i3c/master/ast2600-i3c-master.c @@ -156,7 +156,6 @@ static int ast2600_i3c_probe(struct platform_device *pdev) i3c->sda_pullup); i3c->dw.platform_ops = &ast2600_i3c_ops; - i3c->dw.ibi_capable = true; return dw_i3c_common_probe(&i3c->dw, pdev); } diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 0ec00e644bd4..8d694672c110 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -17,7 +17,9 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/reset.h> #include <linux/slab.h> @@ -217,7 +219,7 @@ #define I3C_BUS_THIGH_MAX_NS 41 #define XFER_TIMEOUT (msecs_to_jiffies(1000)) - +#define RPM_AUTOSUSPEND_TIMEOUT 1000 /* ms */ struct dw_i3c_cmd { u32 cmd_lo; u32 cmd_hi; @@ -300,7 +302,14 @@ static void dw_i3c_master_disable(struct dw_i3c_master *master) static void dw_i3c_master_enable(struct dw_i3c_master *master) { - writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_ENABLE, + u32 dev_ctrl; + + dev_ctrl = readl(master->regs + DEVICE_CTRL); + /* For now don't support Hot-Join */ + dev_ctrl |= DEV_CTRL_HOT_JOIN_NACK; + if (master->i2c_slv_prsnt) + dev_ctrl |= DEV_CTRL_I2C_SLAVE_PRESENT; + writel(dev_ctrl | DEV_CTRL_ENABLE, master->regs + DEVICE_CTRL); } @@ -521,6 +530,32 @@ static void dw_i3c_master_end_xfer_locked(struct dw_i3c_master *master, u32 isr) dw_i3c_master_start_xfer_locked(master); } +static void dw_i3c_master_set_intr_regs(struct dw_i3c_master *master) +{ + u32 thld_ctrl; + + thld_ctrl = readl(master->regs + QUEUE_THLD_CTRL); + thld_ctrl &= ~(QUEUE_THLD_CTRL_RESP_BUF_MASK | + QUEUE_THLD_CTRL_IBI_STAT_MASK | + QUEUE_THLD_CTRL_IBI_DATA_MASK); + thld_ctrl |= QUEUE_THLD_CTRL_IBI_STAT(1) | + QUEUE_THLD_CTRL_IBI_DATA(31); + writel(thld_ctrl, master->regs + QUEUE_THLD_CTRL); + + thld_ctrl = readl(master->regs + DATA_BUFFER_THLD_CTRL); + thld_ctrl &= ~DATA_BUFFER_THLD_CTRL_RX_BUF; + writel(thld_ctrl, master->regs + DATA_BUFFER_THLD_CTRL); + + writel(INTR_ALL, master->regs + INTR_STATUS); + writel(INTR_MASTER_MASK, master->regs + INTR_STATUS_EN); + writel(INTR_MASTER_MASK, master->regs + INTR_SIGNAL_EN); + + master->sir_rej_mask = IBI_REQ_REJECT_ALL; + writel(master->sir_rej_mask, master->regs + IBI_SIR_REQ_REJECT); + + writel(IBI_REQ_REJECT_ALL, master->regs + IBI_MR_REQ_REJECT); +} + static int dw_i3c_clk_cfg(struct dw_i3c_master *master) { unsigned long core_rate, core_period; @@ -543,18 +578,22 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master) scl_timing = SCL_I3C_TIMING_HCNT(hcnt) | SCL_I3C_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I3C_PP_TIMING); + master->i3c_pp_timing = scl_timing; /* * In pure i3c mode, MST_FREE represents tCAS. In shared mode, this * will be set up by dw_i2c_clk_cfg as tLOW. */ - if (master->base.bus.mode == I3C_BUS_MODE_PURE) + if (master->base.bus.mode == I3C_BUS_MODE_PURE) { writel(BUS_I3C_MST_FREE(lcnt), master->regs + BUS_FREE_TIMING); + master->bus_free_timing = BUS_I3C_MST_FREE(lcnt); + } lcnt = max_t(u8, DIV_ROUND_UP(I3C_BUS_TLOW_OD_MIN_NS, core_period), lcnt); scl_timing = SCL_I3C_TIMING_HCNT(hcnt) | SCL_I3C_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I3C_OD_TIMING); + master->i3c_od_timing = scl_timing; lcnt = DIV_ROUND_UP(core_rate, I3C_BUS_SDR1_SCL_RATE) - hcnt; scl_timing = SCL_EXT_LCNT_1(lcnt); @@ -565,6 +604,7 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master) lcnt = DIV_ROUND_UP(core_rate, I3C_BUS_SDR4_SCL_RATE) - hcnt; scl_timing |= SCL_EXT_LCNT_4(lcnt); writel(scl_timing, master->regs + SCL_EXT_LCNT_TIMING); + master->ext_lcnt_timing = scl_timing; return 0; } @@ -586,16 +626,21 @@ static int dw_i2c_clk_cfg(struct dw_i3c_master *master) scl_timing = SCL_I2C_FMP_TIMING_HCNT(hcnt) | SCL_I2C_FMP_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FMP_TIMING); + master->i2c_fmp_timing = scl_timing; lcnt = DIV_ROUND_UP(I3C_BUS_I2C_FM_TLOW_MIN_NS, core_period); hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_SCL_RATE) - lcnt; scl_timing = SCL_I2C_FM_TIMING_HCNT(hcnt) | SCL_I2C_FM_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FM_TIMING); + master->i2c_fm_timing = scl_timing; writel(BUS_I3C_MST_FREE(lcnt), master->regs + BUS_FREE_TIMING); + master->bus_free_timing = BUS_I3C_MST_FREE(lcnt); + writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_I2C_SLAVE_PRESENT, master->regs + DEVICE_CTRL); + master->i2c_slv_prsnt = true; return 0; } @@ -605,69 +650,58 @@ static int dw_i3c_master_bus_init(struct i3c_master_controller *m) struct dw_i3c_master *master = to_dw_i3c_master(m); struct i3c_bus *bus = i3c_master_get_bus(m); struct i3c_device_info info = { }; - u32 thld_ctrl; int ret; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + ret = master->platform_ops->init(master); if (ret) - return ret; + goto rpm_out; switch (bus->mode) { case I3C_BUS_MODE_MIXED_FAST: case I3C_BUS_MODE_MIXED_LIMITED: ret = dw_i2c_clk_cfg(master); if (ret) - return ret; + goto rpm_out; fallthrough; case I3C_BUS_MODE_PURE: ret = dw_i3c_clk_cfg(master); if (ret) - return ret; + goto rpm_out; break; default: - return -EINVAL; + ret = -EINVAL; + goto rpm_out; } - thld_ctrl = readl(master->regs + QUEUE_THLD_CTRL); - thld_ctrl &= ~(QUEUE_THLD_CTRL_RESP_BUF_MASK | - QUEUE_THLD_CTRL_IBI_STAT_MASK | - QUEUE_THLD_CTRL_IBI_STAT_MASK); - thld_ctrl |= QUEUE_THLD_CTRL_IBI_STAT(1) | - QUEUE_THLD_CTRL_IBI_DATA(31); - writel(thld_ctrl, master->regs + QUEUE_THLD_CTRL); - - thld_ctrl = readl(master->regs + DATA_BUFFER_THLD_CTRL); - thld_ctrl &= ~DATA_BUFFER_THLD_CTRL_RX_BUF; - writel(thld_ctrl, master->regs + DATA_BUFFER_THLD_CTRL); - - writel(INTR_ALL, master->regs + INTR_STATUS); - writel(INTR_MASTER_MASK, master->regs + INTR_STATUS_EN); - writel(INTR_MASTER_MASK, master->regs + INTR_SIGNAL_EN); - ret = i3c_master_get_free_addr(m, 0); if (ret < 0) - return ret; + goto rpm_out; writel(DEV_ADDR_DYNAMIC_ADDR_VALID | DEV_ADDR_DYNAMIC(ret), master->regs + DEVICE_ADDR); - + master->dev_addr = ret; memset(&info, 0, sizeof(info)); info.dyn_addr = ret; ret = i3c_master_set_info(&master->base, &info); if (ret) - return ret; - - writel(IBI_REQ_REJECT_ALL, master->regs + IBI_SIR_REQ_REJECT); - writel(IBI_REQ_REJECT_ALL, master->regs + IBI_MR_REQ_REJECT); - - /* For now don't support Hot-Join */ - writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_HOT_JOIN_NACK, - master->regs + DEVICE_CTRL); + goto rpm_out; + dw_i3c_master_set_intr_regs(master); dw_i3c_master_enable(master); - return 0; +rpm_out: + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + return ret; } static void dw_i3c_master_bus_cleanup(struct i3c_master_controller *m) @@ -769,11 +803,21 @@ static int dw_i3c_master_send_ccc_cmd(struct i3c_master_controller *m, if (ccc->id == I3C_CCC_ENTDAA) return -EINVAL; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + if (ccc->rnw) ret = dw_i3c_ccc_get(master, ccc); else ret = dw_i3c_ccc_set(master, ccc); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -786,6 +830,14 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) u8 p, last_addr = 0; int ret, pos; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + olddevs = ~(master->free_pos); /* Prepare DAT before launching DAA. */ @@ -794,8 +846,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) continue; ret = i3c_master_get_free_addr(m, last_addr + 1); - if (ret < 0) - return -ENOSPC; + if (ret < 0) { + ret = -ENOSPC; + goto rpm_out; + } master->devs[pos].addr = ret; p = even_parity(ret); @@ -805,16 +859,21 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(ret), master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, pos)); + + ret = 0; } xfer = dw_i3c_master_alloc_xfer(master, 1); - if (!xfer) - return -ENOMEM; + if (!xfer) { + ret = -ENOMEM; + goto rpm_out; + } pos = dw_i3c_master_get_free_pos(master); if (pos < 0) { dw_i3c_master_free_xfer(xfer); - return pos; + ret = pos; + goto rpm_out; } cmd = &xfer->cmds[0]; cmd->cmd_hi = 0x1; @@ -839,7 +898,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) dw_i3c_master_free_xfer(xfer); - return 0; +rpm_out: + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + return ret; } static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, @@ -874,6 +936,14 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (!xfer) return -ENOMEM; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + for (i = 0; i < i3c_nxfers; i++) { struct dw_i3c_cmd *cmd = &xfer->cmds[i]; @@ -915,6 +985,8 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1025,6 +1097,14 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, if (!xfer) return -ENOMEM; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + for (i = 0; i < i2c_nxfers; i++) { struct dw_i3c_cmd *cmd = &xfer->cmds[i]; @@ -1055,6 +1135,8 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1075,6 +1157,7 @@ static int dw_i3c_master_attach_i2c_dev(struct i2c_dev_desc *dev) data->index = pos; master->devs[pos].addr = dev->addr; + master->devs[pos].is_i2c_addr = true; master->free_pos &= ~BIT(pos); i2c_dev_set_master_data(dev, data); @@ -1175,17 +1258,16 @@ static void dw_i3c_master_set_sir_enabled(struct dw_i3c_master *master, master->platform_ops->set_dat_ibi(master, dev, enable, ®); writel(reg, master->regs + dat_entry); - reg = readl(master->regs + IBI_SIR_REQ_REJECT); if (enable) { - global = reg == 0xffffffff; - reg &= ~BIT(idx); + global = (master->sir_rej_mask == IBI_REQ_REJECT_ALL); + master->sir_rej_mask &= ~BIT(idx); } else { bool hj_rejected = !!(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_HOT_JOIN_NACK); - reg |= BIT(idx); - global = (reg == 0xffffffff) && hj_rejected; + master->sir_rej_mask |= BIT(idx); + global = (master->sir_rej_mask == IBI_REQ_REJECT_ALL) && hj_rejected; } - writel(reg, master->regs + IBI_SIR_REQ_REJECT); + writel(master->sir_rej_mask, master->regs + IBI_SIR_REQ_REJECT); if (global) dw_i3c_master_enable_sir_signal(master, enable); @@ -1197,6 +1279,15 @@ static void dw_i3c_master_set_sir_enabled(struct dw_i3c_master *master, static int dw_i3c_master_enable_hotjoin(struct i3c_master_controller *m) { struct dw_i3c_master *master = to_dw_i3c_master(m); + int ret; + + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } dw_i3c_master_enable_sir_signal(master, true); writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_HOT_JOIN_NACK, @@ -1212,6 +1303,8 @@ static int dw_i3c_master_disable_hotjoin(struct i3c_master_controller *m) writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_HOT_JOIN_NACK, master->regs + DEVICE_CTRL); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return 0; } @@ -1222,12 +1315,23 @@ static int dw_i3c_master_enable_ibi(struct i3c_dev_desc *dev) struct dw_i3c_master *master = to_dw_i3c_master(m); int rc; + rc = pm_runtime_resume_and_get(master->dev); + if (rc < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, rc); + return rc; + } + dw_i3c_master_set_sir_enabled(master, dev, data->index, true); rc = i3c_master_enec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); - if (rc) + if (rc) { dw_i3c_master_set_sir_enabled(master, dev, data->index, false); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + } return rc; } @@ -1245,6 +1349,8 @@ static int dw_i3c_master_disable_ibi(struct i3c_dev_desc *dev) dw_i3c_master_set_sir_enabled(master, dev, data->index, false); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return 0; } @@ -1403,21 +1509,6 @@ static const struct i3c_master_controller_ops dw_mipi_i3c_ops = { .attach_i2c_dev = dw_i3c_master_attach_i2c_dev, .detach_i2c_dev = dw_i3c_master_detach_i2c_dev, .i2c_xfers = dw_i3c_master_i2c_xfers, -}; - -static const struct i3c_master_controller_ops dw_mipi_i3c_ibi_ops = { - .bus_init = dw_i3c_master_bus_init, - .bus_cleanup = dw_i3c_master_bus_cleanup, - .attach_i3c_dev = dw_i3c_master_attach_i3c_dev, - .reattach_i3c_dev = dw_i3c_master_reattach_i3c_dev, - .detach_i3c_dev = dw_i3c_master_detach_i3c_dev, - .do_daa = dw_i3c_master_daa, - .supports_ccc_cmd = dw_i3c_master_supports_ccc_cmd, - .send_ccc_cmd = dw_i3c_master_send_ccc_cmd, - .priv_xfers = dw_i3c_master_priv_xfers, - .attach_i2c_dev = dw_i3c_master_attach_i2c_dev, - .detach_i2c_dev = dw_i3c_master_detach_i2c_dev, - .i2c_xfers = dw_i3c_master_i2c_xfers, .request_ibi = dw_i3c_master_request_ibi, .free_ibi = dw_i3c_master_free_ibi, .enable_ibi = dw_i3c_master_enable_ibi, @@ -1455,29 +1546,30 @@ static void dw_i3c_hj_work(struct work_struct *work) int dw_i3c_common_probe(struct dw_i3c_master *master, struct platform_device *pdev) { - const struct i3c_master_controller_ops *ops; int ret, irq; if (!master->platform_ops) master->platform_ops = &dw_i3c_platform_ops_default; + master->dev = &pdev->dev; + master->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(master->regs)) return PTR_ERR(master->regs); - master->core_clk = devm_clk_get(&pdev->dev, NULL); + master->core_clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(master->core_clk)) return PTR_ERR(master->core_clk); + master->pclk = devm_clk_get_optional_enabled(&pdev->dev, "pclk"); + if (IS_ERR(master->pclk)) + return PTR_ERR(master->pclk); + master->core_rst = devm_reset_control_get_optional_exclusive(&pdev->dev, "core_rst"); if (IS_ERR(master->core_rst)) return PTR_ERR(master->core_rst); - ret = clk_prepare_enable(master->core_clk); - if (ret) - goto err_disable_core_clk; - reset_control_deassert(master->core_rst); spin_lock_init(&master->xferqueue.lock); @@ -1493,6 +1585,11 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, platform_set_drvdata(pdev, master); + pm_runtime_set_autosuspend_delay(&pdev->dev, RPM_AUTOSUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + /* Information regarding the FIFOs/QUEUEs depth */ ret = readl(master->regs + QUEUE_STATUS_LEVEL); master->caps.cmdfifodepth = QUEUE_STATUS_LEVEL_CMD(ret); @@ -1505,23 +1602,22 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, master->maxdevs = ret >> 16; master->free_pos = GENMASK(master->maxdevs - 1, 0); - ops = &dw_mipi_i3c_ops; - if (master->ibi_capable) - ops = &dw_mipi_i3c_ibi_ops; - INIT_WORK(&master->hj_work, dw_i3c_hj_work); - ret = i3c_master_register(&master->base, &pdev->dev, ops, false); + ret = i3c_master_register(&master->base, &pdev->dev, + &dw_mipi_i3c_ops, false); if (ret) - goto err_assert_rst; + goto err_disable_pm; return 0; +err_disable_pm: + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + err_assert_rst: reset_control_assert(master->core_rst); -err_disable_core_clk: - clk_disable_unprepare(master->core_clk); - return ret; } EXPORT_SYMBOL_GPL(dw_i3c_common_probe); @@ -1530,9 +1626,9 @@ void dw_i3c_common_remove(struct dw_i3c_master *master) { i3c_master_unregister(&master->base); - reset_control_assert(master->core_rst); - - clk_disable_unprepare(master->core_clk); + pm_runtime_disable(master->dev); + pm_runtime_set_suspended(master->dev); + pm_runtime_dont_use_autosuspend(master->dev); } EXPORT_SYMBOL_GPL(dw_i3c_common_remove); @@ -1556,6 +1652,96 @@ static void dw_i3c_remove(struct platform_device *pdev) dw_i3c_common_remove(master); } +static void dw_i3c_master_restore_addrs(struct dw_i3c_master *master) +{ + u32 pos, reg_val; + + writel(DEV_ADDR_DYNAMIC_ADDR_VALID | DEV_ADDR_DYNAMIC(master->dev_addr), + master->regs + DEVICE_ADDR); + + for (pos = 0; pos < master->maxdevs; pos++) { + if (master->free_pos & BIT(pos)) + continue; + + if (master->devs[pos].is_i2c_addr) + reg_val = DEV_ADDR_TABLE_LEGACY_I2C_DEV | + DEV_ADDR_TABLE_STATIC_ADDR(master->devs[pos].addr); + else + reg_val = DEV_ADDR_TABLE_DYNAMIC_ADDR(master->devs[pos].addr); + + writel(reg_val, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, pos)); + } +} + +static void dw_i3c_master_restore_timing_regs(struct dw_i3c_master *master) +{ + writel(master->i3c_pp_timing, master->regs + SCL_I3C_PP_TIMING); + writel(master->bus_free_timing, master->regs + BUS_FREE_TIMING); + writel(master->i3c_od_timing, master->regs + SCL_I3C_OD_TIMING); + writel(master->ext_lcnt_timing, master->regs + SCL_EXT_LCNT_TIMING); + + if (master->i2c_slv_prsnt) { + writel(master->i2c_fmp_timing, master->regs + SCL_I2C_FMP_TIMING); + writel(master->i2c_fm_timing, master->regs + SCL_I2C_FM_TIMING); + } +} + +static int dw_i3c_master_enable_clks(struct dw_i3c_master *master) +{ + int ret = 0; + + ret = clk_prepare_enable(master->core_clk); + if (ret) + return ret; + + ret = clk_prepare_enable(master->pclk); + if (ret) { + clk_disable_unprepare(master->core_clk); + return ret; + } + + return 0; +} + +static inline void dw_i3c_master_disable_clks(struct dw_i3c_master *master) +{ + clk_disable_unprepare(master->pclk); + clk_disable_unprepare(master->core_clk); +} + +static int __maybe_unused dw_i3c_master_runtime_suspend(struct device *dev) +{ + struct dw_i3c_master *master = dev_get_drvdata(dev); + + dw_i3c_master_disable(master); + + reset_control_assert(master->core_rst); + dw_i3c_master_disable_clks(master); + pinctrl_pm_select_sleep_state(dev); + return 0; +} + +static int __maybe_unused dw_i3c_master_runtime_resume(struct device *dev) +{ + struct dw_i3c_master *master = dev_get_drvdata(dev); + + pinctrl_pm_select_default_state(dev); + dw_i3c_master_enable_clks(master); + reset_control_deassert(master->core_rst); + + dw_i3c_master_set_intr_regs(master); + dw_i3c_master_restore_timing_regs(master); + dw_i3c_master_restore_addrs(master); + + dw_i3c_master_enable(master); + return 0; +} + +static const struct dev_pm_ops dw_i3c_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(dw_i3c_master_runtime_suspend, dw_i3c_master_runtime_resume, NULL) +}; + static const struct of_device_id dw_i3c_master_of_match[] = { { .compatible = "snps,dw-i3c-master-1.00a", }, {}, @@ -1568,6 +1754,7 @@ static struct platform_driver dw_i3c_driver = { .driver = { .name = "dw-i3c-master", .of_match_table = dw_i3c_master_of_match, + .pm = &dw_i3c_pm_ops, }, }; module_platform_driver(dw_i3c_driver); diff --git a/drivers/i3c/master/dw-i3c-master.h b/drivers/i3c/master/dw-i3c-master.h index 4ab94aa72252..219ff815d3a7 100644 --- a/drivers/i3c/master/dw-i3c-master.h +++ b/drivers/i3c/master/dw-i3c-master.h @@ -19,11 +19,13 @@ struct dw_i3c_master_caps { struct dw_i3c_dat_entry { u8 addr; + bool is_i2c_addr; struct i3c_dev_desc *ibi_dev; }; struct dw_i3c_master { struct i3c_master_controller base; + struct device *dev; u16 maxdevs; u16 datstartaddr; u32 free_pos; @@ -36,10 +38,18 @@ struct dw_i3c_master { void __iomem *regs; struct reset_control *core_rst; struct clk *core_clk; + struct clk *pclk; char version[5]; char type[5]; - bool ibi_capable; - + u32 sir_rej_mask; + bool i2c_slv_prsnt; + u32 dev_addr; + u32 i3c_pp_timing; + u32 i3c_od_timing; + u32 ext_lcnt_timing; + u32 bus_free_timing; + u32 i2c_fm_timing; + u32 i2c_fmp_timing; /* * Per-device hardware data, used to manage the device address table * (DAT) diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index d7e966a25583..4e7d6a43ee9b 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -631,6 +631,7 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id) static int i3c_hci_init(struct i3c_hci *hci) { u32 regval, offset; + bool size_in_dwords; int ret; /* Validate HCI hardware version */ @@ -654,11 +655,16 @@ static int i3c_hci_init(struct i3c_hci *hci) hci->caps = reg_read(HC_CAPABILITIES); DBG("caps = %#x", hci->caps); + size_in_dwords = hci->version_major < 1 || + (hci->version_major == 1 && hci->version_minor < 1); + regval = reg_read(DAT_SECTION); offset = FIELD_GET(DAT_TABLE_OFFSET, regval); hci->DAT_regs = offset ? hci->base_regs + offset : NULL; hci->DAT_entries = FIELD_GET(DAT_TABLE_SIZE, regval); hci->DAT_entry_size = FIELD_GET(DAT_ENTRY_SIZE, regval) ? 0 : 8; + if (size_in_dwords) + hci->DAT_entries = 4 * hci->DAT_entries / hci->DAT_entry_size; dev_info(&hci->master.dev, "DAT: %u %u-bytes entries at offset %#x\n", hci->DAT_entries, hci->DAT_entry_size, offset); @@ -667,6 +673,8 @@ static int i3c_hci_init(struct i3c_hci *hci) hci->DCT_regs = offset ? hci->base_regs + offset : NULL; hci->DCT_entries = FIELD_GET(DCT_TABLE_SIZE, regval); hci->DCT_entry_size = FIELD_GET(DCT_ENTRY_SIZE, regval) ? 0 : 16; + if (size_in_dwords) + hci->DCT_entries = 4 * hci->DCT_entries / hci->DCT_entry_size; dev_info(&hci->master.dev, "DCT: %u %u-bytes entries at offset %#x\n", hci->DCT_entries, hci->DCT_entry_size, offset); diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index 4e01a95cc4d0..a918e96b21fd 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -147,21 +147,6 @@ struct hci_dma_dev_ibi_data { unsigned int max_len; }; -static inline u32 lo32(dma_addr_t physaddr) -{ - return physaddr; -} - -static inline u32 hi32(dma_addr_t physaddr) -{ - /* trickery to avoid compiler warnings on 32-bit build targets */ - if (sizeof(dma_addr_t) > 4) { - u64 hi = physaddr; - return hi >> 32; - } - return 0; -} - static void hci_dma_cleanup(struct i3c_hci *hci) { struct hci_rings_data *rings = hci->io_data; @@ -265,10 +250,10 @@ static int hci_dma_init(struct i3c_hci *hci) if (!rh->xfer || !rh->resp || !rh->src_xfers) goto err_out; - rh_reg_write(CMD_RING_BASE_LO, lo32(rh->xfer_dma)); - rh_reg_write(CMD_RING_BASE_HI, hi32(rh->xfer_dma)); - rh_reg_write(RESP_RING_BASE_LO, lo32(rh->resp_dma)); - rh_reg_write(RESP_RING_BASE_HI, hi32(rh->resp_dma)); + rh_reg_write(CMD_RING_BASE_LO, lower_32_bits(rh->xfer_dma)); + rh_reg_write(CMD_RING_BASE_HI, upper_32_bits(rh->xfer_dma)); + rh_reg_write(RESP_RING_BASE_LO, lower_32_bits(rh->resp_dma)); + rh_reg_write(RESP_RING_BASE_HI, upper_32_bits(rh->resp_dma)); regval = FIELD_PREP(CR_RING_SIZE, rh->xfer_entries); rh_reg_write(CR_SETUP, regval); @@ -294,7 +279,17 @@ static int hci_dma_init(struct i3c_hci *hci) rh->ibi_chunk_sz = dma_get_cache_alignment(); rh->ibi_chunk_sz *= IBI_CHUNK_CACHELINES; - BUG_ON(rh->ibi_chunk_sz > 256); + /* + * Round IBI data chunk size to number of bytes supported by + * the HW. Chunk size can be 2^n number of DWORDs which is the + * same as 2^(n+2) bytes, where n is 0..6. + */ + rh->ibi_chunk_sz = umax(4, rh->ibi_chunk_sz); + rh->ibi_chunk_sz = roundup_pow_of_two(rh->ibi_chunk_sz); + if (rh->ibi_chunk_sz > 256) { + ret = -EINVAL; + goto err_out; + } ibi_status_ring_sz = rh->ibi_status_sz * rh->ibi_status_entries; ibi_data_ring_sz = rh->ibi_chunk_sz * rh->ibi_chunks_total; @@ -315,6 +310,11 @@ static int hci_dma_init(struct i3c_hci *hci) goto err_out; } + rh_reg_write(IBI_STATUS_RING_BASE_LO, lower_32_bits(rh->ibi_status_dma)); + rh_reg_write(IBI_STATUS_RING_BASE_HI, upper_32_bits(rh->ibi_status_dma)); + rh_reg_write(IBI_DATA_RING_BASE_LO, lower_32_bits(rh->ibi_data_dma)); + rh_reg_write(IBI_DATA_RING_BASE_HI, upper_32_bits(rh->ibi_data_dma)); + regval = FIELD_PREP(IBI_STATUS_RING_SIZE, rh->ibi_status_entries) | FIELD_PREP(IBI_DATA_CHUNK_SIZE, @@ -404,8 +404,8 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci, hci_dma_unmap_xfer(hci, xfer_list, i); return -ENOMEM; } - *ring_data++ = lo32(xfer->data_dma); - *ring_data++ = hi32(xfer->data_dma); + *ring_data++ = lower_32_bits(xfer->data_dma); + *ring_data++ = upper_32_bits(xfer->data_dma); } else { *ring_data++ = 0; *ring_data++ = 0; diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index bb299ce02ccc..0a68fd1b81d4 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -790,7 +790,20 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, int ret, i; while (true) { - /* Enter/proceed with DAA */ + /* SVC_I3C_MCTRL_REQUEST_PROC_DAA have two mode, ENTER DAA or PROCESS DAA. + * + * ENTER DAA: + * 1 will issue START, 7E, ENTDAA, and then emits 7E/R to process first target. + * 2 Stops just before the new Dynamic Address (DA) is to be emitted. + * + * PROCESS DAA: + * 1 The DA is written using MWDATAB or ADDR bits 6:0. + * 2 ProcessDAA is requested again to write the new address, and then starts the + * next (START, 7E, ENTDAA) unless marked to STOP; an MSTATUS indicating NACK + * means DA was not accepted (e.g. parity error). If PROCESSDAA is NACKed on the + * 7E/R, which means no more Slaves need a DA, then a COMPLETE will be signaled + * (along with DONE), and a STOP issued automatically. + */ writel(SVC_I3C_MCTRL_REQUEST_PROC_DAA | SVC_I3C_MCTRL_TYPE_I3C | SVC_I3C_MCTRL_IBIRESP_NACK | @@ -807,7 +820,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, SVC_I3C_MSTATUS_MCTRLDONE(reg), 1, 1000); if (ret) - return ret; + break; if (SVC_I3C_MSTATUS_RXPEND(reg)) { u8 data[6]; @@ -819,7 +832,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, */ ret = svc_i3c_master_readb(master, data, 6); if (ret) - return ret; + break; for (i = 0; i < 6; i++) prov_id[dev_nb] |= (u64)(data[i]) << (8 * (5 - i)); @@ -827,7 +840,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, /* We do not care about the BCR and DCR yet */ ret = svc_i3c_master_readb(master, data, 2); if (ret) - return ret; + break; } else if (SVC_I3C_MSTATUS_MCTRLDONE(reg)) { if (SVC_I3C_MSTATUS_STATE_IDLE(reg) && SVC_I3C_MSTATUS_COMPLETE(reg)) { @@ -835,12 +848,23 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, * All devices received and acked they dynamic * address, this is the natural end of the DAA * procedure. + * + * Hardware will auto emit STOP at this case. */ - break; + *count = dev_nb; + return 0; + } else if (SVC_I3C_MSTATUS_NACKED(reg)) { /* No I3C devices attached */ - if (dev_nb == 0) + if (dev_nb == 0) { + /* + * Hardware can't treat first NACK for ENTAA as normal + * COMPLETE. So need manual emit STOP. + */ + ret = 0; + *count = 0; break; + } /* * A slave device nacked the address, this is @@ -849,8 +873,10 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, * answer again immediately and shall ack the * address this time. */ - if (prov_id[dev_nb] == nacking_prov_id) - return -EIO; + if (prov_id[dev_nb] == nacking_prov_id) { + ret = -EIO; + break; + } dev_nb--; nacking_prov_id = prov_id[dev_nb]; @@ -858,7 +884,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, continue; } else { - return -EIO; + break; } } @@ -870,12 +896,12 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, SVC_I3C_MSTATUS_BETWEEN(reg), 0, 1000); if (ret) - return ret; + break; /* Give the slave device a suitable dynamic address */ ret = i3c_master_get_free_addr(&master->base, last_addr + 1); if (ret < 0) - return ret; + break; addrs[dev_nb] = ret; dev_dbg(master->dev, "DAA: device %d assigned to 0x%02x\n", @@ -885,9 +911,9 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, last_addr = addrs[dev_nb++]; } - *count = dev_nb; - - return 0; + /* Need manual issue STOP except for Complete condition */ + svc_i3c_master_emit_stop(master); + return ret; } static int svc_i3c_update_ibirules(struct svc_i3c_master *master) @@ -961,11 +987,10 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m) spin_lock_irqsave(&master->xferqueue.lock, flags); ret = svc_i3c_master_do_daa_locked(master, addrs, &dev_nb); spin_unlock_irqrestore(&master->xferqueue.lock, flags); - if (ret) { - svc_i3c_master_emit_stop(master); - svc_i3c_master_clear_merrwarn(master); + + svc_i3c_master_clear_merrwarn(master); + if (ret) goto rpm_out; - } /* Register all devices who participated to the core */ for (i = 0; i < dev_nb; i++) { @@ -1052,29 +1077,59 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, u8 *in, const u8 *out, unsigned int xfer_len, unsigned int *actual_len, bool continued) { + int retry = 2; u32 reg; int ret; /* clean SVC_I3C_MINT_IBIWON w1c bits */ writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); - writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | - xfer_type | - SVC_I3C_MCTRL_IBIRESP_NACK | - SVC_I3C_MCTRL_DIR(rnw) | - SVC_I3C_MCTRL_ADDR(addr) | - SVC_I3C_MCTRL_RDTERM(*actual_len), - master->regs + SVC_I3C_MCTRL); - ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, + while (retry--) { + writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | + xfer_type | + SVC_I3C_MCTRL_IBIRESP_NACK | + SVC_I3C_MCTRL_DIR(rnw) | + SVC_I3C_MCTRL_ADDR(addr) | + SVC_I3C_MCTRL_RDTERM(*actual_len), + master->regs + SVC_I3C_MCTRL); + + ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, SVC_I3C_MSTATUS_MCTRLDONE(reg), 0, 1000); - if (ret) - goto emit_stop; + if (ret) + goto emit_stop; - if (readl(master->regs + SVC_I3C_MERRWARN) & SVC_I3C_MERRWARN_NACK) { - ret = -ENXIO; - *actual_len = 0; - goto emit_stop; + if (readl(master->regs + SVC_I3C_MERRWARN) & SVC_I3C_MERRWARN_NACK) { + /* + * According to I3C Spec 1.1.1, 11-Jun-2021, section: 5.1.2.2.3. + * If the Controller chooses to start an I3C Message with an I3C Dynamic + * Address, then special provisions shall be made because that same I3C + * Target may be initiating an IBI or a Controller Role Request. So, one of + * three things may happen: (skip 1, 2) + * + * 3. The Addresses match and the RnW bits also match, and so neither + * Controller nor Target will ACK since both are expecting the other side to + * provide ACK. As a result, each side might think it had "won" arbitration, + * but neither side would continue, as each would subsequently see that the + * other did not provide ACK. + * ... + * For either value of RnW: Due to the NACK, the Controller shall defer the + * Private Write or Private Read, and should typically transmit the Target + * Address again after a Repeated START (i.e., the next one or any one prior + * to a STOP in the Frame). Since the Address Header following a Repeated + * START is not arbitrated, the Controller will always win (see Section + * 5.1.2.2.4). + */ + if (retry && addr != 0x7e) { + writel(SVC_I3C_MERRWARN_NACK, master->regs + SVC_I3C_MERRWARN); + } else { + ret = -ENXIO; + *actual_len = 0; + goto emit_stop; + } + } else { + break; + } } /* @@ -1321,7 +1376,7 @@ static int svc_i3c_master_send_direct_ccc_cmd(struct svc_i3c_master *master, cmd->addr = ccc->dests[0].addr; cmd->rnw = ccc->rnw; cmd->in = ccc->rnw ? ccc->dests[0].payload.data : NULL; - cmd->out = ccc->rnw ? NULL : ccc->dests[0].payload.data, + cmd->out = ccc->rnw ? NULL : ccc->dests[0].payload.data; cmd->len = xfer_len; cmd->actual_len = actual_len; cmd->continued = false; diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 95fa857e8aad..426e3c9f88a1 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -564,13 +564,11 @@ static int tiadc_parse_dt(struct platform_device *pdev, struct tiadc_device *adc_dev) { struct device_node *node = pdev->dev.of_node; - struct property *prop; - const __be32 *cur; int channels = 0; u32 val; int i; - of_property_for_each_u32(node, "ti,adc-channels", prop, cur, val) { + of_property_for_each_u32(node, "ti,adc-channels", val) { adc_dev->channel_line[channels] = val; /* Set Default values for optional DT parameters */ diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 14b53dac1253..6b04a674f832 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -46,6 +46,9 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, return 0; if (mt) return mt->num_slots != num_slots ? -EINVAL : 0; + /* Arbitrary limit for avoiding too large memory allocation. */ + if (num_slots > 1024) + return -EINVAL; mt = kzalloc(struct_size(mt, slots, num_slots), GFP_KERNEL); if (!mt) diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index 7cb26929dc73..9dc25eb2be44 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -871,7 +871,7 @@ static void cyttsp4_get_mt_touches(struct cyttsp4_mt_data *md, int num_cur_tch) struct cyttsp4_touch tch; int sig; int i, j, t = 0; - int ids[max(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)]; + int ids[MAX(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)]; memset(ids, 0, si->si_ofs.tch_abs[CY_TCH_T].max * sizeof(int)); for (i = 0; i < num_cur_tch; i++) { diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 9d9a7fde59e7..1074ee25064d 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -588,9 +588,9 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, - cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, - cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES; + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE; + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; cfg->tlb = &v1_flush_ops; pgtable->iop.ops.map_pages = iommu_v1_map_pages; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 4b2994b6126d..2fce4f6d4e1b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -277,7 +277,7 @@ static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, */ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || of_device_is_compatible(np, "nvidia,tegra194-smmu")) { - smmu->pgsize_bitmap = PAGE_SIZE; + smmu->pgsize_bitmap &= GENMASK(PAGE_SHIFT, 0); pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; } diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e..81e9cc6e3164 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -170,6 +170,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + return; } /* diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9a7ec5997c61..3214a4c17c6b 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -526,7 +526,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e29133..222cfc11ebfd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index ba53571a8239..a2f4ffe6d949 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -232,8 +232,8 @@ static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) pgt_size = sprd_iommu_pgt_size(&dom->domain); dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); - dom->sdev = NULL; sprd_iommu_hw_en(dom->sdev, false); + dom->sdev = NULL; } static void sprd_iommu_domain_free(struct iommu_domain *domain) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 072bd227b6c6..4525366d16d6 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -111,8 +111,6 @@ static void __init aic_common_ext_irq_of_init(struct irq_domain *domain) struct device_node *node = irq_domain_get_of_node(domain); struct irq_chip_generic *gc; struct aic_chip_data *aic; - struct property *prop; - const __be32 *p; u32 hwirq; gc = irq_get_domain_generic_chip(domain, 0); @@ -120,7 +118,7 @@ static void __init aic_common_ext_irq_of_init(struct irq_domain *domain) aic = gc->private; aic->ext_irqs |= 1; - of_property_for_each_u32(node, "atmel,external-irqs", prop, p, hwirq) { + of_property_for_each_u32(node, "atmel,external-irqs", hwirq) { gc = irq_get_domain_generic_chip(domain, hwirq); if (!gc) { pr_warn("AIC: external irq %d >= %d skip it\n", diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 9d8f2c406043..b35903a06902 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -18,11 +18,13 @@ struct fwnode_handle *cpuintc_handle; static u32 lpic_gsi_to_irq(u32 gsi) { + int irq = 0; + /* Only pch irqdomain transferring is required for LoongArch. */ if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ) - return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); - return 0; + return (irq > 0) ? irq : 0; } static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 093fd42893a7..53cc08387588 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -64,6 +64,20 @@ struct mbigen_device { void __iomem *base; }; +static inline unsigned int get_mbigen_node_offset(unsigned int nid) +{ + unsigned int offset = nid * MBIGEN_NODE_OFFSET; + + /* + * To avoid touched clear register in unexpected way, we need to directly + * skip clear register when access to more than 10 mbigen nodes. + */ + if (nid >= (REG_MBIGEN_CLEAR_OFFSET / MBIGEN_NODE_OFFSET)) + offset += MBIGEN_NODE_OFFSET; + + return offset; +} + static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) { unsigned int nid, pin; @@ -72,8 +86,7 @@ static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) nid = hwirq / IRQS_PER_MBIGEN_NODE + 1; pin = hwirq % IRQS_PER_MBIGEN_NODE; - return pin * 4 + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_VEC_OFFSET; + return pin * 4 + get_mbigen_node_offset(nid) + REG_MBIGEN_VEC_OFFSET; } static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, @@ -88,8 +101,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, *mask = 1 << (irq_ofst % 32); ofst = irq_ofst / 32 * 4; - *addr = ofst + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_TYPE_OFFSET; + *addr = ofst + get_mbigen_node_offset(nid) + REG_MBIGEN_TYPE_OFFSET; } static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq, diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index 27e30ce41db3..cd789fa51519 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -178,7 +178,7 @@ struct meson_gpio_irq_controller { void __iomem *base; u32 channel_irqs[MAX_NUM_CHANNEL]; DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); - spinlock_t lock; + raw_spinlock_t lock; }; static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, @@ -187,14 +187,14 @@ static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, unsigned long flags; u32 tmp; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); tmp = readl_relaxed(ctl->base + reg); tmp &= ~mask; tmp |= val; writel_relaxed(tmp, ctl->base + reg); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); } static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) @@ -244,12 +244,12 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, unsigned long flags; unsigned int idx; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); /* Find a free channel */ idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); if (idx >= ctl->params->nr_channels) { - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); pr_err("No channel available\n"); return -ENOSPC; } @@ -257,7 +257,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* Mark the channel as used */ set_bit(idx, ctl->channel_map); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); /* * Setup the mux of the channel to route the signal of the pad @@ -567,7 +567,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node * if (!ctl) return -ENOMEM; - spin_lock_init(&ctl->lock); + raw_spin_lock_init(&ctl->lock); ctl->base = of_iomap(node, 0); if (!ctl->base) { diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c index 1d9bb28d13e5..eb6ca516a166 100644 --- a/drivers/irqchip/irq-pic32-evic.c +++ b/drivers/irqchip/irq-pic32-evic.c @@ -161,9 +161,9 @@ static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq, return ret; } -int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type) +static int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) { struct evic_chip_data *priv = d->host_data; @@ -190,13 +190,11 @@ static void __init pic32_ext_irq_of_init(struct irq_domain *domain) { struct device_node *node = irq_domain_get_of_node(domain); struct evic_chip_data *priv = domain->host_data; - struct property *prop; - const __le32 *p; u32 hwirq; int i = 0; const char *pname = "microchip,external-irqs"; - of_property_for_each_u32(node, pname, prop, p, hwirq) { + of_property_for_each_u32(node, pname, hwirq) { if (i >= ARRAY_SIZE(priv->ext_irqs)) { pr_warn("More than %d external irq, skip rest\n", ARRAY_SIZE(priv->ext_irqs)); diff --git a/drivers/irqchip/irq-riscv-aplic-msi.c b/drivers/irqchip/irq-riscv-aplic-msi.c index 028444af48bd..d7773f76e5d0 100644 --- a/drivers/irqchip/irq-riscv-aplic-msi.c +++ b/drivers/irqchip/irq-riscv-aplic-msi.c @@ -32,15 +32,10 @@ static void aplic_msi_irq_unmask(struct irq_data *d) aplic_irq_unmask(d); } -static void aplic_msi_irq_eoi(struct irq_data *d) +static void aplic_msi_irq_retrigger_level(struct irq_data *d) { struct aplic_priv *priv = irq_data_get_irq_chip_data(d); - /* - * EOI handling is required only for level-triggered interrupts - * when APLIC is in MSI mode. - */ - switch (irqd_get_trigger_type(d)) { case IRQ_TYPE_LEVEL_LOW: case IRQ_TYPE_LEVEL_HIGH: @@ -59,6 +54,29 @@ static void aplic_msi_irq_eoi(struct irq_data *d) } } +static void aplic_msi_irq_eoi(struct irq_data *d) +{ + /* + * EOI handling is required only for level-triggered interrupts + * when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); +} + +static int aplic_msi_irq_set_type(struct irq_data *d, unsigned int type) +{ + int rc = aplic_irq_set_type(d, type); + + if (rc) + return rc; + /* + * Updating sourcecfg register for level-triggered interrupts + * requires interrupt retriggering when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); + return 0; +} + static void aplic_msi_write_msg(struct irq_data *d, struct msi_msg *msg) { unsigned int group_index, hart_index, guest_index, val; @@ -130,7 +148,7 @@ static const struct msi_domain_template aplic_msi_template = { .name = "APLIC-MSI", .irq_mask = aplic_msi_irq_mask, .irq_unmask = aplic_msi_irq_unmask, - .irq_set_type = aplic_irq_set_type, + .irq_set_type = aplic_msi_irq_set_type, .irq_eoi = aplic_msi_irq_eoi, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/drivers/irqchip/irq-sun6i-r.c b/drivers/irqchip/irq-sun6i-r.c index a01e44049415..99958d470d62 100644 --- a/drivers/irqchip/irq-sun6i-r.c +++ b/drivers/irqchip/irq-sun6i-r.c @@ -270,7 +270,7 @@ static const struct irq_domain_ops sun6i_r_intc_domain_ops = { static int sun6i_r_intc_suspend(void) { - u32 buf[BITS_TO_U32(max(SUN6I_NR_TOP_LEVEL_IRQS, SUN6I_NR_MUX_BITS))]; + u32 buf[BITS_TO_U32(MAX(SUN6I_NR_TOP_LEVEL_IRQS, SUN6I_NR_MUX_BITS))]; int i; /* Wake IRQs are enabled during system sleep and shutdown. */ diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 238d3d344949..7e08714d507f 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -189,7 +189,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, irqc->intr_mask = 0; } - if (irqc->intr_mask >> irqc->nr_irq) + if ((u64)irqc->intr_mask >> irqc->nr_irq) pr_warn("irq-xilinx: mismatch in kind-of-intr param\n"); pr_info("irq-xilinx: %pOF: num_irq=%d, edge=0x%x\n", diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 48ac628f471b..51e6964c1305 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1776,7 +1776,7 @@ static void integrity_metadata(struct work_struct *w) struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); char *checksums; unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; - char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; sector_t sector; unsigned int sectors_to_process; @@ -2064,7 +2064,7 @@ retry_kmap: } while (++s < ic->sectors_per_block); #ifdef INTERNAL_VERIFY if (ic->internal_hash) { - char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { @@ -2837,7 +2837,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start unlikely(from_replay) && #endif ic->internal_hash) { - char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + char test_tag[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), (char *)access_journal_data(ic, i, l), test_tag); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c2c07bfa6471..f299ff393a6a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *param) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - if (!dm_suspended_md(md)) - dm_suspend(md, suspend_flags); + if (!dm_suspended_md(md)) { + r = dm_suspend(md, suspend_flags); + if (r) { + down_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (hc && !hc->new_map) { + hc->new_map = new_map; + new_map = NULL; + } else { + r = -ENXIO; + } + up_write(&_hash_lock); + if (new_map) { + dm_sync_table(md); + dm_table_destroy(new_map); + } + dm_put(md); + return r; + } + } old_size = dm_get_size(md); old_map = dm_swap_table(md, new_map); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 97fab2087df8..87bb90303435 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2737,7 +2737,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } @@ -2762,7 +2762,7 @@ static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_st break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 04698fd03e60..d48c4fafc779 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct dm_space_map *sm) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - kfree(smm); + kvfree(smm); } static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) @@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; - smm = kmalloc(sizeof(*smm), GFP_KERNEL); + smm = kvmalloc(sizeof(*smm), GFP_KERNEL); if (!smm) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7acfe7c9dc8d..761989d67906 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio) +{ + return !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors; +} + static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || - !test_bit(WriteMostly, &rdev->flags)) + !test_bit(WriteMostly, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio)) continue; /* there are no bad blocks, we can use this disk */ @@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) if (!rdev || test_bit(Faulty, &rdev->flags)) return false; - /* still in recovery */ - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + if (rdev_in_recovery(rdev, r1_bio)) return false; /* don't read from slow disk unless have to */ diff --git a/drivers/media/dvb-frontends/stv0367_priv.h b/drivers/media/dvb-frontends/stv0367_priv.h index 617f605947b2..7f056d1cce82 100644 --- a/drivers/media/dvb-frontends/stv0367_priv.h +++ b/drivers/media/dvb-frontends/stv0367_priv.h @@ -25,8 +25,11 @@ #endif /* MACRO definitions */ +#ifndef MIN #define MAX(X, Y) ((X) >= (Y) ? (X) : (Y)) #define MIN(X, Y) ((X) <= (Y) ? (X) : (Y)) +#endif + #define INRANGE(X, Y, Z) \ ((((X) <= (Y)) && ((Y) <= (Z))) || \ (((Z) <= (Y)) && ((Y) <= (X))) ? 1 : 0) diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig index 154343080c82..40e20f0aa5ae 100644 --- a/drivers/media/pci/intel/ipu6/Kconfig +++ b/drivers/media/pci/intel/ipu6/Kconfig @@ -3,13 +3,14 @@ config VIDEO_INTEL_IPU6 depends on ACPI || COMPILE_TEST depends on VIDEO_DEV depends on X86 && X86_64 && HAS_DMA + depends on IPU_BRIDGE || !IPU_BRIDGE + select AUXILIARY_BUS select DMA_OPS select IOMMU_IOVA select VIDEO_V4L2_SUBDEV_API select MEDIA_CONTROLLER select VIDEOBUF2_DMA_CONTIG select V4L2_FWNODE - select IPU_BRIDGE help This is the 6th Gen Intel Image Processing Unit, found in Intel SoCs and used for capturing images and video from camera sensors. diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 52aea4167718..717c441b4a86 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -828,8 +828,10 @@ struct rc_dev *rc_dev_get_from_fd(int fd, bool write) return ERR_PTR(-EINVAL); } - if (write && !(f.file->f_mode & FMODE_WRITE)) + if (write && !(f.file->f_mode & FMODE_WRITE)) { + fdput(f); return ERR_PTR(-EPERM); + } fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c index 22d83ac18eb7..fbf58012becd 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-init.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c @@ -23,40 +23,11 @@ static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0)."); -static int dvb_usb_check_bulk_endpoint(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - int ret; - - ret = usb_pipe_type_check(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - ret = usb_pipe_type_check(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - } - return 0; -} - -static void dvb_usb_clear_halt(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - } -} - static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) { struct dvb_usb_adapter *adap; int ret, n, o; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint); - if (ret) - return ret; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint_response); - if (ret) - return ret; for (n = 0; n < d->props.num_adapters; n++) { adap = &d->adapter[n]; adap->dev = d; @@ -132,8 +103,10 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) * when reloading the driver w/o replugging the device * sometimes a timeout occurs, this helps */ - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint); - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint_response); + if (d->props.generic_bulk_ctrl_endpoint != 0) { + usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + } return 0; diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 0136df5732ba..4fe26e82e3d1 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2680,6 +2680,10 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, for (i = 0; i < ARRAY_SIZE(uvc_ctrl_mappings); ++i) { const struct uvc_control_mapping *mapping = &uvc_ctrl_mappings[i]; + if (!uvc_entity_match_guid(ctrl->entity, mapping->entity) || + ctrl->info.selector != mapping->selector) + continue; + /* Let the device provide a custom mapping. */ if (mapping->filter_mapping) { mapping = mapping->filter_mapping(chain, ctrl); @@ -2687,9 +2691,7 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, continue; } - if (uvc_entity_match_guid(ctrl->entity, mapping->entity) && - ctrl->info.selector == mapping->selector) - __uvc_ctrl_add_mapping(chain, ctrl, mapping); + __uvc_ctrl_add_mapping(chain, ctrl, mapping); } } diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 4bbd542d753e..0c1364d88469 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -119,8 +119,6 @@ static int ti_tscadc_probe(struct platform_device *pdev) struct clk *clk; struct device_node *node; struct mfd_cell *cell; - struct property *prop; - const __be32 *cur; bool use_tsc = false, use_mag = false; u32 val; int err; @@ -167,7 +165,7 @@ static int ti_tscadc_probe(struct platform_device *pdev) } node = of_get_child_by_name(pdev->dev.of_node, "adc"); - of_property_for_each_u32(node, "ti,adc-channels", prop, cur, val) { + of_property_for_each_u32(node, "ti,adc-channels", val) { adc_channels++; if (val > 7) { dev_err(&pdev->dev, " PIN numbers are 0..7 (not %d)\n", diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 41c3d2821a78..41c54051347a 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -587,7 +587,7 @@ config NSM config MARVELL_CN10K_DPI tristate "Octeon CN10K DPI driver" - depends on PCI + depends on PCI && PCI_IOV depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Enables Octeon CN10K DMA packet interface (DPI) driver which diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index d4aeeb2b2169..89224d4af4a2 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -233,6 +233,49 @@ static void ee1004_cleanup_bus_data(void *data) mutex_unlock(&ee1004_bus_lock); } +static int ee1004_init_bus_data(struct i2c_client *client) +{ + struct ee1004_bus_data *bd; + int err, cnr = 0; + + bd = ee1004_get_bus_data(client->adapter); + if (!bd) + return dev_err_probe(&client->dev, -ENOSPC, "Only %d busses supported", + EE1004_MAX_BUSSES); + + i2c_set_clientdata(client, bd); + + if (++bd->dev_count == 1) { + /* Use 2 dummy devices for page select command */ + for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { + struct i2c_client *cl; + + cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_out; + } + + bd->set_page[cnr] = cl; + } + + /* Remember current page to avoid unneeded page select */ + err = ee1004_get_current_page(bd); + if (err < 0) + goto err_out; + + dev_dbg(&client->dev, "Currently selected page: %d\n", err); + bd->current_page = err; + } + + return 0; + +err_out: + ee1004_cleanup(cnr, bd); + + return err; +} + static int ee1004_probe(struct i2c_client *client) { struct nvmem_config config = { @@ -251,9 +294,8 @@ static int ee1004_probe(struct i2c_client *client) .compat = true, .base_dev = &client->dev, }; - struct ee1004_bus_data *bd; struct nvmem_device *ndev; - int err, cnr = 0; + int err; /* Make sure we can operate on this adapter */ if (!i2c_check_functionality(client->adapter, @@ -264,46 +306,21 @@ static int ee1004_probe(struct i2c_client *client) mutex_lock(&ee1004_bus_lock); - bd = ee1004_get_bus_data(client->adapter); - if (!bd) { + err = ee1004_init_bus_data(client); + if (err < 0) { mutex_unlock(&ee1004_bus_lock); - return dev_err_probe(&client->dev, -ENOSPC, - "Only %d busses supported", EE1004_MAX_BUSSES); - } - - err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, bd); - if (err < 0) return err; - - i2c_set_clientdata(client, bd); - - if (++bd->dev_count == 1) { - /* Use 2 dummy devices for page select command */ - for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { - struct i2c_client *cl; - - cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); - if (IS_ERR(cl)) { - mutex_unlock(&ee1004_bus_lock); - return PTR_ERR(cl); - } - bd->set_page[cnr] = cl; - } - - /* Remember current page to avoid unneeded page select */ - err = ee1004_get_current_page(bd); - if (err < 0) { - mutex_unlock(&ee1004_bus_lock); - return err; - } - dev_dbg(&client->dev, "Currently selected page: %d\n", err); - bd->current_page = err; } ee1004_probe_temp_sensor(client); mutex_unlock(&ee1004_bus_lock); + err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, + i2c_get_clientdata(client)); + if (err < 0) + return err; + ndev = devm_nvmem_register(&client->dev, &config); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 5204fda51da3..339d126414d4 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2085,16 +2085,6 @@ err_invoke: return err; } -static int is_attach_rejected(struct fastrpc_user *fl) -{ - /* Check if the device node is non-secure */ - if (!fl->is_secure_dev) { - dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); - return -EACCES; - } - return 0; -} - static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2107,19 +2097,13 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, ROOT_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, SENSORS_PD); + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_create_static_process(fl, argp); + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 5cd488f54cfa..8f744bee6fbd 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -182,6 +182,21 @@ static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) check_negative(&neg, 3); } +/* + * A refcount_sub_and_test() by zero when the counter is at zero should act like + * refcount_sub_and_test() above when going negative. + */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_ZERO(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_sub_and_test() at zero\n"); + if (refcount_sub_and_test(0, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + static void check_from_zero(refcount_t *ref) { switch (refcount_read(ref)) { @@ -400,6 +415,7 @@ static struct crashtype crashtypes[] = { CRASHTYPE(REFCOUNT_DEC_NEGATIVE), CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_ZERO), CRASHTYPE(REFCOUNT_INC_ZERO), CRASHTYPE(REFCOUNT_ADD_ZERO), CRASHTYPE(REFCOUNT_INC_SATURATED), diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index f82e3423acb9..60d0155be869 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -390,7 +390,8 @@ int ubiblock_create(struct ubi_volume_info *vi) ret = blk_mq_alloc_tag_set(&dev->tag_set); if (ret) { - dev_err(disk_to_dev(dev->gd), "blk_mq_alloc_tag_set failed"); + pr_err("ubiblock%d_%d: blk_mq_alloc_tag_set failed\n", + dev->ubi_num, dev->vol_id); goto out_free_dev; } @@ -407,8 +408,8 @@ int ubiblock_create(struct ubi_volume_info *vi) gd->minors = 1; gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL); if (gd->first_minor < 0) { - dev_err(disk_to_dev(gd), - "block: dynamic minor allocation failed"); + pr_err("ubiblock%d_%d: block: dynamic minor allocation failed\n", + dev->ubi_num, dev->vol_id); ret = -ENODEV; goto out_cleanup_disk; } @@ -669,7 +670,7 @@ err_unreg: return ret; } -void __exit ubiblock_exit(void) +void ubiblock_exit(void) { ubi_unregister_volume_notifier(&ubiblock_notifier); ubiblock_remove_all(); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a7e3a6246c0e..30be4ed68fad 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -112,7 +112,7 @@ static struct attribute *ubi_class_attrs[] = { ATTRIBUTE_GROUPS(ubi_class); /* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ -struct class ubi_class = { +const struct class ubi_class = { .name = UBI_NAME_STR, .class_groups = ubi_class_groups, }; @@ -1372,7 +1372,7 @@ static int __init ubi_init(void) /* See comment above re-ubi_is_module(). */ if (ubi_is_module()) - goto out_slab; + goto out_debugfs; } register_mtd_user(&ubi_mtd_notifier); @@ -1387,6 +1387,9 @@ static int __init ubi_init(void) out_mtd_notifier: unregister_mtd_user(&ubi_mtd_notifier); + ubiblock_exit(); +out_debugfs: + ubi_debugfs_exit(); out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index d57f52bd2ff3..9ec3b8b6a0aa 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -598,9 +598,9 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN, UBI_DFS_DIR_NAME, ubi->ubi_num); - if (n > UBI_DFS_DIR_LEN) { + if (n >= UBI_DFS_DIR_LEN) { /* The array size is too small */ return -EINVAL; } diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index e5ac3cd0bbae..c7ba7a15c9f7 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1564,6 +1564,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, GFP_KERNEL); if (!fm_eba[i]) { ret = -ENOMEM; + kfree(scan_eba[i]); goto out_free; } @@ -1599,7 +1600,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, } out_free: - for (i = 0; i < num_volumes; i++) { + while (--i >= 0) { if (!ubi->volumes[i]) continue; diff --git a/drivers/mtd/ubi/nvmem.c b/drivers/mtd/ubi/nvmem.c index 8aeb9c428e51..a94a1a9aaec1 100644 --- a/drivers/mtd/ubi/nvmem.c +++ b/drivers/mtd/ubi/nvmem.c @@ -6,7 +6,6 @@ /* UBI NVMEM provider */ #include "ubi.h" #include <linux/nvmem-provider.h> -#include <asm/div64.h> /* List of all NVMEM devices */ static LIST_HEAD(nvmem_devices); @@ -27,14 +26,15 @@ static int ubi_nvmem_reg_read(void *priv, unsigned int from, struct ubi_nvmem *unv = priv; struct ubi_volume_desc *desc; uint32_t offs; - uint64_t lnum = from; + uint32_t lnum; int err = 0; desc = ubi_open_volume(unv->ubi_num, unv->vol_id, UBI_READONLY); if (IS_ERR(desc)) return PTR_ERR(desc); - offs = do_div(lnum, unv->usable_leb_size); + offs = from % unv->usable_leb_size; + lnum = from / unv->usable_leb_size; while (bytes_left) { to_read = unv->usable_leb_size - offs; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 32009a24869e..1c9e874e8ede 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -420,7 +420,7 @@ struct ubi_debug_info { unsigned int power_cut_min; unsigned int power_cut_max; unsigned int emulate_failures; - char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + char dfs_dir_name[UBI_DFS_DIR_LEN]; struct dentry *dfs_dir; struct dentry *dfs_chk_gen; struct dentry *dfs_chk_io; @@ -814,7 +814,7 @@ extern struct kmem_cache *ubi_wl_entry_slab; extern const struct file_operations ubi_ctrl_cdev_operations; extern const struct file_operations ubi_cdev_operations; extern const struct file_operations ubi_vol_cdev_operations; -extern struct class ubi_class; +extern const struct class ubi_class; extern struct mutex ubi_devices_mutex; extern struct blocking_notifier_head ubi_notifiers; diff --git a/drivers/net/can/usb/etas_es58x/es58x_devlink.c b/drivers/net/can/usb/etas_es58x/es58x_devlink.c index 635edeb8f68c..eee20839d96f 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_devlink.c +++ b/drivers/net/can/usb/etas_es58x/es58x_devlink.c @@ -215,7 +215,7 @@ static int es58x_devlink_info_get(struct devlink *devlink, struct es58x_sw_version *fw_ver = &es58x_dev->firmware_version; struct es58x_sw_version *bl_ver = &es58x_dev->bootloader_version; struct es58x_hw_revision *hw_rev = &es58x_dev->hardware_revision; - char buf[max(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; + char buf[MAX(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; int ret = 0; if (es58x_sw_version_is_valid(fw_ver)) { diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ed1e6560df25..0e663ec0c12a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -675,8 +675,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) of_remove_property(child, prop); phydev = of_phy_find_device(child); - if (phydev) + if (phydev) { phy_device_remove(phydev); + phy_device_free(phydev); + } } err = mdiobus_register(priv->user_mii_bus); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b074b4bb0629..1491099528be 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2578,7 +2578,11 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; + case KSZ8567_CHIP_ID: case KSZ9477_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: /* KSZ9477 Errata DS80000754C * * Module 4: Energy Efficient Ethernet (EEE) feature select must @@ -2588,6 +2592,13 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) * controls. If not disabled, the PHY ports can auto-negotiate * to enable EEE, and this feature can cause link drops when * linked to another device supporting EEE. + * + * The same item appears in the errata for the KSZ9567, KSZ9896, + * and KSZ9897. + * + * A similar item appears in the errata for the KSZ8567, but + * provides an alternative workaround. For now, use the simple + * workaround of disabling the EEE feature for this device too. */ return MICREL_NO_EEE; } @@ -3764,6 +3775,11 @@ static int ksz_port_set_mac_address(struct dsa_switch *ds, int port, return -EBUSY; } + /* Need to initialize variable as the code to fill in settings may + * not be executed. + */ + wol.wolopts = 0; + ksz_get_wol(ds, dp->index, &wol); if (wol.wolopts & WAKE_MAGIC) { dev_err(ds->dev, diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index d9d3e30fd47a..e3f95d2cc2c1 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -40,6 +40,10 @@ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ +/* MII Block subblock */ +#define VSC73XX_BLOCK_MII_INTERNAL 0x0 /* Internal MDIO subblock */ +#define VSC73XX_BLOCK_MII_EXTERNAL 0x1 /* External MDIO subblock */ + #define CPU_PORT 6 /* CPU port */ /* MAC Block registers */ @@ -225,6 +229,8 @@ #define VSC73XX_MII_CMD 0x1 #define VSC73XX_MII_DATA 0x2 +#define VSC73XX_MII_STAT_BUSY BIT(3) + /* Arbiter block 5 registers */ #define VSC73XX_ARBEMPTY 0x0c #define VSC73XX_ARBDISC 0x0e @@ -299,6 +305,7 @@ #define IS_739X(a) (IS_7395(a) || IS_7398(a)) #define VSC73XX_POLL_SLEEP_US 1000 +#define VSC73XX_MDIO_POLL_SLEEP_US 5 #define VSC73XX_POLL_TIMEOUT_US 10000 struct vsc73xx_counter { @@ -527,6 +534,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc) return 0; } +static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc) +{ + int ret, err; + u32 val; + + ret = read_poll_timeout(vsc73xx_read, err, + err < 0 || !(val & VSC73XX_MII_STAT_BUSY), + VSC73XX_MDIO_POLL_SLEEP_US, + VSC73XX_POLL_TIMEOUT_US, false, vsc, + VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL, + VSC73XX_MII_STAT, &val); + if (ret) + return ret; + return err; +} + static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) { struct vsc73xx *vsc = ds->priv; @@ -534,12 +557,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) u32 val; int ret; + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + /* Setting bit 26 means "read" */ cmd = BIT(26) | (phy << 21) | (regnum << 16); ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; - msleep(2); + + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val); if (ret) return ret; @@ -563,18 +594,11 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, u32 cmd; int ret; - /* It was found through tedious experiments that this router - * chip really hates to have it's PHYs reset. They - * never recover if that happens: autonegotiation stops - * working after a reset. Just filter out this command. - * (Resetting the whole chip is OK.) - */ - if (regnum == 0 && (val & BIT(15))) { - dev_info(vsc->dev, "reset PHY - disallowed\n"); - return 0; - } + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; - cmd = (phy << 21) | (regnum << 16); + cmd = (phy << 21) | (regnum << 16) | val; ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; @@ -957,6 +981,11 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, if (duplex == DUPLEX_FULL) val |= VSC73XX_MAC_CFG_FDX; + else + /* In datasheet description ("Port Mode Procedure" in 5.6.2) + * this bit is configured only for half duplex. + */ + val |= VSC73XX_MAC_CFG_WEXC_DIS; /* This routine is described in the datasheet (below ARBDISC register * description) @@ -967,7 +996,6 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, get_random_bytes(&seed, 1); val |= seed << VSC73XX_MAC_CFG_SEED_OFFSET; val |= VSC73XX_MAC_CFG_SEED_LOAD; - val |= VSC73XX_MAC_CFG_WEXC_DIS; /* Those bits are responsible for MTU only. Kernel takes care about MTU, * let's enable +8 bytes frame length unconditionally. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ffa74c26ee53..e27e1082ee33 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7591,19 +7591,20 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) int rx = bp->rx_nr_rings, stat; int vnic, grp = rx; - if (hw_resc->resv_tx_rings != bp->tx_nr_rings && - bp->hwrm_spec_code >= 0x10601) - return true; - /* Old firmware does not need RX ring reservations but we still * need to setup a default RSS map when needed. With new firmware * we go through RX ring reservations first and then set up the * RSS map for the successfully reserved RX rings when needed. */ - if (!BNXT_NEW_RM(bp)) { + if (!BNXT_NEW_RM(bp)) bnxt_check_rss_tbl_no_rmgr(bp); + + if (hw_resc->resv_tx_rings != bp->tx_nr_rings && + bp->hwrm_spec_code >= 0x10601) + return true; + + if (!BNXT_NEW_RM(bp)) return false; - } vnic = bnxt_get_total_vnics(bp, rx); @@ -7649,8 +7650,8 @@ static int bnxt_get_avail_msix(struct bnxt *bp, int num); static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int rx_rings, old_rx_rings, rc; int cp = bp->cp_nr_rings; - int rx_rings, rc; int ulp_msix = 0; bool sh = false; int tx_cp; @@ -7684,6 +7685,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.grp = bp->rx_nr_rings; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); hwr.stat = bnxt_get_func_stat_ctxs(bp); + old_rx_rings = bp->hw_resc.resv_rx_rings; rc = bnxt_hwrm_reserve_rings(bp, &hwr); if (rc) @@ -7738,7 +7740,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!bnxt_rings_ok(bp, &hwr)) return -ENOMEM; - if (!netif_is_rxfh_configured(bp->dev)) + if (old_rx_rings != bp->hw_resc.resv_rx_rings && + !netif_is_rxfh_configured(bp->dev)) bnxt_set_dflt_rss_indir_tbl(bp, NULL); if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d00ef0063820..9dadc89378f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1863,8 +1863,14 @@ static void bnxt_modify_rss(struct bnxt *bp, struct ethtool_rxfh_context *ctx, } static int bnxt_rxfh_context_check(struct bnxt *bp, + const struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); return -EOPNOTSUPP; @@ -1888,7 +1894,7 @@ static int bnxt_create_rxfh_context(struct net_device *dev, struct bnxt_vnic_info *vnic; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1915,8 +1921,12 @@ static int bnxt_create_rxfh_context(struct net_device *dev, if (rc) goto out; + /* Populate defaults in the context */ bnxt_set_dflt_rss_indir_tbl(bp, ctx); + ctx->hfunc = ETH_RSS_HASH_TOP; memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + memcpy(ethtool_rxfh_context_key(ctx), + bp->rss_hash_key, HW_HASH_KEY_SIZE); rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); if (rc) { @@ -1953,7 +1963,7 @@ static int bnxt_modify_rxfh_context(struct net_device *dev, struct bnxt_rss_ctx *rss_ctx; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -5280,7 +5290,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, .cap_rss_ctx_supported = 1, - .rxfh_max_context_id = BNXT_MAX_ETH_RSS_CTX, + .rxfh_max_num_contexts = BNXT_MAX_ETH_RSS_CTX + 1, .rxfh_indir_space = BNXT_MAX_RSS_TABLE_ENTRIES_P5, .rxfh_priv_size = sizeof(struct bnxt_rss_ctx), .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1248792d7fd4..0715ea5bf13e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,19 +42,15 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; - if (dev->phydev) { + if (dev->phydev) phy_ethtool_get_wol(dev->phydev, wol); - if (wol->supported) - return; - } - if (!device_can_wakeup(kdev)) { - wol->supported = 0; - wol->wolopts = 0; + /* MAC is not wake-up capable, return what the PHY does */ + if (!device_can_wakeup(kdev)) return; - } - wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; + /* Overlay MAC capabilities with that of the PHY queried before */ + wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; memset(wol->sopass, 0, sizeof(wol->sopass)); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 11665be3a22c..dcd3f54ed0cf 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5250,8 +5250,8 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wol & MACB_WOL_ENABLED) { /* Check for IP address in WOL ARP mode */ idev = __in_dev_get_rcu(bp->dev); - if (idev && idev->ifa_list) - ifa = rcu_access_pointer(idev->ifa_list); + if (idev) + ifa = rcu_dereference(idev->ifa_list); if ((bp->wolopts & WAKE_ARP) && !ifa) { netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index a40c266c37f2..608cc6af5af1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1054,18 +1054,12 @@ static int phy_interface_mode(u8 lmac_type) static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) { - struct lmac *lmac, **priv; + struct lmac *lmac; u64 cfg; lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - lmac->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); - if (!lmac->netdev) - return -ENOMEM; - priv = netdev_priv(lmac->netdev); - *priv = lmac; - if ((lmac->lmac_type == BGX_MODE_SGMII) || (lmac->lmac_type == BGX_MODE_QSGMII) || (lmac->lmac_type == BGX_MODE_RGMII)) { @@ -1191,7 +1185,6 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); - free_netdev(lmac->netdev); lmac->phydev = NULL; } @@ -1653,6 +1646,23 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_get_qlm_mode(bgx); + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + struct lmac *lmacp, **priv; + + lmacp = &bgx->lmac[lmac]; + lmacp->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); + + if (!lmacp->netdev) { + for (int i = 0; i < lmac; i++) + free_netdev(bgx->lmac[i].netdev); + err = -ENOMEM; + goto err_enable; + } + + priv = netdev_priv(lmacp->netdev); + *priv = lmacp; + } + err = bgx_init_phy(bgx); if (err) goto err_enable; @@ -1692,8 +1702,10 @@ static void bgx_remove(struct pci_dev *pdev) u8 lmac; /* Disable all LMACs */ - for (lmac = 0; lmac < bgx->lmac_count; lmac++) + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { bgx_lmac_disable(bgx, lmac); + free_netdev(bgx->lmac[lmac].netdev); + } pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index e32f6724f568..2e4f3e1782a2 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -775,6 +775,9 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); hrtimer_cancel(&fep->perout_timer); if (fep->ptp_clock) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3480ff5c7ed6..5a8b490ab3ad 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -495,7 +495,7 @@ static int gve_set_channels(struct net_device *netdev, return -EINVAL; } - if (!netif_carrier_ok(netdev)) { + if (!netif_running(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; return 0; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9744b426940e..661566db68c8 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1566,7 +1566,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1847,7 +1847,7 @@ int gve_adjust_queues(struct gve_priv *priv, rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; - if (netif_carrier_ok(priv->dev)) { + if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2064,7 +2064,7 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { + if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) goto revert_features; @@ -2359,7 +2359,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2700,7 +2700,7 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2718,7 +2718,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a5fc0209d628..4cbc4d069a1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5724,6 +5724,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + hns3_nic_net_stop(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index e132c2f09560..cc7f46c0b35f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1598,8 +1598,7 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) { u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; struct hclge_mod_reg_common_msg msg; - u8 i, j, num; - u32 loop_time; + u8 i, j, num, loop_time; num = ARRAY_SIZE(hclge_ssu_reg_common_msg); for (i = 0; i < num; i++) { @@ -1609,7 +1608,8 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) loop_time = 1; loop_para[0] = 0; if (msg.need_para) { - loop_time = hdev->ae_dev->dev_specs.tnl_num; + loop_time = min(hdev->ae_dev->dev_specs.tnl_num, + HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE); for (j = 0; j < loop_time; j++) loop_para[j] = j + 1; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 82574ce0194f..6c33195a1168 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2653,8 +2653,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + + if (ret) + return ret; - return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + hdev->hw.mac.req_speed = speed; + hdev->hw.mac.req_duplex = duplex; + + return 0; } static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) @@ -2956,17 +2965,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) hdev->hw.mac.duplex = HCLGE_MAC_FULL; - ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, - hdev->hw.mac.duplex, hdev->hw.mac.lane_num); - if (ret) - return ret; - if (hdev->hw.mac.support_autoneg) { ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); if (ret) return ret; } + if (!hdev->hw.mac.autoneg) { + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed, + hdev->hw.mac.req_duplex, + hdev->hw.mac.lane_num); + if (ret) + return ret; + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { @@ -11444,7 +11456,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_free_irq_vectors(pdev); - pci_release_mem_regions(pdev); + pci_release_regions(pdev); pci_disable_device(pdev); } @@ -11516,8 +11528,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 85fb11de43a1..80079657afeb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev) if (ret) netdev_err(netdev, "failed to adjust link.\n"); + hdev->hw.mac.req_speed = (u32)speed; + hdev->hw.mac.req_duplex = (u8)duplex; + ret = hclge_cfg_flowctrl(hdev); if (ret) netdev_err(netdev, "failed to configure flow control.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3735d2fed11f..094a7c7b5592 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1747,8 +1747,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 99a75a59078e..caaa10157909 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -765,18 +765,17 @@ static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, } /** - * ice_xsk_pool - get XSK buffer pool bound to a ring + * ice_rx_xsk_pool - assign XSK buff pool to Rx ring * @ring: Rx ring to use * - * Returns a pointer to xsk_buff_pool structure if there is a buffer pool - * present, NULL otherwise. + * Sets XSK buff pool pointer on Rx ring. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) +static inline void ice_rx_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - return ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** @@ -801,7 +800,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) if (!ring) return; - ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5d396c1a7731..1facf179a96f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -536,7 +536,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return err; } - ring->xsk_pool = ice_xsk_pool(ring); + ice_rx_xsk_pool(ring); if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); @@ -597,7 +597,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return 0; } - ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + ok = ice_alloc_rx_bufs_zc(ring, ring->xsk_pool, num_bufs); if (!ok) { u16 pf_q = ring->vsi->rxq_map[ring->q_index]; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8c990c976132..bc79ba974e49 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4673,10 +4673,10 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, if (err) return err; - fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) + - fec_corr_low_val; - fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) + - fec_uncorr_low_val; + fec_stats->corrected_blocks.total = (fec_corr_high_val << 16) + + fec_corr_low_val; + fec_stats->uncorrectable_blocks.total = (fec_uncorr_high_val << 16) + + fec_uncorr_low_val; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ec636be4d17d..6f97ed471fe9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -559,6 +559,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ @@ -2948,7 +2950,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) { struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_pool) + if (READ_ONCE(rx_ring->xsk_pool)) napi_schedule(&rx_ring->q_vector->napi); } } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e2786cc13286..ef2e858f49bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1477,6 +1477,10 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; + /* Skip HW writes if reset is in progress */ + if (pf->hw.reset_ongoing) + return; + switch (hw->ptp.phy_model) { case ICE_PHY_E810: /* Do not reconfigure E810 PHY */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8bb743f78fcb..8d25b6981269 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -456,7 +456,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - rx_ring->xdp_prog = NULL; + WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; @@ -1521,10 +1521,11 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_tx_ring(tx_ring, q_vector->tx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool); bool wd; - if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring); + if (xsk_pool) + wd = ice_xmit_zc(tx_ring, xsk_pool); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1550,6 +1551,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = budget; ice_for_each_rx_ring(rx_ring, q_vector->rx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool); int cleaned; /* A dedicated path for zero-copy allows making a single @@ -1557,7 +1559,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * ice_clean_rx_irq function and makes the codebase cleaner. */ cleaned = rx_ring->xsk_pool ? - ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) : ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a65955eb23c0..240a7bec242b 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -52,10 +52,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) { - synchronize_rcu(); + if (ice_is_xdp_ena_vsi(vsi)) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -112,25 +110,29 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, * ice_qvec_cfg_msix - Enable IRQ for given queue vector * @vsi: the VSI that contains queue vector * @q_vector: queue vector + * @qid: queue index */ static void -ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) { u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; + int q, _qid = qid; ice_cfg_itr(hw, q_vector); - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, - q_vector->tx.itr_idx); + for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_cfg_txq_interrupt(vsi, _qid, reg_idx, q_vector->tx.itr_idx); + _qid++; + } + + _qid = qid; - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, - q_vector->rx.itr_idx); + for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_cfg_rxq_interrupt(vsi, _qid, reg_idx, q_vector->rx.itr_idx); + _qid++; + } ice_flush(hw); } @@ -164,6 +166,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; int timeout = 50; + int fail = 0; int err; if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) @@ -180,15 +183,17 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) usleep_range(1000, 2000); } + synchronize_net(); + netif_carrier_off(vsi->netdev); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + ice_qvec_dis_irq(vsi, rx_ring, q_vector); ice_qvec_toggle_napi(vsi, q_vector, false); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; @@ -196,17 +201,15 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; } - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); - if (err) - return err; + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); - return 0; + return fail; } /** @@ -219,40 +222,48 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_q_vector *q_vector; + int fail = 0; + bool link_up; int err; err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; ice_set_ring_xdp(xdp_ring); ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (err) - return err; + if (!fail) + fail = err; q_vector = vsi->rx_rings[q_idx]->q_vector; - ice_qvec_cfg_msix(vsi, q_vector); + ice_qvec_cfg_msix(vsi, q_vector, q_idx); err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); - if (err) - return err; + if (!fail) + fail = err; ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ + synchronize_net(); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } clear_bit(ICE_CFG_BUSY, vsi->state); - return 0; + return fail; } /** @@ -459,6 +470,7 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, /** * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Place the @count of descriptors onto Rx ring. Handle the ring wrap @@ -467,7 +479,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, * * Returns true if all allocations were successful, false if any fail. */ -static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u32 nb_buffs_extra = 0, nb_buffs = 0; union ice_32b_rx_flex_desc *rx_desc; @@ -479,8 +492,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp = ice_xdp_buf(rx_ring, ntu); if (ntu + count >= rx_ring->count) { - nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, - rx_desc, + nb_buffs_extra = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, rx_ring->count - ntu); if (nb_buffs_extra != rx_ring->count - ntu) { ntu += nb_buffs_extra; @@ -493,7 +505,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, 0); } - nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + nb_buffs = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, count); ntu += nb_buffs; if (ntu == rx_ring->count) @@ -509,6 +521,7 @@ exit: /** * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Wrapper for internal allocation routine; figure out how many tail @@ -516,7 +529,8 @@ exit: * * Returns true if all calls to internal alloc routine succeeded */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); u16 leftover, i, tail_bumps; @@ -525,9 +539,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) - if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + if (!__ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, rx_thresh)) return false; - return __ice_alloc_rx_bufs_zc(rx_ring, leftover); + return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover); } /** @@ -596,8 +610,10 @@ out: /** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring + * @xsk_pool: AF_XDP buffer pool pointer */ -static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) +static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; @@ -648,7 +664,7 @@ skip: if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + xsk_tx_completed(xsk_pool, xsk_frames); return completed_frames; } @@ -657,6 +673,7 @@ skip: * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX * @xdp: XDP buffer to xmit * @xdp_ring: XDP ring to produce descriptor onto + * @xsk_pool: AF_XDP buffer pool pointer * * note that this function works directly on xdp_buff, no need to convert * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning @@ -666,7 +683,8 @@ skip: * was not enough space on XDP ring */ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, - struct ice_tx_ring *xdp_ring) + struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { struct skb_shared_info *sinfo = NULL; u32 size = xdp->data_end - xdp->data; @@ -680,7 +698,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) - free_space += ice_clean_xdp_irq_zc(xdp_ring); + free_space += ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); if (unlikely(!free_space)) goto busy; @@ -700,7 +718,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, dma_addr_t dma; dma = xsk_buff_xdp_get_dma(xdp); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, size); tx_buf->xdp = xdp; tx_buf->type = ICE_TX_BUF_XSK_TX; @@ -742,12 +760,14 @@ busy: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @xsk_pool: AF_XDP buffer pool pointer * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { int err, result = ICE_XDP_PASS; u32 act; @@ -758,7 +778,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (!err) return ICE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS) result = ICE_XDP_EXIT; else result = ICE_XDP_CONSUMED; @@ -769,7 +789,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: break; case XDP_TX: - result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring, xsk_pool); if (result == ICE_XDP_CONSUMED) goto out_failure; break; @@ -821,14 +841,16 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, /** * ice_clean_rx_irq_zc - consumes packets from the hardware ring * @rx_ring: AF_XDP Rx ring + * @xsk_pool: AF_XDP buffer pool pointer * @budget: NAPI budget * * Returns number of processed packets on success, remaining budget on failure. */ -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; struct xdp_buff *first = NULL; @@ -891,7 +913,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, + xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { xdp_xmit |= xdp_res; } else if (xdp_res == ICE_XDP_EXIT) { @@ -940,7 +963,8 @@ construct_skb: rx_ring->next_to_clean = ntc; entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) - failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); + failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, + entries_to_alloc); ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); @@ -963,17 +987,19 @@ construct_skb: /** * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @desc: AF_XDP descriptor to pull the DMA address and length from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, struct xdp_desc *desc, unsigned int *total_bytes) { struct ice_tx_desc *tx_desc; dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + dma = xsk_buff_raw_get_dma(xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, desc->len); tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -986,10 +1012,13 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, /** * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, unsigned int *total_bytes) { u16 ntu = xdp_ring->next_to_use; @@ -999,8 +1028,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); + dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len); tx_desc = ICE_TX_DESC(xdp_ring, ntu++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -1016,60 +1045,69 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de /** * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @nb_pkts: count of packets to be send * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, - u32 nb_pkts, unsigned int *total_bytes) +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) { u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); leftover = nb_pkts & (PKTS_PER_BATCH - 1); for (i = 0; i < batched; i += PKTS_PER_BATCH) - ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt_batch(xdp_ring, xsk_pool, &descs[i], total_bytes); for (; i < batched + leftover; i++) - ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt(xdp_ring, xsk_pool, &descs[i], total_bytes); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: AF_XDP buffer pool pointer * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool) { - struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct xdp_desc *descs = xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; int budget; - ice_clean_xdp_irq_zc(xdp_ring); + ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); + + if (!netif_carrier_ok(xdp_ring->vsi->netdev) || + !netif_running(xdp_ring->vsi->netdev)) + return true; budget = ICE_DESC_UNUSED(xdp_ring); budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; - ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, descs, nb_processed, + &total_bytes); xdp_ring->next_to_use = 0; } - ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, - &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, &descs[nb_processed], + nb_pkts - nb_processed, &total_bytes); ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); - if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) - xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); return nb_pkts < budget; } @@ -1091,7 +1129,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_VSI_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) @@ -1102,7 +1140,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, ring = vsi->rx_rings[queue_id]->xdp_ring; - if (!ring->xsk_pool) + if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 6fa181f080ef..45adeb513253 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -20,16 +20,20 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); #else -static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring) +static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + struct xsk_buff_pool __always_unused *xsk_pool) { return false; } @@ -44,6 +48,7 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, static inline int ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, int __always_unused budget) { return 0; @@ -51,6 +56,7 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, u16 __always_unused count) { return false; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5dbf2b4ba1b0..0b6c8fd5bc90 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -900,8 +900,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1335,9 +1335,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1350,45 +1349,43 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1455,10 +1452,10 @@ unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } @@ -1539,7 +1536,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1898,9 +1895,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1932,17 +1926,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2171,7 +2171,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index af2879f03b8d..585c3dadd9bf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3576,9 +3576,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->complq); @@ -3593,26 +3591,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) free_cpumask_var(q_vector->affinity_mask); } - /* Clean up the mapping of queues to vectors */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } @@ -3780,13 +3758,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; @@ -3804,8 +3784,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name, - vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); @@ -4326,7 +4306,6 @@ error: */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4340,11 +4319,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 5f92b3c7c3d4..511384f3ec5c 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -404,6 +404,12 @@ #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Retry Buffer Control */ +#define IGC_RETX_CTL 0x041C +#define IGC_RETX_CTL_WATERMARK_MASK 0xF +#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ +#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ + /* Transmit Scheduling Latency */ /* Latency between transmission scheduling (LaunchTime) and the time * the packet is transmitted to the network in nanosecond. diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cb5c7b09e8a0..dfd6c00b4205 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6306,21 +6306,6 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - break; - case TAPRIO_CMD_DESTROY: - return igc_tsn_clear_schedule(adapter); - case TAPRIO_CMD_STATS: - igc_taprio_stats(adapter->netdev, &qopt->stats); - return 0; - case TAPRIO_CMD_QUEUE_STATS: - igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); - return 0; - default: - return -EOPNOTSUPP; - } - if (qopt->base_time < 0) return -ERANGE; @@ -6330,12 +6315,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; + igc_ptp_read(adapter, &now); + + if (igc_tsn_is_taprio_activated_by_user(adapter) && + is_base_time_past(qopt->base_time, &now)) + adapter->qbv_config_change_errors++; + adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; adapter->taprio_offload_enable = true; - igc_ptp_read(adapter, &now); - for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6429,7 +6418,23 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, if (hw->mac.type != igc_i225) return -EOPNOTSUPP; - err = igc_save_qbv_schedule(adapter, qopt); + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + err = igc_save_qbv_schedule(adapter, qopt); + break; + case TAPRIO_CMD_DESTROY: + err = igc_tsn_clear_schedule(adapter); + break; + case TAPRIO_CMD_STATS: + igc_taprio_stats(adapter->netdev, &qopt->stats); + return 0; + case TAPRIO_CMD_QUEUE_STATS: + igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); + return 0; + default: + return -EOPNOTSUPP; + } + if (err) return err; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 22cefb1eeedf..d68fa7f3d5f0 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -49,12 +49,19 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) return new_flags; } +static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); +} + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u16 txoffset; - if (!is_any_launchtime(adapter)) + if (!igc_tsn_is_tx_mode_in_tsn(adapter)) return; switch (adapter->link_speed) { @@ -78,6 +85,23 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) wr32(IGC_GTXOFFSET, txoffset); } +static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl; + + retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; + wr32(IGC_RETX_CTL, retxctl); +} + +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + adapter->taprio_offload_enable; +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ @@ -91,6 +115,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + if (igc_is_device_id_i226(hw)) + igc_tsn_restore_retx_default(adapter); + tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); @@ -111,6 +138,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) return 0; } +/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes + * to 88 Bytes by setting RETX_CTL register using the recommendation from: + * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 + * Item 9: TSN: Packet Transmission Might Cross the Qbv Window + * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + */ +static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl, watermark; + + retxctl = rd32(IGC_RETX_CTL); + watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; + /* Set QBVFULLTH value using watermark and set QBVFULLEN */ + retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | + IGC_RETX_CTL_QBVFULLEN; + wr32(IGC_RETX_CTL, retxctl); +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -123,6 +169,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + if (igc_is_device_id_i226(hw)) + igc_tsn_set_retx_qbvfullthreshold(adapter); + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; u32 txqctl = 0; @@ -262,14 +311,6 @@ skip_cbs: s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); - - /* Increase the counter if scheduling into the past while - * Gate Control List (GCL) is running. - */ - if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - (adapter->qbv_count > 1)) - adapter->qbv_config_change_errors++; } else { if (igc_is_device_id_i226(hw)) { ktime_t adjust_time, expires_time; @@ -331,15 +372,22 @@ int igc_tsn_reset(struct igc_adapter *adapter) return err; } -int igc_tsn_offload_apply(struct igc_adapter *adapter) +static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) { - struct igc_hw *hw = &adapter->hw; + bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & + IGC_FLAG_TSN_ANY_ENABLED); - /* Per I225/6 HW Design Section 7.5.2.1, transmit mode - * cannot be changed dynamically. Require reset the adapter. + return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || + (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); +} + +int igc_tsn_offload_apply(struct igc_adapter *adapter) +{ + /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change + * from legacy->tsn or tsn->legacy, then reset adapter is needed. */ if (netif_running(adapter->netdev) && - (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { + igc_tsn_will_tx_mode_change(adapter)) { schedule_work(&adapter->reset_task); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index b53e6af560b7..98ec845a86bf 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -7,5 +7,6 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b06e24562973..d8be0e4dcb07 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -946,15 +946,13 @@ jme_udpsum(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IP)) return csum; skb_set_network_header(skb, ETH_HLEN); - if ((ip_hdr(skb)->protocol != IPPROTO_UDP) || - (skb->len < (ETH_HLEN + - (ip_hdr(skb)->ihl << 2) + - sizeof(struct udphdr)))) { + + if (ip_hdr(skb)->protocol != IPPROTO_UDP || + skb->len < (ETH_HLEN + ip_hdrlen(skb) + sizeof(struct udphdr))) { skb_reset_network_header(skb); return csum; } - skb_set_transport_header(skb, - ETH_HLEN + (ip_hdr(skb)->ihl << 2)); + skb_set_transport_header(skb, ETH_HLEN + ip_hdrlen(skb)); csum = udp_hdr(skb)->check; skb_reset_transport_header(skb); skb_reset_network_header(skb); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8c45ad983abc..0d62a33afa80 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -953,13 +953,13 @@ static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, static void mvpp2_bm_pool_update_priv_fc(struct mvpp2 *priv, bool en) { struct mvpp2_port *port; - int i; + int i, j; for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; if (port->priv->percpu_pools) { - for (i = 0; i < port->nrxqs; i++) - mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], + for (j = 0; j < port->nrxqs; j++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[j], port->tx_fc & en); } else { mvpp2_bm_pool_update_fc(port, port->pool_long, port->tx_fc & en); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 61334a71058c..e212a4ba9275 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -2666,14 +2666,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri { struct mtk_wed_flow_block_priv *priv = cb_priv; struct flow_cls_offload *cls = type_data; - struct mtk_wed_hw *hw = priv->hw; + struct mtk_wed_hw *hw = NULL; - if (!tc_can_offload(priv->dev)) + if (!priv || !tc_can_offload(priv->dev)) return -EOPNOTSUPP; if (type != TC_SETUP_CLSFLOWER) return -EOPNOTSUPP; + hw = priv->hw; return mtk_flow_offload_cmd(hw->eth, cls, hw->index); } @@ -2729,6 +2730,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev, flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); kfree(block_cb->cb_priv); + block_cb->cb_priv = NULL; } return 0; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5fd82c67b6ab..bb5da42edc23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -130,7 +130,7 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_DEFAULT_LRO_TIMEOUT 32 -#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 +#define MLX5E_DEFAULT_SHAMPO_TIMEOUT 1024 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 6c9ccccca81e..64b62ed17b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -928,7 +928,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_headers_entry_size, mlx5e_shampo_get_log_hd_entry_size(mdev, params)); MLX5_SET(rqc, rqc, reservation_timeout, - params->packet_merge.timeout); + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); MLX5_SET(rqc, rqc, shampo_match_criteria_type, params->packet_merge.shampo.match_criteria_type); MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, @@ -1087,6 +1087,20 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, return wqebbs; } +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 749b2ec0436e..3f8986f9d862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -108,6 +108,7 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 22918b2ef7f1..09433b91be17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -146,7 +146,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } + mutex_lock(&priv->state_lock); err = mlx5e_safe_reopen_channels(priv); + mutex_unlock(&priv->state_lock); if (!err) { to_ctx->status = 1; /* all channels recovered */ return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 8cf8ba2622f2..71a168746ebe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -932,6 +932,7 @@ err_rule: mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: + *attr = *old_attr; kfree(old_attr); err_attr: kvfree(spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 6e00afe4671b..797db853de36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -51,9 +51,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; - if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || - MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + if (IS_ENABLED(CONFIG_MLX5_CLS_ACT) && + ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))) caps |= MLX5_IPSEC_CAP_PRIO; if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 00d5661dc62e..36845872ae94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1409,7 +1409,12 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + err = mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + if (err) { + netdev_err(priv->netdev, "%s: failed to set ptys reg: %d\n", __func__, err); + goto out; + } + mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3eccdadc0357..773624bb2c5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, if (num_tuples <= 0) { netdev_warn(priv->netdev, "%s: flow is not valid %d\n", __func__, num_tuples); - return num_tuples; + return num_tuples < 0 ? num_tuples : -EINVAL; } eth_ft = get_flow_table(priv, fs, num_tuples); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6f686fabed44..5df904639b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5167,18 +5167,6 @@ const struct net_device_ops mlx5e_netdev_ops = { #endif }; -static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) -{ - int i; - - /* The supported periods are organized in ascending order */ - for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) - if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) - break; - - return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { struct mlx5e_params *params = &priv->channels.params; @@ -5308,7 +5296,7 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, struct mlx5e_rq_stats *rq_stats; ASSERT_RTNL(); - if (mlx5e_is_uplink_rep(priv)) + if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) return; channel_stats = priv->channel_stats[i]; @@ -5328,6 +5316,9 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, struct mlx5e_sq_stats *sq_stats; ASSERT_RTNL(); + if (!priv->stats_nch) + return; + /* no special case needed for ptp htb etc since txq2sq_stats is kept up * to date for active sq_stats, otherwise get_base_stats takes care of * inactive sqs. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 979c49ae6b5c..b43ca0b762c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -207,6 +207,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct devlink *devlink = priv_to_devlink(dev); /* if this is the driver that initiated the fw reset, devlink completed the reload */ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { @@ -218,9 +219,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else mlx5_load_one(dev, true); - devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + devl_lock(devlink); + devlink_remote_reload_actions_performed(devlink, 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + devl_unlock(devlink); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index f7b01b3f0cba..1477db7f5307 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -48,6 +48,7 @@ static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { struct irq_affinity_desc auto_desc = {}; + struct mlx5_irq *irq; u32 irq_index; int err; @@ -64,9 +65,12 @@ irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_de else cpu_get(pool, cpumask_first(&af_desc->mask)); } - return mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, - NULL); + irq = mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); + if (IS_ERR(irq)) + xa_erase(&pool->irqs, irq_index); + return irq; } /* Looking for the IRQ with the smallest refcount that fits req_mask. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d0871c46b8c5..cf8045b92689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1538,7 +1538,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + if (ldev->pf[i].netdev == slave) { port = i; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f6deb5a3f820..eeb0b7ea05f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -126,7 +126,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) } static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, - u8 *host_buses, u8 *sd_group) + u8 *host_buses) { u32 out[MLX5_ST_SZ_DW(mpir_reg)]; int err; @@ -135,10 +135,6 @@ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, if (err) return err; - err = mlx5_query_nic_vport_sd_group(dev, sd_group); - if (err) - return err; - *sdm = MLX5_GET(mpir_reg, out, sdm); *host_buses = MLX5_GET(mpir_reg, out, host_buses); @@ -166,19 +162,23 @@ static int sd_init(struct mlx5_core_dev *dev) if (mlx5_core_is_ecpf(dev)) return 0; + err = mlx5_query_nic_vport_sd_group(dev, &sd_group); + if (err) + return err; + + if (!sd_group) + return 0; + if (!MLX5_CAP_MCAM_REG(dev, mpir)) return 0; - err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group); + err = mlx5_query_sd(dev, &sdm, &host_buses); if (err) return err; if (!sdm) return 0; - if (!sd_group) - return 0; - group_id = mlx5_sd_group_id(dev, sd_group); if (!mlx5_sd_is_supported(dev, host_buses)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 527da58c7953..5b7e6f4b5c7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2142,7 +2142,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) /* Panic tear down fw command will stop the PCI bus communication * with the HCA, so the health poll is no longer needed. */ - mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_fast_teardown_hca(dev); @@ -2177,6 +2176,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index b2986175d9af..b706f1486504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) struct mlx5_core_dev *mdev = sf_dev->mdev; set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state); + mlx5_drain_health_wq(mdev); mlx5_unload_one(mdev, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 042ca0349124..d1db04baa1fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -7,7 +7,7 @@ /* don't try to optimize STE allocation if the stack is too constaraining */ #define DR_RULE_MAX_STES_OPTIMIZED 0 #else -#define DR_RULE_MAX_STES_OPTIMIZED 5 +#define DR_RULE_MAX_STES_OPTIMIZED 2 #endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index bc94e75a7aeb..e7777700ee18 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -40,6 +40,7 @@ */ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 +#define MLXBF_GIGE_MAX_FILTER_IDX 3 /* Define for broadcast MAC literal */ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF @@ -175,6 +176,13 @@ enum mlxbf_gige_res { int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv); void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); + +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 dmac); void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index b157f0f1c5a8..385a56ac7348 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev) if (err) goto napi_deinit; + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); + mlxbf_gige_enable_multicast_rx(priv); + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) void __iomem *plu_base; void __iomem *base; int addr, phy_irq; + unsigned int i; int err; base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); @@ -423,6 +428,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++) + mlxbf_gige_disable_mac_rx_filter(priv, i); + mlxbf_gige_disable_multicast_rx(priv); + mlxbf_gige_disable_promisc(priv); + /* Write initial MAC address to hardware */ mlxbf_gige_initial_mac(priv); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 98a8681c21b9..4d14cb13fd64 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -62,6 +62,8 @@ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 +#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL 0x0530 +#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index 699984358493..eb62620b63c7 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -11,15 +11,31 @@ #include "mlxbf_gige.h" #include "mlxbf_gige_regs.h" -void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, - unsigned int index, u64 dmac) +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv) { void __iomem *base = priv->base; - u64 control; + u64 data; - /* Write destination MAC to specified MAC RX filter */ - writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + - (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 data; + + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; /* Enable MAC receive filter mask for specified index */ control = readq(base + MLXBF_GIGE_CONTROL); @@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, writeq(control, base + MLXBF_GIGE_CONTROL); } +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; + + /* Disable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); + control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); + writeq(control, base + MLXBF_GIGE_CONTROL); +} + +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac) +{ + void __iomem *base = priv->base; + + /* Write destination MAC to specified MAC RX filter */ + writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); +} + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 *dmac) { diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 86034ea4ba5b..c002ede36402 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_META config FBNIC tristate "Meta Platforms Host Network Interface" depends on X86_64 || COMPILE_TEST - depends on S390=n + depends on !S390 depends on MAX_SKB_FRAGS < 22 depends on PCI_MSI select PHYLINK diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d2f07e179e86..39f56973746d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -599,7 +599,11 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } @@ -1788,7 +1792,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1799,16 +1802,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 714d2e804694..3507c2e28110 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4349,7 +4349,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - goto err_stop_0; + netif_stop_queue(dev); + return NETDEV_TX_BUSY; } opts[1] = rtl8169_tx_vlan_tag(skb); @@ -4405,11 +4406,6 @@ err_dma_0: dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; - -err_stop_0: - netif_stop_queue(dev); - dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; } static unsigned int rtl_last_frag_len(struct sk_buff *skb) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d3c5306f1c41..93a78fd0737b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -573,8 +573,6 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) #define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) #define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKMOD */ -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK 0x1 /* LNKSPEED */ #define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 #define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f98741d2607e..31c387cc5f26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -786,7 +786,7 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) else x->pcs_speed = SPEED_10; - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK); + x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, x->pcs_duplex ? "Full" : "Half"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 12689774d755..f3a1b179aaea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2912,7 +2912,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) u32 channels_to_check = tx_channel_count > rx_channel_count ? tx_channel_count : rx_channel_count; u32 chan; - int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; + int status[MAX_T(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; /* Make sure we never check beyond our status buffer. */ if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index fa5500decc96..c7d9221fafdc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -160,16 +160,16 @@ #define XAE_RCW1_OFFSET 0x00000404 /* Rx Configuration Word 1 */ #define XAE_TC_OFFSET 0x00000408 /* Tx Configuration */ #define XAE_FCC_OFFSET 0x0000040C /* Flow Control Configuration */ -#define XAE_EMMC_OFFSET 0x00000410 /* EMAC mode configuration */ -#define XAE_PHYC_OFFSET 0x00000414 /* RGMII/SGMII configuration */ +#define XAE_EMMC_OFFSET 0x00000410 /* MAC speed configuration */ +#define XAE_PHYC_OFFSET 0x00000414 /* RX Max Frame Configuration */ #define XAE_ID_OFFSET 0x000004F8 /* Identification register */ -#define XAE_MDIO_MC_OFFSET 0x00000500 /* MII Management Config */ -#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MII Management Control */ -#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ -#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ +#define XAE_MDIO_MC_OFFSET 0x00000500 /* MDIO Setup */ +#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MDIO Control */ +#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MDIO Write Data */ +#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MDIO Read Data */ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ -#define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ +#define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ @@ -308,7 +308,7 @@ */ #define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF -/* Bit masks for Axi Ethernet FMI register */ +/* Bit masks for Axi Ethernet FMC register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ #define XAE_FMI_IND_MASK 0x00000003 /* Index Mask */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index e342f387c3dd..02fdf66e07fa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2219,9 +2219,9 @@ static void axienet_dma_err_handler(struct work_struct *work) ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); - axienet_setoptions(ndev, lp->options); napi_enable(&lp->napi_rx); napi_enable(&lp->napi_tx); + axienet_setoptions(ndev, lp->options); } /** diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b3ddc9a629d9..fad5b6564464 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -14,9 +14,7 @@ #include "fjes.h" #include "fjes_trace.h" -#define MAJ 1 -#define MIN 2 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) +#define DRV_VERSION "1.2" #define DRV_NAME "fjes" char fjes_driver_name[] = DRV_NAME; char fjes_driver_version[] = DRV_VERSION; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 427b91aca50d..0696faf60013 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1269,6 +1269,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + skb_reset_inner_headers(skb); /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index d12e35374231..e982e9ce44a5 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -653,13 +653,7 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) unsigned long *possible = phydev->possible_interfaces; unsigned int serdes_mode, rate_adapt; phy_interface_t interface; - int i, val, ret; - - ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_CFG_10M, val, val != 0, - 1000, 100000, false); - if (ret) - return ret; + int i, val; /* Walk the media-speed configuration registers to determine which * host-side serdes modes may be used by the PHY depending on the @@ -708,6 +702,25 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) return 0; } +static int aqr113c_fill_interface_modes(struct phy_device *phydev) +{ + int val, ret; + + /* It's been observed on some models that - when coming out of suspend + * - the FW signals that the PHY is ready but the GLOBAL_CFG registers + * continue on returning zeroes for some time. Let's poll the 100M + * register until it returns a real value as both 113c and 115c support + * this mode. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_CFG_100M, val, val != 0, + 1000, 100000, false); + if (ret) + return ret; + + return aqr107_fill_interface_modes(phydev); +} + static int aqr113c_config_init(struct phy_device *phydev) { int ret; @@ -725,7 +738,7 @@ static int aqr113c_config_init(struct phy_device *phydev) if (ret) return ret; - return aqr107_fill_interface_modes(phydev); + return aqr113c_fill_interface_modes(phydev); } static int aqr107_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index dd519805deee..65b0a3115e14 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1389,6 +1389,8 @@ static int ksz9131_config_init(struct phy_device *phydev) const struct device *dev_walker; int ret; + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + dev_walker = &phydev->mdio.dev; do { of_node = dev_walker->of_node; @@ -1438,28 +1440,30 @@ static int ksz9131_config_init(struct phy_device *phydev) #define MII_KSZ9131_AUTO_MDIX 0x1C #define MII_KSZ9131_AUTO_MDI_SET BIT(7) #define MII_KSZ9131_AUTO_MDIX_SWAP_OFF BIT(6) +#define MII_KSZ9131_DIG_AXAN_STS 0x14 +#define MII_KSZ9131_DIG_AXAN_STS_LINK_DET BIT(14) +#define MII_KSZ9131_DIG_AXAN_STS_A_SELECT BIT(12) static int ksz9131_mdix_update(struct phy_device *phydev) { int ret; - ret = phy_read(phydev, MII_KSZ9131_AUTO_MDIX); - if (ret < 0) - return ret; - - if (ret & MII_KSZ9131_AUTO_MDIX_SWAP_OFF) { - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix_ctrl = ETH_TP_MDI; - else - phydev->mdix_ctrl = ETH_TP_MDI_X; + if (phydev->mdix_ctrl != ETH_TP_MDI_AUTO) { + phydev->mdix = phydev->mdix_ctrl; } else { - phydev->mdix_ctrl = ETH_TP_MDI_AUTO; - } + ret = phy_read(phydev, MII_KSZ9131_DIG_AXAN_STS); + if (ret < 0) + return ret; - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix = ETH_TP_MDI; - else - phydev->mdix = ETH_TP_MDI_X; + if (ret & MII_KSZ9131_DIG_AXAN_STS_LINK_DET) { + if (ret & MII_KSZ9131_DIG_AXAN_STS_A_SELECT) + phydev->mdix = ETH_TP_MDI; + else + phydev->mdix = ETH_TP_MDI_X; + } else { + phydev->mdix = ETH_TP_MDI_INVALID; + } + } return 0; } diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index bed839237fb5..87865918dab6 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1465,6 +1465,13 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001cc960), + .name = "RTL8366S Gigabit Ethernet", + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, }; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 897b979ec03c..3b5fcaf0dd36 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -237,16 +237,6 @@ static int vsc739x_config_init(struct phy_device *phydev) return 0; } -static int vsc73xx_config_aneg(struct phy_device *phydev) -{ - /* The VSC73xx switches does not like to be instructed to - * do autonegotiation in any way, it prefers that you just go - * with the power-on/reset defaults. Writing some registers will - * just make autonegotiation permanently fail. - */ - return 0; -} - /* This adds a skew for both TX and RX clocks, so the skew should only be * applied to "rgmii-id" interfaces. It may not work as expected * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. @@ -444,7 +434,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -453,7 +442,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -462,7 +450,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -471,7 +458,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index ec20953e0f82..4f032b16a8a0 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -401,9 +401,14 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, rdesc->ops = &pse_pi_ops; rdesc->owner = pcdev->owner; - rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS | - REGULATOR_CHANGE_CURRENT; - rinit_data->constraints.max_uA = MAX_PI_CURRENT; + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + + if (pcdev->ops->pi_set_current_limit) { + rinit_data->constraints.valid_ops_mask |= + REGULATOR_CHANGE_CURRENT; + rinit_data->constraints.max_uA = MAX_PI_CURRENT; + } + rinit_data->supply_regulator = "vpwr"; rconfig.dev = pcdev->dev; diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index 61f6ad9c1934..2ea75686a319 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 Bootlin, Kory Maincent <kory.maincent@bootlin.com> */ +#include <linux/bitfield.h> #include <linux/delay.h> #include <linux/firmware.h> #include <linux/i2c.h> @@ -29,6 +30,8 @@ #define TPS23881_REG_TPON BIT(0) #define TPS23881_REG_FWREV 0x41 #define TPS23881_REG_DEVID 0x43 +#define TPS23881_REG_DEVID_MASK 0xF0 +#define TPS23881_DEVICE_ID 0x02 #define TPS23881_REG_SRAM_CTRL 0x60 #define TPS23881_REG_SRAM_DATA 0x61 @@ -750,7 +753,7 @@ static int tps23881_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; - if (ret != 0x22) { + if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { dev_err(dev, "Wrong device ID\n"); return -ENXIO; } diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 687d70cfc556..46afb95ffabe 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -286,10 +286,11 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } - if (urb->actual_length <= IPHETH_IP_ALIGN) { - dev->net->stats.rx_length_errors++; - return; - } + /* iPhone may periodically send URBs with no payload + * on the "bulk in" endpoint. It is safe to ignore them. + */ + if (urb->actual_length == 0) + goto rx_submit; /* RX URBs starting with 0x00 0x01 do not encapsulate Ethernet frames, * but rather are control frames. Their purpose is not documented, and @@ -298,7 +299,8 @@ static void ipheth_rcvbulk_callback(struct urb *urb) * URB received from the bulk IN endpoint. */ if (unlikely - (((char *)urb->transfer_buffer)[0] == 0 && + (urb->actual_length == 4 && + ((char *)urb->transfer_buffer)[0] == 0 && ((char *)urb->transfer_buffer)[1] == 1)) goto rx_submit; @@ -306,7 +308,6 @@ static void ipheth_rcvbulk_callback(struct urb *urb) if (retval != 0) { dev_err(&dev->intf->dev, "%s: callback retval: %d\n", __func__, retval); - return; } rx_submit: @@ -354,13 +355,14 @@ static int ipheth_carrier_set(struct ipheth_device *dev) 0x02, /* index */ dev->ctrl_buf, IPHETH_CTRL_BUF_SIZE, IPHETH_CTRL_TIMEOUT); - if (retval < 0) { + if (retval <= 0) { dev_err(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); return retval; } - if (dev->ctrl_buf[0] == IPHETH_CARRIER_ON) { + if ((retval == 1 && dev->ctrl_buf[0] == IPHETH_CARRIER_ON) || + (retval >= 2 && dev->ctrl_buf[1] == IPHETH_CARRIER_ON)) { netif_carrier_on(dev->net); if (dev->tx_urb->status != -EINPROGRESS) netif_wake_queue(dev->net); @@ -475,8 +477,8 @@ static int ipheth_close(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); - cancel_delayed_work_sync(&dev->carrier_work); netif_stop_queue(net); + cancel_delayed_work_sync(&dev->carrier_work); return 0; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 386d62769ded..4823dbdf5465 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -201,6 +201,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; default: /* not ip - do not know what to do */ + kfree_skb(skbn); goto skip; } @@ -1431,6 +1432,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ + {QMI_FIXED_INTF(0x2dee, 0x4d22, 5)}, /* MeiG Smart SRM825L */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 0a662e42ed96..cb7d2f798fb4 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -179,6 +179,7 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; int rc = 0; + int err; if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); @@ -189,11 +190,17 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) if (loc == MII_BMSR) { u8 value; - sr_read_reg(dev, SR_NSR, &value); + err = sr_read_reg(dev, SR_NSR, &value); + if (err < 0) + return err; + if (value & NSR_LINKST) rc = 1; } - sr_share_read_word(dev, 1, loc, &res); + err = sr_share_read_word(dev, 1, loc, &res); + if (err < 0) + return err; + if (rc == 1) res = le16_to_cpu(res) | BMSR_LSTATUS; else diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0383a3e136d6..3f10c72743e9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3658,6 +3658,9 @@ static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) @@ -3675,6 +3678,9 @@ static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) @@ -3743,7 +3749,11 @@ static int virtnet_set_ringparam(struct net_device *dev, err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); - if (err) + + /* Don't break the tx resize action if the vq coalescing is not + * supported. The same is true for rx resize below. + */ + if (err && err != -EOPNOTSUPP) return err; } @@ -3758,7 +3768,7 @@ static int virtnet_set_ringparam(struct net_device *dev, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); mutex_unlock(&vi->rq[i].dim_lock); - if (err) + if (err && err != -EOPNOTSUPP) return err; } } diff --git a/drivers/net/wan/fsl_qmc_hdlc.c b/drivers/net/wan/fsl_qmc_hdlc.c index c5e7ca793c43..8fcfbde31a1c 100644 --- a/drivers/net/wan/fsl_qmc_hdlc.c +++ b/drivers/net/wan/fsl_qmc_hdlc.c @@ -18,6 +18,7 @@ #include <linux/hdlc.h> #include <linux/mod_devicetable.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -37,7 +38,7 @@ struct qmc_hdlc { struct qmc_chan *qmc_chan; struct net_device *netdev; struct framer *framer; - spinlock_t carrier_lock; /* Protect carrier detection */ + struct mutex carrier_lock; /* Protect carrier detection */ struct notifier_block nb; bool is_crc32; spinlock_t tx_lock; /* Protect tx descriptors */ @@ -60,7 +61,7 @@ static int qmc_hdlc_framer_set_carrier(struct qmc_hdlc *qmc_hdlc) if (!qmc_hdlc->framer) return 0; - guard(spinlock_irqsave)(&qmc_hdlc->carrier_lock); + guard(mutex)(&qmc_hdlc->carrier_lock); ret = framer_get_status(qmc_hdlc->framer, &framer_status); if (ret) { @@ -249,6 +250,7 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl struct qmc_hdlc_desc *desc = context; struct net_device *netdev; struct qmc_hdlc *qmc_hdlc; + size_t crc_size; int ret; netdev = desc->netdev; @@ -267,15 +269,26 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl if (flags & QMC_RX_FLAG_HDLC_CRC) /* CRC error */ netdev->stats.rx_crc_errors++; kfree_skb(desc->skb); - } else { - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += length; + goto re_queue; + } - skb_put(desc->skb, length); - desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); - netif_rx(desc->skb); + /* Discard the CRC */ + crc_size = qmc_hdlc->is_crc32 ? 4 : 2; + if (length < crc_size) { + netdev->stats.rx_length_errors++; + kfree_skb(desc->skb); + goto re_queue; } + length -= crc_size; + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + + skb_put(desc->skb, length); + desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); + netif_rx(desc->skb); +re_queue: /* Re-queue a transfer using the same descriptor */ ret = qmc_hdlc_recv_queue(qmc_hdlc, desc, desc->dma_size); if (ret) { @@ -706,7 +719,7 @@ static int qmc_hdlc_probe(struct platform_device *pdev) qmc_hdlc->dev = dev; spin_lock_init(&qmc_hdlc->tx_lock); - spin_lock_init(&qmc_hdlc->carrier_lock); + mutex_init(&qmc_hdlc->carrier_lock); qmc_hdlc->qmc_chan = devm_qmc_chan_get_bychild(dev, dev->of_node); if (IS_ERR(qmc_hdlc->qmc_chan)) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index d08c04343e90..44406e0b4a34 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -162,6 +162,60 @@ static int ath12k_dp_prepare_htt_metadata(struct sk_buff *skb) return 0; } +static void ath12k_dp_tx_move_payload(struct sk_buff *skb, + unsigned long delta, + bool head) +{ + unsigned long len = skb->len; + + if (head) { + skb_push(skb, delta); + memmove(skb->data, skb->data + delta, len); + skb_trim(skb, len); + } else { + skb_put(skb, delta); + memmove(skb->data + delta, skb->data, len); + skb_pull(skb, delta); + } +} + +static int ath12k_dp_tx_align_payload(struct ath12k_base *ab, + struct sk_buff **pskb) +{ + u32 iova_mask = ab->hw_params->iova_mask; + unsigned long offset, delta1, delta2; + struct sk_buff *skb2, *skb = *pskb; + unsigned int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + int ret = 0; + + offset = (unsigned long)skb->data & iova_mask; + delta1 = offset; + delta2 = iova_mask - offset + 1; + + if (headroom >= delta1) { + ath12k_dp_tx_move_payload(skb, delta1, true); + } else if (tailroom >= delta2) { + ath12k_dp_tx_move_payload(skb, delta2, false); + } else { + skb2 = skb_realloc_headroom(skb, iova_mask); + if (!skb2) { + ret = -ENOMEM; + goto out; + } + + dev_kfree_skb_any(skb); + + offset = (unsigned long)skb2->data & iova_mask; + if (offset) + ath12k_dp_tx_move_payload(skb2, offset, true); + *pskb = skb2; + } + +out: + return ret; +} + int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, struct sk_buff *skb) { @@ -184,6 +238,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, bool tcl_ring_retry; bool msdu_ext_desc = false; bool add_htt_metadata = false; + u32 iova_mask = ab->hw_params->iova_mask; if (test_bit(ATH12K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags)) return -ESHUTDOWN; @@ -279,6 +334,23 @@ tcl_ring_sel: goto fail_remove_tx_buf; } + if (iova_mask && + (unsigned long)skb->data & iova_mask) { + ret = ath12k_dp_tx_align_payload(ab, &skb); + if (ret) { + ath12k_warn(ab, "failed to align TX buffer %d\n", ret); + /* don't bail out, give original buffer + * a chance even unaligned. + */ + goto map; + } + + /* hdr is pointing to a wrong place after alignment, + * so refresh it for later use. + */ + hdr = (void *)skb->data; + } +map: ti.paddr = dma_map_single(ab->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(ab->dev, ti.paddr)) { atomic_inc(&ab->soc_stats.tx_err.misc_fail); diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 2e11ea763574..7b0b6a7f4701 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -924,6 +924,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, { .name = "wcn7850 hw2.0", @@ -1000,6 +1002,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = &wcn7850_uuid, .supports_dynamic_smps_6ghz = false, + + .iova_mask = ATH12K_PCIE_MAX_PAYLOAD_SIZE - 1, }, { .name = "qcn9274 hw2.0", @@ -1072,6 +1076,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, }; diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index e792eb6b249b..b1d302c48326 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -96,6 +96,8 @@ #define ATH12K_M3_FILE "m3.bin" #define ATH12K_REGDB_FILE_NAME "regdb.bin" +#define ATH12K_PCIE_MAX_PAYLOAD_SIZE 128 + enum ath12k_hw_rate_cck { ATH12K_HW_RATE_CCK_LP_11M = 0, ATH12K_HW_RATE_CCK_LP_5_5M, @@ -215,6 +217,8 @@ struct ath12k_hw_params { const guid_t *acpi_guid; bool supports_dynamic_smps_6ghz; + + u32 iova_mask; }; struct ath12k_hw_ops { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8106297f0bc1..ce41c8153080 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9193,6 +9193,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) hw->vif_data_size = sizeof(struct ath12k_vif); hw->sta_data_size = sizeof(struct ath12k_sta); + hw->extra_tx_headroom = ab->hw_params->iova_mask; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 876c029f58f6..9e0b9e329bda 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -473,7 +473,8 @@ static void __ath12k_pci_ext_irq_disable(struct ath12k_base *ab) { int i; - clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); + if (!test_and_clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags)) + return; for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath12k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; diff --git a/drivers/net/wireless/ath/ath12k/wow.c b/drivers/net/wireless/ath/ath12k/wow.c index bead19db2c9a..9b8684abbe40 100644 --- a/drivers/net/wireless/ath/ath12k/wow.c +++ b/drivers/net/wireless/ath/ath12k/wow.c @@ -361,7 +361,7 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, struct ath12k *ar = arvif->ar; unsigned long wow_mask = 0; int pattern_id = 0; - int ret, i; + int ret, i, j; /* Setup requested WOW features */ switch (arvif->vdev_type) { @@ -431,9 +431,9 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, eth_pattern->pattern_len); /* convert bitmask to bytemask */ - for (i = 0; i < eth_pattern->pattern_len; i++) - if (eth_pattern->mask[i / 8] & BIT(i % 8)) - new_pattern.bytemask[i] = 0xff; + for (j = 0; j < eth_pattern->pattern_len; j++) + if (eth_pattern->mask[j / 8] & BIT(j % 8)) + new_pattern.bytemask[j] = 0xff; new_pattern.pattern_len = eth_pattern->pattern_len; new_pattern.pkt_offset = eth_pattern->pkt_offset; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1585a5653ee4..d4cc5fa92341 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4320,9 +4320,16 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa, /* Single PMK operation */ pmk_op->count = cpu_to_le16(1); length += sizeof(struct brcmf_pmksa_v3); - memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); - memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); - pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + if (pmksa->bssid) + memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); + if (pmksa->pmkid) { + memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); + pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + } + if (pmksa->ssid && pmksa->ssid_len) { + memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len); + pmk_op->pmk[0].ssid.SSID_len = pmksa->ssid_len; + } pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index b59de4f80b4b..27a7e0b5b3d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -639,7 +639,8 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, bool cmd_queue); -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr); +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, u8 **hdr, unsigned int hdr_room); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 2e780fb2da42..b1846abb99b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -168,6 +168,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; dma_addr_t start_hdr_phys; u16 length, amsdu_pad; u8 *start_hdr; @@ -260,7 +261,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, int ret; tb_len = min_t(unsigned int, tso.size, data_left); - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, + tb_len); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) goto out_err; @@ -272,6 +274,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, goto out_err; data_left -= tb_len; + data_offset += tb_len; tso_build_data(skb, &tso, tb_len); } } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 22d482ae53d9..9fe050f0ddc1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1814,23 +1814,31 @@ out: /** * iwl_pcie_get_sgt_tb_phys - Find TB address in mapped SG list * @sgt: scatter gather table - * @addr: Virtual address + * @offset: Offset into the mapped memory (i.e. SKB payload data) + * @len: Length of the area * - * Find the entry that includes the address for the given address and return - * correct physical address for the TB entry. + * Find the DMA address that corresponds to the SKB payload data at the + * position given by @offset. * * Returns: Address for TB entry */ -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr) +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len) { struct scatterlist *sg; + unsigned int sg_offset = 0; int i; + /* + * Search the mapped DMA areas in the SG for the area that contains the + * data at offset with the given length. + */ for_each_sgtable_dma_sg(sgt, sg, i) { - if (addr >= sg_virt(sg) && - (u8 *)addr < (u8 *)sg_virt(sg) + sg_dma_len(sg)) - return sg_dma_address(sg) + - ((unsigned long)addr - (unsigned long)sg_virt(sg)); + if (offset >= sg_offset && + offset + len <= sg_offset + sg_dma_len(sg)) + return sg_dma_address(sg) + offset - sg_offset; + + sg_offset += sg_dma_len(sg); } WARN_ON_ONCE(1); @@ -1875,7 +1883,9 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len); + /* Only map the data, not the header (it is copied to the TSO page) */ + sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), + skb->data_len); if (WARN_ON_ONCE(sgt->orig_nents <= 0)) return NULL; @@ -1900,6 +1910,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; u16 length, iv_len, amsdu_pad; dma_addr_t start_hdr_phys; u8 *start_hdr, *pos_hdr; @@ -2000,7 +2011,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, data_left); dma_addr_t tb_phys; - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, size); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) return -EINVAL; @@ -2011,6 +2022,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, tb_phys, size); data_left -= size; + data_offset += size; tso_build_data(skb, &tso, size); } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 2e6268cb06c0..23b228804289 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -303,6 +303,7 @@ mt7921_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) mvif->bss_conf.mt76.omac_idx = mvif->bss_conf.mt76.idx; mvif->phy = phy; + mvif->bss_conf.vif = mvif; mvif->bss_conf.mt76.band_idx = 0; mvif->bss_conf.mt76.wmm_idx = mvif->bss_conf.mt76.idx % MT76_CONNAC_MAX_WMM_SETS; @@ -1182,7 +1183,7 @@ static void mt7921_ipv6_addr_change(struct ieee80211_hw *hw, struct inet6_dev *idev) { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; - struct mt792x_dev *dev = mvif->phy->dev; + struct mt792x_dev *dev = mt792x_hw_dev(hw); struct inet6_ifaddr *ifa; struct in6_addr ns_addrs[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; struct sk_buff *skb; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c index 700c6e2bcad1..ff458fb8514d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c @@ -181,11 +181,11 @@ static void _rtl92du_init_queue_reserved_page(struct ieee80211_hw *hw, struct rtl_hal *rtlhal = rtl_hal(rtlpriv); u32 txqpagenum, txqpageunit; u32 txqremainingpage; + u32 value32 = 0; u32 numhq = 0; u32 numlq = 0; u32 numnq = 0; u32 numpubq; - u32 value32; if (rtlhal->macphymode != SINGLEMAC_SINGLEPHY) { numpubq = NORMAL_PAGE_NUM_PUBQ_92D_DUAL_MAC; diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 9fe664960b38..e2a6575b9ff7 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -126,8 +126,6 @@ struct pn544_i2c_fw_secure_blob { #define PN544_FW_CMD_RESULT_COMMAND_REJECTED 0xE0 #define PN544_FW_CMD_RESULT_CHUNK_ERROR 0xE6 -#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) - #define PN544_FW_WRITE_BUFFER_MAX_LEN 0x9f7 #define PN544_FW_I2C_MAX_PAYLOAD PN544_HCI_I2C_LLC_MAX_SIZE #define PN544_FW_I2C_WRITE_FRAME_HEADER_LEN 8 diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1ae8b2351654..210fb77f51ba 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -498,7 +498,7 @@ static int pmem_attach_disk(struct device *dev, } if (fua) lim.features |= BLK_FEAT_FUA; - if (is_nd_pfn(dev)) + if (is_nd_pfn(dev) || pmem_should_map_pages(dev)) lim.features |= BLK_FEAT_DAX; if (!devm_request_mem_region(dev, res->start, resource_size(res), diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 19917253ba7b..33fa01c599ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -36,6 +36,7 @@ struct nvme_ns_info { struct nvme_ns_ids ids; u32 nsid; __le32 anagrpid; + u8 pi_offset; bool is_shared; bool is_readonly; bool is_ready; @@ -1757,8 +1758,8 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, - struct queue_limits *lim) +static bool nvme_init_integrity(struct nvme_ns_head *head, + struct queue_limits *lim, struct nvme_ns_info *info) { struct blk_integrity *bi = &lim->integrity; @@ -1816,7 +1817,7 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, } bi->tuple_size = head->ms; - bi->pi_offset = head->pi_offset; + bi->pi_offset = info->pi_offset; return true; } @@ -1876,12 +1877,18 @@ static void nvme_configure_pi_elbas(struct nvme_ns_head *head, struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm) { u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]); + u8 guard_type; /* no support for storage tag formats right now */ if (nvme_elbaf_sts(elbaf)) return; - head->guard_type = nvme_elbaf_guard_type(elbaf); + guard_type = nvme_elbaf_guard_type(elbaf); + if ((nvm->pic & NVME_ID_NS_NVM_QPIFS) && + guard_type == NVME_NVM_NS_QTYPE_GUARD) + guard_type = nvme_elbaf_qualified_guard_type(elbaf); + + head->guard_type = guard_type; switch (head->guard_type) { case NVME_NVM_NS_64B_GUARD: head->pi_size = sizeof(struct crc64_pi_tuple); @@ -1896,12 +1903,11 @@ static void nvme_configure_pi_elbas(struct nvme_ns_head *head, static void nvme_configure_metadata(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, struct nvme_id_ns *id, - struct nvme_id_ns_nvm *nvm) + struct nvme_id_ns_nvm *nvm, struct nvme_ns_info *info) { head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); head->pi_type = 0; head->pi_size = 0; - head->pi_offset = 0; head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return; @@ -1916,7 +1922,7 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; if (!(id->dps & NVME_NS_DPS_PI_FIRST)) - head->pi_offset = head->ms - head->pi_size; + info->pi_offset = head->ms - head->pi_size; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -2150,7 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); + nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; @@ -2170,7 +2176,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->disk, ns->head, &lim)) + if (!nvme_init_integrity(ns->head, &lim, info)) capacity = 0; ret = queue_limits_commit_update(ns->disk->queue, &lim); @@ -2274,7 +2280,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; else - nvme_init_integrity(ns->head->disk, ns->head, &lim); + nvme_init_integrity(ns->head, &lim, info); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 44e342a46f39..f5f545fa0103 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1403,10 +1403,10 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) tok = &opt_tokens[idx]; if (tok->token == NVMF_OPT_ERR) continue; - seq_puts(seq_file, ","); + seq_putc(seq_file, ','); seq_puts(seq_file, tok->pattern); } - seq_puts(seq_file, "\n"); + seq_putc(seq_file, '\n'); } static int nvmf_dev_show(struct seq_file *seq_file, void *private) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f900e44243ae..ae5314d32943 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -462,20 +462,19 @@ struct nvme_ns_head { struct srcu_struct srcu; struct nvme_subsystem *subsys; struct nvme_ns_ids ids; + u8 lba_shift; + u16 ms; + u16 pi_size; + u8 pi_type; + u8 guard_type; struct list_head entry; struct kref ref; bool shared; bool passthru_err_log_enabled; - int instance; struct nvme_effects_log *effects; u64 nuse; unsigned ns_id; - int lba_shift; - u16 ms; - u16 pi_size; - u8 pi_type; - u8 pi_offset; - u8 guard_type; + int instance; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; #endif diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 21f8e1b9801f..6cd9395ba9ec 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -863,7 +863,8 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) nvme_start_request(req); return BLK_STS_OK; out_unmap_data: - nvme_unmap_data(dev, req); + if (blk_rq_nr_phys_segments(req)) + nvme_unmap_data(dev, req); out_free_cmd: nvme_cleanup_cmd(req); return ret; @@ -1309,7 +1310,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) dev_warn(dev->ctrl.device, "Does your device have a faulty power saving mode enabled?\n"); dev_warn(dev->ctrl.device, - "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n"); + "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off pcie_port_pm=off\" and report a bug\n"); } static enum blk_eh_timer_return nvme_timeout(struct request *req) @@ -2968,6 +2969,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; } + /* + * NVMe SSD drops off the PCIe bus after system idle + * for 10 hours on a Lenovo N60z board. + */ + if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6")) + return NVME_QUIRK_NO_APST; + return 0; } diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 3c55f7edd181..ba05faaac562 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -233,13 +233,12 @@ static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, { struct nvme_ns_head *head = dev_to_ns_head(dev); struct gendisk *disk = dev_to_disk(dev); - struct block_device *bdev = disk->part0; int ret; - if (nvme_disk_is_ns_head(bdev->bd_disk)) + if (nvme_disk_is_ns_head(disk)) ret = ns_head_update_nuse(head); else - ret = ns_update_nuse(bdev->bd_disk->private_data); + ret = ns_update_nuse(disk->private_data); if (ret) return ret; diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c94203ce65bb..8fd63100ba8f 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -344,7 +344,8 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *addr; u32 intsize; - int i, res; + int i, res, addr_len; + __be32 addr_buf[3] = { 0 }; pr_debug("of_irq_parse_one: dev=%pOF, index=%d\n", device, index); @@ -353,13 +354,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar return of_irq_parse_oldworld(device, index, out_irq); /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ + if (addr_len > (3 * sizeof(__be32))) + addr_len = 3 * sizeof(__be32); + if (addr) + memcpy(addr_buf, addr, addr_len); /* Try the new-style interrupts-extended first */ res = of_parse_phandle_with_args(device, "interrupts-extended", "#interrupt-cells", index, out_irq); if (!res) - return of_irq_parse_raw(addr, out_irq); + return of_irq_parse_raw(addr_buf, out_irq); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -389,7 +396,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Check if there are any interrupt-map translations to process */ - res = of_irq_parse_raw(addr, out_irq); + res = of_irq_parse_raw(addr_buf, out_irq); out: of_node_put(p); return res; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 061f01f60db4..736ad8baa2a5 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -485,7 +485,9 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot, struct pci_dev *pdev = ctrl_dev(ctrl); pci_config_pm_runtime_get(pdev); - pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC, status), + + /* Attention and Power Indicator Control bits are supported */ + pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC, status), PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC); pci_config_pm_runtime_put(pdev); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e3a49f66982d..ffaaca0978cb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4477,12 +4477,6 @@ void pci_intx(struct pci_dev *pdev, int enable) { u16 pci_command, new; - /* Preserve the "hybrid" behavior for backwards compatibility */ - if (pci_is_managed(pdev)) { - WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); - return; - } - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); if (enable) @@ -4490,8 +4484,15 @@ void pci_intx(struct pci_dev *pdev, int enable) else new = pci_command | PCI_COMMAND_INTX_DISABLE; - if (new != pci_command) + if (new != pci_command) { + /* Preserve the "hybrid" behavior for backwards compatibility */ + if (pci_is_managed(pdev)) { + WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); + return; + } + pci_write_config_word(pdev, PCI_COMMAND, new); + } } EXPORT_SYMBOL_GPL(pci_intx); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 27b42ca509bd..31a17a56eb3b 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -25,6 +25,8 @@ #include <asm/errata_list.h> #include <asm/sbi.h> #include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> #define ALT_SBI_PMU_OVERFLOW(__ovl) \ asm volatile(ALTERNATIVE_2( \ @@ -33,7 +35,8 @@ asm volatile(ALTERNATIVE_2( \ THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ CONFIG_ERRATA_THEAD_PMU, \ "csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF), \ - 0, RISCV_ISA_EXT_XANDESPMU, \ + ANDES_VENDOR_ID, \ + RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \ CONFIG_ANDES_CUSTOM_PMU) \ : "=r" (__ovl) : \ : "memory") @@ -42,7 +45,8 @@ asm volatile(ALTERNATIVE_2( \ asm volatile(ALTERNATIVE( \ "csrc " __stringify(CSR_IP) ", %0\n\t", \ "csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t", \ - 0, RISCV_ISA_EXT_XANDESPMU, \ + ANDES_VENDOR_ID, \ + RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \ CONFIG_ANDES_CUSTOM_PMU) \ : : "r"(__irq_mask) \ : "memory") @@ -412,7 +416,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) * but not in the user access mode as we want to use the other counters * that support sampling/filtering. */ - if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { + if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) { if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; cmask = 1; @@ -1095,7 +1099,8 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde riscv_cached_mimpid(0) == 0) { riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; riscv_pmu_use_irq = true; - } else if (riscv_isa_extension_available(NULL, XANDESPMU) && + } else if (riscv_has_vendor_extension_unlikely(ANDES_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XANDESPMU) && IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) { riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI; riscv_pmu_use_irq = true; diff --git a/drivers/pinctrl/nxp/pinctrl-s32cc.c b/drivers/pinctrl/nxp/pinctrl-s32cc.c index df3e5d82da4b..f2609a35c312 100644 --- a/drivers/pinctrl/nxp/pinctrl-s32cc.c +++ b/drivers/pinctrl/nxp/pinctrl-s32cc.c @@ -730,9 +730,7 @@ static int s32_pinctrl_parse_groups(struct device_node *np, struct s32_pin_group *grp, struct s32_pinctrl_soc_info *info) { - const __be32 *p; struct device *dev; - struct property *prop; unsigned int *pins, *sss; int i, npins; u32 pinmux; @@ -763,7 +761,7 @@ static int s32_pinctrl_parse_groups(struct device_node *np, return -ENOMEM; i = 0; - of_property_for_each_u32(np, "pinmux", prop, p, pinmux) { + of_property_for_each_u32(np, "pinmux", pinmux) { pins[i] = get_pin_no(pinmux); sss[i] = get_pin_func(pinmux); diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c index 2753e14c3e38..a898e40451fe 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -763,8 +763,6 @@ static int k210_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, unsigned int *reserved_maps, unsigned int *num_maps) { - struct property *prop; - const __be32 *p; int ret, pinmux_groups; u32 pinmux_group; unsigned long *configs = NULL; @@ -797,7 +795,7 @@ static int k210_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, if (ret < 0) goto exit; - of_property_for_each_u32(np, "pinmux", prop, p, pinmux_group) { + of_property_for_each_u32(np, "pinmux", pinmux_group) { const char *group_name, *func_name; u32 pin = FIELD_GET(K210_PG_PIN, pinmux_group); u32 func = FIELD_GET(K210_PG_FUNC, pinmux_group); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index f776fd42244f..73f75958e15c 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -813,9 +813,11 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, if (ret == -ENOPROTOOPT) { dev_dbg(ec_dev->dev, "GET_NEXT_EVENT returned invalid version error.\n"); + mutex_lock(&ec_dev->lock); ret = cros_ec_get_host_command_version_mask(ec_dev, EC_CMD_GET_NEXT_EVENT, &ver_mask); + mutex_unlock(&ec_dev->lock); if (ret < 0 || ver_mask == 0) /* * Do not change the MKBP supported version if we can't diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index cb0d4d686d8a..a111eca8ff57 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -16,35 +16,65 @@ config TURRIS_OMNIA_MCU tristate "Turris Omnia MCU driver" depends on MACH_ARMADA_38X || COMPILE_TEST depends on I2C - depends on OF - depends on WATCHDOG - depends on GPIOLIB - depends on HW_RANDOM - depends on RTC_CLASS - depends on WATCHDOG_CORE - select GPIOLIB_IRQCHIP help Say Y here to add support for the features implemented by the microcontroller on the CZ.NIC's Turris Omnia SOHO router. - The features include: - - board poweroff into true low power mode (with voltage regulators - disabled) and the ability to configure wake up from this mode (via - rtcwake) - - true random number generator (if available on the MCU) - - MCU watchdog - - GPIO pins - - to get front button press events (the front button can be - configured either to generate press events to the CPU or to change - front LEDs panel brightness) - - to enable / disable USB port voltage regulators and to detect - USB overcurrent - - to detect MiniPCIe / mSATA card presence in MiniPCIe port 0 - - to configure resets of various peripherals on board revisions 32+ - - to enable / disable the VHV voltage regulator to the SOC in order - to be able to program SOC's OTP on board revisions 32+ - - to get input from the LED output pins of the WAN ethernet PHY, LAN - switch and MiniPCIe ports + This option only enables the core part of the driver. Specific + features can be enabled by subsequent config options. To compile this driver as a module, choose M here; the module will be called turris-omnia-mcu. +if TURRIS_OMNIA_MCU + +config TURRIS_OMNIA_MCU_GPIO + bool "Turris Omnia MCU GPIOs" + default y + depends on GPIOLIB + depends on OF + select GPIOLIB_IRQCHIP + help + Say Y here to add support for controlling MCU GPIO pins and receiving + MCU interrupts on CZ.NIC's Turris Omnia. + This enables you to + - get front button press events (the front button can be configured + either to generate press events to the CPU or to change front LEDs + panel brightness), + - enable / disable USB port voltage regulators and to detect USB + overcurrent, + - detect MiniPCIe / mSATA card presence in MiniPCIe port 0, + - configure resets of various peripherals on board revisions 32+, + - enable / disable the VHV voltage regulator to the SOC in order to be + able to program SOC's OTP on board revisions 32+, + - get input from the LED output pins of the WAN ethernet PHY, LAN + switch and MiniPCIe ports. + +config TURRIS_OMNIA_MCU_SYSOFF_WAKEUP + bool "Turris Omnia MCU system off and RTC wakeup" + default y + depends on RTC_CLASS + help + Say Y here to add support for CZ.NIC's Turris Omnia board poweroff + into true low power mode (with voltage regulators disabled) and the + ability to configure wake up from this mode (via rtcwake). + +config TURRIS_OMNIA_MCU_WATCHDOG + bool "Turris Omnia MCU watchdog" + default y + depends on WATCHDOG + select WATCHDOG_CORE + help + Say Y here to add support for watchdog provided by CZ.NIC's Turris + Omnia MCU. + +config TURRIS_OMNIA_MCU_TRNG + bool "Turris Omnia MCU true random number generator" + default y + depends on TURRIS_OMNIA_MCU_GPIO + depends on HW_RANDOM + help + Say Y here to add support for the true random number generator + provided by CZ.NIC's Turris Omnia MCU. + +endif # TURRIS_OMNIA_MCU + endif # CZNIC_PLATFORMS diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index eae4c6b341ff..ce6d997f34d6 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TURRIS_OMNIA_MCU) += turris-omnia-mcu.o turris-omnia-mcu-y := turris-omnia-mcu-base.o -turris-omnia-mcu-y += turris-omnia-mcu-gpio.o -turris-omnia-mcu-y += turris-omnia-mcu-sys-off-wakeup.o -turris-omnia-mcu-y += turris-omnia-mcu-trng.o -turris-omnia-mcu-y += turris-omnia-mcu-watchdog.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_GPIO) += turris-omnia-mcu-gpio.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP) += turris-omnia-mcu-sys-off-wakeup.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_TRNG) += turris-omnia-mcu-trng.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu-base.c b/drivers/platform/cznic/turris-omnia-mcu-base.c index c68a7a84a951..58f9afae2867 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-base.c +++ b/drivers/platform/cznic/turris-omnia-mcu-base.c @@ -197,8 +197,12 @@ static const struct attribute_group omnia_mcu_base_group = { static const struct attribute_group *omnia_mcu_groups[] = { &omnia_mcu_base_group, +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO &omnia_mcu_gpio_group, +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP &omnia_mcu_poweroff_group, +#endif NULL }; diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 2ca56ae13aa9..fed0d357fea3 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -33,6 +33,7 @@ struct omnia_mcu { u8 board_first_mac[ETH_ALEN]; u8 board_revision; +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO /* GPIO chip */ struct gpio_chip gc; struct mutex lock; @@ -41,18 +42,25 @@ struct omnia_mcu { struct delayed_work button_release_emul_work; unsigned long last_status; bool button_pressed_emul; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP /* RTC device for configuring wake-up */ struct rtc_device *rtcdev; u32 rtc_alarm; bool front_button_poweron; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG /* MCU watchdog */ struct watchdog_device wdt; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG /* true random number generator */ struct hwrng trng; struct completion trng_entropy_ready; +#endif }; int omnia_cmd_write_read(const struct i2c_client *client, @@ -182,13 +190,43 @@ static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); } +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO extern const u8 omnia_int_to_gpio_idx[32]; extern const struct attribute_group omnia_mcu_gpio_group; -extern const struct attribute_group omnia_mcu_poweroff_group; - int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP +extern const struct attribute_group omnia_mcu_poweroff_group; int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG int omnia_mcu_register_trng(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_trng(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG int omnia_mcu_register_watchdog(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_watchdog(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #endif /* __TURRIS_OMNIA_MCU_H */ diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 7fc602e01487..7e89f547999b 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1354,7 +1354,8 @@ void ssam_controller_destroy(struct ssam_controller *ctrl) if (ctrl->state == SSAM_CONTROLLER_UNINITIALIZED) return; - WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED); + WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED && + ctrl->state != SSAM_CONTROLLER_INITIALIZED); /* * Note: New events could still have been received after the previous diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 1c4d74db08c9..a23dff35f8ca 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -265,16 +265,34 @@ static const struct software_node *ssam_node_group_sl5[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, - &ssam_node_tmp_perf_profile, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + +/* Devices for Surface Laptop 6. */ +static const struct software_node *ssam_node_group_sl6[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, &ssam_node_hid_main_keyboard, &ssam_node_hid_main_touchpad, &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_sensors, &ssam_node_hid_sam_ucm_ucsi, NULL, }; -/* Devices for Surface Laptop Studio. */ -static const struct software_node *ssam_node_group_sls[] = { +/* Devices for Surface Laptop Studio 1. */ +static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, @@ -289,6 +307,22 @@ static const struct software_node *ssam_node_group_sls[] = { NULL, }; +/* Devices for Surface Laptop Studio 2. */ +static const struct software_node *ssam_node_group_sls2[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Go. */ static const struct software_node *ssam_node_group_slg1[] = { &ssam_node_root, @@ -324,7 +358,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 */ +/* Devices for Surface Pro 9 and 10 */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -365,6 +399,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 9 */ { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 10 */ + { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, @@ -389,14 +426,23 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 5 */ { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop 6 */ + { "MSHW0530", (unsigned long)ssam_node_group_sl6 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, /* Surface Laptop Go 2 */ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, + /* Surface Laptop Go 3 */ + { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, + + /* Surface Laptop Studio 1 */ + { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, + + /* Surface Laptop Studio 2 */ + { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, { }, }; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 665fa9524986..ddfccc226751 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -477,6 +477,7 @@ config LENOVO_YMC tristate "Lenovo Yoga Tablet Mode Control" depends on ACPI_WMI depends on INPUT + depends on IDEAPAD_LAPTOP select INPUT_SPARSEKMAP help This driver maps the Tablet Mode Control switch to SW_TABLET_MODE input diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index a3d881f6e5d9..c3e51f0a5c33 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -764,6 +764,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return MSG_OS_HINT_RN; } return -EINVAL; @@ -967,6 +968,7 @@ static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RV) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_SP) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h index 9e32d3128c3a..f1166d15c856 100644 --- a/drivers/platform/x86/amd/pmc/pmc.h +++ b/drivers/platform/x86/amd/pmc/pmc.h @@ -67,6 +67,7 @@ void amd_mp2_stb_deinit(struct amd_pmc_dev *dev); #define AMD_CPU_ID_PS 0x14E8 #define AMD_CPU_ID_SP 0x14A4 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PCI_DEVICE_ID_AMD_MP2_STB 0x172c #endif /* PMC_H */ diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 2d6e2558863c..8f1f719befa3 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -41,6 +41,7 @@ #define AMD_CPU_ID_RMB 0x14b5 #define AMD_CPU_ID_PS 0x14e8 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PMF_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 @@ -249,6 +250,7 @@ static const struct pci_device_id pmf_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RMB) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; @@ -382,6 +384,7 @@ static const struct acpi_device_id amd_pmf_acpi_ids[] = { {"AMDI0102", 0}, {"AMDI0103", 0}, {"AMDI0105", 0}, + {"AMDI0107", 0}, { } }; MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids); diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c index 0b2eb0ae85fe..460444cda1b2 100644 --- a/drivers/platform/x86/amd/pmf/pmf-quirks.c +++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c @@ -29,6 +29,14 @@ static const struct dmi_system_id fwbug_list[] = { }, .driver_data = &quirk_no_sps_bug, }, + { + .ident = "ROG Ally X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "RC72LA"), + }, + .driver_data = &quirk_no_sps_bug, + }, {} }; @@ -48,4 +56,3 @@ void amd_pmf_quirks_init(struct amd_pmf_dev *dev) dmi_id->ident); } } - diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index a3dec14c3004..3c153fb1425e 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -150,36 +150,26 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ return 0; } -static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) +static void amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) { struct amd_sfh_info sfh_info; - int ret; + + /* Get the latest information from SFH */ + in->ev_info.user_present = false; /* Get ALS data */ - ret = amd_get_sfh_info(&sfh_info, MT_ALS); - if (!ret) + if (!amd_get_sfh_info(&sfh_info, MT_ALS)) in->ev_info.ambient_light = sfh_info.ambient_light; else - return ret; + dev_dbg(dev->dev, "ALS is not enabled/detected\n"); /* get HPD data */ - ret = amd_get_sfh_info(&sfh_info, MT_HPD); - if (ret) - return ret; - - switch (sfh_info.user_present) { - case SFH_NOT_DETECTED: - in->ev_info.user_present = 0xff; /* assume no sensors connected */ - break; - case SFH_USER_PRESENT: - in->ev_info.user_present = 1; - break; - case SFH_USER_AWAY: - in->ev_info.user_present = 0; - break; + if (!amd_get_sfh_info(&sfh_info, MT_HPD)) { + if (sfh_info.user_present == SFH_USER_PRESENT) + in->ev_info.user_present = true; + } else { + dev_dbg(dev->dev, "HPD is not enabled/detected\n"); } - - return 0; } void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cc735931f97b..37636e5a38e3 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -146,6 +146,20 @@ static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); +static const struct dmi_system_id asus_ally_mcu_quirk[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC72L"), + }, + }, + { }, +}; + static bool ashs_present(void) { int i = 0; @@ -4685,7 +4699,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); + && dmi_check_system(asus_ally_mcu_quirk); if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 1ace711f7442..98ec30fce9fd 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -126,6 +126,7 @@ struct ideapad_rfk_priv { struct ideapad_private { struct acpi_device *adev; + struct mutex vpc_mutex; /* protects the VPC calls */ struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; struct ideapad_rfk_priv rfk_priv[IDEAPAD_RFKILL_DEV_NUM]; struct platform_device *platform_device; @@ -146,6 +147,7 @@ struct ideapad_private { bool touchpad_ctrl_via_ec : 1; bool ctrl_ps2_aux_port : 1; bool usb_charging : 1; + bool ymc_ec_trigger : 1; } features; struct { bool initialized; @@ -194,6 +196,12 @@ MODULE_PARM_DESC(touchpad_ctrl_via_ec, "Enable registering a 'touchpad' sysfs-attribute which can be used to manually " "tell the EC to enable/disable the touchpad. This may not work on all models."); +static bool ymc_ec_trigger __read_mostly; +module_param(ymc_ec_trigger, bool, 0444); +MODULE_PARM_DESC(ymc_ec_trigger, + "Enable EC triggering work-around to force emitting tablet mode events. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + /* * shared data */ @@ -294,6 +302,8 @@ static int debugfs_status_show(struct seq_file *s, void *data) struct ideapad_private *priv = s->private; unsigned long value; + guard(mutex)(&priv->vpc_mutex); + if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL_MAX, &value)) seq_printf(s, "Backlight max: %lu\n", value); if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL, &value)) @@ -412,7 +422,8 @@ static ssize_t camera_power_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); if (err) return err; @@ -431,7 +442,8 @@ static ssize_t camera_power_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); if (err) return err; @@ -484,7 +496,8 @@ static ssize_t fan_mode_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); if (err) return err; @@ -506,7 +519,8 @@ static ssize_t fan_mode_store(struct device *dev, if (state > 4 || state == 3) return -EINVAL; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); if (err) return err; @@ -591,7 +605,8 @@ static ssize_t touchpad_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); if (err) return err; @@ -612,7 +627,8 @@ static ssize_t touchpad_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); if (err) return err; @@ -1005,6 +1021,8 @@ static int ideapad_rfk_set(void *data, bool blocked) struct ideapad_rfk_priv *priv = data; int opcode = ideapad_rfk_data[priv->dev].opcode; + guard(mutex)(&priv->priv->vpc_mutex); + return write_ec_cmd(priv->priv->adev->handle, opcode, !blocked); } @@ -1018,6 +1036,8 @@ static void ideapad_sync_rfk_state(struct ideapad_private *priv) int i; if (priv->features.hw_rfkill_switch) { + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_RF, &hw_blocked)) return; hw_blocked = !hw_blocked; @@ -1191,8 +1211,9 @@ static void ideapad_input_novokey(struct ideapad_private *priv) { unsigned long long_pressed; - if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) + return; if (long_pressed) ideapad_input_report(priv, 17); @@ -1204,8 +1225,9 @@ static void ideapad_check_special_buttons(struct ideapad_private *priv) { unsigned long bit, value; - if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) + return; for_each_set_bit (bit, &value, 16) { switch (bit) { @@ -1238,6 +1260,8 @@ static int ideapad_backlight_get_brightness(struct backlight_device *blightdev) unsigned long now; int err; + guard(mutex)(&priv->vpc_mutex); + err = read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); if (err) return err; @@ -1250,6 +1274,8 @@ static int ideapad_backlight_update_status(struct backlight_device *blightdev) struct ideapad_private *priv = bl_get_data(blightdev); int err; + guard(mutex)(&priv->vpc_mutex); + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_BL, blightdev->props.brightness); if (err) @@ -1327,6 +1353,8 @@ static void ideapad_backlight_notify_power(struct ideapad_private *priv) if (!blightdev) return; + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_BL_POWER, &power)) return; @@ -1339,7 +1367,8 @@ static void ideapad_backlight_notify_brightness(struct ideapad_private *priv) /* if we control brightness via acpi video driver */ if (!priv->blightdev) - read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); + scoped_guard(mutex, &priv->vpc_mutex) + read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); else backlight_force_update(priv->blightdev, BACKLIGHT_UPDATE_HOTKEY); } @@ -1564,7 +1593,8 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ int ret; /* Without reading from EC touchpad LED doesn't switch state */ - ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); + scoped_guard(mutex, &priv->vpc_mutex) + ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); if (ret) return; @@ -1592,16 +1622,92 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ priv->r_touchpad_val = value; } +static const struct dmi_system_id ymc_ec_trigger_quirk_dmi_table[] = { + { + /* Lenovo Yoga 7 14ARB7 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), + }, + }, + { + /* Lenovo Yoga 7 14ACN6 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), + }, + }, + { } +}; + +static void ideapad_laptop_trigger_ec(void) +{ + struct ideapad_private *priv; + int ret; + + guard(mutex)(&ideapad_shared_mutex); + + priv = ideapad_shared; + if (!priv) + return; + + if (!priv->features.ymc_ec_trigger) + return; + + scoped_guard(mutex, &priv->vpc_mutex) + ret = write_ec_cmd(priv->adev->handle, VPCCMD_W_YMC, 1); + if (ret) + dev_warn(&priv->platform_device->dev, "Could not write YMC: %d\n", ret); +} + +static int ideapad_laptop_nb_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + switch (action) { + case IDEAPAD_LAPTOP_YMC_EVENT: + ideapad_laptop_trigger_ec(); + break; + } + + return 0; +} + +static struct notifier_block ideapad_laptop_notifier = { + .notifier_call = ideapad_laptop_nb_notify, +}; + +static BLOCKING_NOTIFIER_HEAD(ideapad_laptop_chain_head); + +int ideapad_laptop_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_register_notifier, IDEAPAD_LAPTOP); + +int ideapad_laptop_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_unregister_notifier, IDEAPAD_LAPTOP); + +void ideapad_laptop_call_notifier(unsigned long action, void *data) +{ + blocking_notifier_call_chain(&ideapad_laptop_chain_head, action, data); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_call_notifier, IDEAPAD_LAPTOP); + static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) { struct ideapad_private *priv = data; unsigned long vpc1, vpc2, bit; - if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) - return; + scoped_guard(mutex, &priv->vpc_mutex) { + if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) + return; - if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) - return; + if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) + return; + } vpc1 = (vpc2 << 8) | vpc1; @@ -1728,6 +1834,8 @@ static void ideapad_check_features(struct ideapad_private *priv) priv->features.ctrl_ps2_aux_port = ctrl_ps2_aux_port || dmi_check_system(ctrl_ps2_aux_port_list); priv->features.touchpad_ctrl_via_ec = touchpad_ctrl_via_ec; + priv->features.ymc_ec_trigger = + ymc_ec_trigger || dmi_check_system(ymc_ec_trigger_quirk_dmi_table); if (!read_ec_data(handle, VPCCMD_R_FAN, &val)) priv->features.fan_mode = true; @@ -1906,6 +2014,10 @@ static int ideapad_acpi_add(struct platform_device *pdev) priv->adev = adev; priv->platform_device = pdev; + err = devm_mutex_init(&pdev->dev, &priv->vpc_mutex); + if (err) + return err; + ideapad_check_features(priv); err = ideapad_sysfs_init(priv); @@ -1974,6 +2086,8 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (err) goto shared_init_failed; + ideapad_laptop_register_notifier(&ideapad_laptop_notifier); + return 0; shared_init_failed: @@ -2006,6 +2120,8 @@ static void ideapad_acpi_remove(struct platform_device *pdev) struct ideapad_private *priv = dev_get_drvdata(&pdev->dev); int i; + ideapad_laptop_unregister_notifier(&ideapad_laptop_notifier); + ideapad_shared_exit(priv); acpi_remove_notify_handler(priv->adev->handle, diff --git a/drivers/platform/x86/ideapad-laptop.h b/drivers/platform/x86/ideapad-laptop.h index 4498a96de597..948cc61800a9 100644 --- a/drivers/platform/x86/ideapad-laptop.h +++ b/drivers/platform/x86/ideapad-laptop.h @@ -12,6 +12,15 @@ #include <linux/acpi.h> #include <linux/jiffies.h> #include <linux/errno.h> +#include <linux/notifier.h> + +enum ideapad_laptop_notifier_actions { + IDEAPAD_LAPTOP_YMC_EVENT, +}; + +int ideapad_laptop_register_notifier(struct notifier_block *nb); +int ideapad_laptop_unregister_notifier(struct notifier_block *nb); +void ideapad_laptop_call_notifier(unsigned long action, void *data); enum { VPCCMD_R_VPC1 = 0x10, diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 282e4bfe30da..be3d51ed0e47 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -221,8 +221,8 @@ static int doscan(void *data) */ static void ifs_test_core(int cpu, struct device *dev) { + union ifs_status status = {}; union ifs_scan activate; - union ifs_status status; unsigned long timeout; struct ifs_data *ifsd; int to_start, to_stop; diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 7fa360073f6e..404582307109 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1549,8 +1549,7 @@ int tpmi_sst_dev_add(struct auxiliary_device *auxdev) goto unlock_free; } - ret = sst_main(auxdev, &pd_info[i]); - if (ret) { + if (sst_main(auxdev, &pd_info[i])) { /* * This entry is not valid, hardware can partially * populate dies. In this case MMIO will have 0xFFs. diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 9b7ce03ba085..a353e830b65f 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -7,11 +7,13 @@ */ #include <linux/acpi.h> +#include <linux/cleanup.h> #include <linux/dmi.h> #include <linux/input.h> #include <linux/input/sparse-keymap.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/suspend.h> #include "../dual_accel_detect.h" @@ -66,6 +68,7 @@ static const struct key_entry intel_vbtn_switchmap[] = { }; struct intel_vbtn_priv { + struct mutex mutex; /* Avoid notify_handler() racing with itself */ struct input_dev *buttons_dev; struct input_dev *switches_dev; bool dual_accel; @@ -155,6 +158,8 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) bool autorelease; int ret; + guard(mutex)(&priv->mutex); + if ((ke = sparse_keymap_entry_from_scancode(priv->buttons_dev, event))) { if (!priv->has_buttons) { dev_warn(&device->dev, "Warning: received 0x%02x button event on a device without buttons, please report this.\n", @@ -290,6 +295,10 @@ static int intel_vbtn_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + err = devm_mutex_init(&device->dev, &priv->mutex); + if (err) + return err; + priv->dual_accel = dual_accel; priv->has_buttons = has_buttons; priv->has_switches = has_switches; diff --git a/drivers/platform/x86/lenovo-ymc.c b/drivers/platform/x86/lenovo-ymc.c index e1fbc35504d4..e0bbd6a14a89 100644 --- a/drivers/platform/x86/lenovo-ymc.c +++ b/drivers/platform/x86/lenovo-ymc.c @@ -20,32 +20,10 @@ #define LENOVO_YMC_QUERY_INSTANCE 0 #define LENOVO_YMC_QUERY_METHOD 0x01 -static bool ec_trigger __read_mostly; -module_param(ec_trigger, bool, 0444); -MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events"); - static bool force; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type"); -static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = { - { - /* Lenovo Yoga 7 14ARB7 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), - }, - }, - { - /* Lenovo Yoga 7 14ACN6 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), - }, - }, - { } -}; - static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { { .matches = { @@ -62,21 +40,8 @@ static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { struct lenovo_ymc_private { struct input_dev *input_dev; - struct acpi_device *ec_acpi_dev; }; -static void lenovo_ymc_trigger_ec(struct wmi_device *wdev, struct lenovo_ymc_private *priv) -{ - int err; - - if (!priv->ec_acpi_dev) - return; - - err = write_ec_cmd(priv->ec_acpi_dev->handle, VPCCMD_W_YMC, 1); - if (err) - dev_warn(&wdev->dev, "Could not write YMC: %d\n", err); -} - static const struct key_entry lenovo_ymc_keymap[] = { /* Laptop */ { KE_SW, 0x01, { .sw = { SW_TABLET_MODE, 0 } } }, @@ -125,11 +90,9 @@ static void lenovo_ymc_notify(struct wmi_device *wdev, union acpi_object *data) free_obj: kfree(obj); - lenovo_ymc_trigger_ec(wdev, priv); + ideapad_laptop_call_notifier(IDEAPAD_LAPTOP_YMC_EVENT, &code); } -static void acpi_dev_put_helper(void *p) { acpi_dev_put(p); } - static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) { struct lenovo_ymc_private *priv; @@ -143,29 +106,10 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) return -ENODEV; } - ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table); - priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - if (ec_trigger) { - pr_debug("Lenovo YMC enable EC triggering.\n"); - priv->ec_acpi_dev = acpi_dev_get_first_match_dev("VPC2004", NULL, -1); - - if (!priv->ec_acpi_dev) { - dev_err(&wdev->dev, "Could not find EC ACPI device.\n"); - return -ENODEV; - } - err = devm_add_action_or_reset(&wdev->dev, - acpi_dev_put_helper, priv->ec_acpi_dev); - if (err) { - dev_err(&wdev->dev, - "Could not clean up EC ACPI device: %d\n", err); - return err; - } - } - input_dev = devm_input_allocate_device(&wdev->dev); if (!input_dev) return -ENOMEM; @@ -192,7 +136,6 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) dev_set_drvdata(&wdev->dev, priv); /* Report the state for the first time on probe */ - lenovo_ymc_trigger_ec(wdev, priv); lenovo_ymc_notify(wdev, NULL); return 0; } @@ -217,3 +160,4 @@ module_wmi_driver(lenovo_ymc_driver); MODULE_AUTHOR("Gergo Koteles <soyer@irl.hu>"); MODULE_DESCRIPTION("Lenovo Yoga Mode Control driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IDEAPAD_LAPTOP); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 3e94fdd1ea52..3197aaa69da7 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -757,7 +757,6 @@ static union acpi_object *__call_snc_method(acpi_handle handle, char *method, return result; } -#define MIN(a, b) (a > b ? b : a) static int sony_nc_buffer_call(acpi_handle handle, char *name, u64 *value, void *buffer, size_t buflen) { diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index b5903193e2f9..ac05942e4e6a 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -178,18 +178,18 @@ static inline int axp288_charger_set_cv(struct axp288_chrg_info *info, int cv) u8 reg_val; int ret; - if (cv <= CV_4100MV) { - reg_val = CHRG_CCCV_CV_4100MV; - cv = CV_4100MV; - } else if (cv <= CV_4150MV) { - reg_val = CHRG_CCCV_CV_4150MV; - cv = CV_4150MV; - } else if (cv <= CV_4200MV) { + if (cv >= CV_4350MV) { + reg_val = CHRG_CCCV_CV_4350MV; + cv = CV_4350MV; + } else if (cv >= CV_4200MV) { reg_val = CHRG_CCCV_CV_4200MV; cv = CV_4200MV; + } else if (cv >= CV_4150MV) { + reg_val = CHRG_CCCV_CV_4150MV; + cv = CV_4150MV; } else { - reg_val = CHRG_CCCV_CV_4350MV; - cv = CV_4350MV; + reg_val = CHRG_CCCV_CV_4100MV; + cv = CV_4100MV; } reg_val = reg_val << CHRG_CCCV_CV_BIT_POS; @@ -337,8 +337,8 @@ static int axp288_charger_usb_set_property(struct power_supply *psy, } break; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: - scaled_val = min(val->intval, info->max_cv); - scaled_val = DIV_ROUND_CLOSEST(scaled_val, 1000); + scaled_val = DIV_ROUND_CLOSEST(val->intval, 1000); + scaled_val = min(scaled_val, info->max_cv); ret = axp288_charger_set_cv(info, scaled_val); if (ret < 0) { dev_warn(&info->pdev->dev, "set charge voltage failed\n"); diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index 46f36dcb185c..49bef4a5ac3f 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -486,7 +486,7 @@ static int qcom_battmgr_bat_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -683,7 +683,7 @@ static int qcom_battmgr_ac_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); if (ret) @@ -748,7 +748,7 @@ static int qcom_battmgr_usb_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -867,7 +867,7 @@ static int qcom_battmgr_wls_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -1007,7 +1007,9 @@ static void qcom_battmgr_sc8280xp_callback(struct qcom_battmgr *battmgr, battmgr->error = 0; break; case BATTMGR_BAT_INFO: - if (payload_len != sizeof(resp->info)) { + /* some firmware versions report an extra __le32 at the end of the payload */ + if (payload_len != sizeof(resp->info) && + payload_len != (sizeof(resp->info) + sizeof(__le32))) { dev_warn(battmgr->dev, "invalid payload length for battery information request: %zd\n", payload_len); diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 32eafe2c00af..7a27b262fb84 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -159,6 +159,7 @@ static int rt5033_battery_probe(struct i2c_client *client) return -EINVAL; } + i2c_set_clientdata(client, battery); psy_cfg.of_node = client->dev.of_node; psy_cfg.drv_data = battery; diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 7adf4f2b1049..951b38ff5f8e 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -510,8 +510,6 @@ static int pwm_samsung_parse_dt(struct pwm_chip *chip) struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); struct device_node *np = pwmchip_parent(chip)->of_node; const struct of_device_id *match; - struct property *prop; - const __be32 *cur; u32 val; match = of_match_node(samsung_pwm_matches, np); @@ -520,7 +518,7 @@ static int pwm_samsung_parse_dt(struct pwm_chip *chip) memcpy(&our_chip->variant, match->data, sizeof(our_chip->variant)); - of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) { + of_property_for_each_u32(np, "samsung,pwm-outputs", val) { if (val >= SAMSUNG_PWM_NUM) { dev_err(pwmchip_parent(chip), "%s: invalid channel index in samsung,pwm-outputs property\n", diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e6a9027773fc..4b411a09c1a6 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1661,10 +1661,10 @@ config REGULATOR_UNIPHIER config REGULATOR_RZG2L_VBCTRL tristate "Renesas RZ/G2L USB VBUS regulator driver" - depends on ARCH_RZG2L || COMPILE_TEST + depends on RESET_RZG2L_USBPHY_CTRL || COMPILE_TEST depends on OF select REGMAP_MMIO - default ARCH_RZG2L + default RESET_RZG2L_USBPHY_CTRL help Support for VBUS regulators implemented on Renesas RZ/G2L SoCs. diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a97cfedd706..42a4a996defb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct dasd_block *block, struct irb *irb) if (!sense) return 0; - return !!(sense[1] & SNS1_NO_REC_FOUND) || - !!(sense[1] & SNS1_FILE_PROTECTED) || - scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN; + if (sense[1] & SNS1_NO_REC_FOUND) + return 1; + + if ((sense[1] & SNS1_INV_TRACK_FORMAT) && + scsw_is_tm(&irb->scsw) && + !(sense[2] & SNS2_ENV_DATA_PRESENT)) + return 1; + + return 0; } static int dasd_ese_oos_cond(u8 *sense) @@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, struct dasd_device *device; unsigned long now; int nrf_suppressed = 0; - int fp_suppressed = 0; + int it_suppressed = 0; struct request *req; u8 *sense = NULL; int expires; @@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, */ sense = dasd_get_sense(irb); if (sense) { - fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); + it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) && test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); @@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } } - if (!(fp_suppressed || nrf_suppressed)) + if (!(it_suppressed || nrf_suppressed)) device->discipline->dump_sense_dbf(device, irb, "int"); if (device->features & DASD_FEATURE_ERPLOG) @@ -2459,14 +2466,17 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and error recovery would be - * unnecessary in these cases. Check if the according suppress - * bit is set. + * In some cases certain errors might be expected and + * error recovery would be unnecessary in these cases. + * Check if the according suppress bit is set. */ sense = dasd_get_sense(&cqr->irb); - if (sense && sense[1] & SNS1_FILE_PROTECTED && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags)) + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags)) + continue; + if (sense && (sense[1] & SNS1_NO_REC_FOUND) && + test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags)) continue; if (scsw_cstat(&cqr->irb.scsw) == 0x40 && test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags)) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index bbbacfc386f2..d0aa267462c5 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_req * erp) struct dasd_device *device = erp->startdev; - /* - * In some cases the 'File Protected' error might be expected and - * log messages shouldn't be written then. - * Check if the according suppress bit is set. - */ - if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags)) - dev_err(&device->cdev->dev, - "Accessing the DASD failed because of a hardware error\n"); + dev_err(&device->cdev->dev, + "Accessing the DASD failed because of a hardware error\n"); return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9388b5c383ca..90b106408992 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2275,6 +2275,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); return cqr; } @@ -2556,7 +2557,6 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata, cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); return cqr; @@ -4130,8 +4130,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); } @@ -4633,9 +4631,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); } return cqr; @@ -5780,36 +5777,32 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, { u8 *sense = dasd_get_sense(irb); - if (scsw_is_tm(&irb->scsw)) { - /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and log messages shouldn't be written - * then. Check if the according suppress bit is set. - */ - if (sense && (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &req->flags)) - return; - if (scsw_cstat(&irb->scsw) == 0x40 && - test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) - return; + /* + * In some cases certain errors might be expected and + * log messages shouldn't be written then. + * Check if the according suppress bit is set. + */ + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &req->flags)) + return; - dasd_eckd_dump_sense_tcw(device, req, irb); - } else { - /* - * In some cases the 'Command Reject' or 'No Record Found' - * error might be expected and log messages shouldn't be - * written then. Check if the according suppress bit is set. - */ - if (sense && sense[0] & SNS0_CMD_REJECT && - test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) - return; + if (sense && sense[0] & SNS0_CMD_REJECT && + test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) + return; - if (sense && sense[1] & SNS1_NO_REC_FOUND && - test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) - return; + if (sense && sense[1] & SNS1_NO_REC_FOUND && + test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) + return; + if (scsw_cstat(&irb->scsw) == 0x40 && + test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) + return; + + if (scsw_is_tm(&irb->scsw)) + dasd_eckd_dump_sense_tcw(device, req, irb); + else dasd_eckd_dump_sense_ccw(device, req, irb); - } } static int dasd_eckd_reload_device(struct dasd_device *device) diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 1aa426b1dedd..6da47a65af61 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,7 +41,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) */ .max_segment_size = PAGE_SIZE, .seg_boundary_mask = PAGE_SIZE - 1, - .dma_alignment = PAGE_SIZE - 1, .max_segments = USHRT_MAX, }; struct gendisk *gdp; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index e5f40536b425..81cfb5c89681 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -196,7 +196,7 @@ struct dasd_ccw_req { * The following flags are used to suppress output of certain errors. */ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */ -#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/ +#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 8a03af5ee5b3..80c4e5101c97 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -96,7 +96,7 @@ config SCLP_OFB config S390_UV_UAPI def_tristate m prompt "Ultravisor userspace API" - depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST) + depends on S390 help Selecting exposes parts of the UV interface to userspace by providing a misc character device at /dev/uv. diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index b72f672a7720..66b1bdc63284 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -550,4 +550,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) put_device(&gdev->dev); } EXPORT_SYMBOL(ccwgroup_remove_ccwdev); +MODULE_DESCRIPTION("ccwgroup bus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 8ad49030a7bf..914dde041675 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -488,4 +488,5 @@ static void __exit vfio_ccw_sch_exit(void) module_init(vfio_ccw_sch_init); module_exit(vfio_ccw_sch_exit); +MODULE_DESCRIPTION("VFIO based Subchannel device driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index d31884f82f2a..73085d2f5c43 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -65,11 +65,7 @@ #include "task.h" #include "probe_roms.h" -#define MAJ 1 -#define MIN 2 -#define BUILD 0 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ - __stringify(BUILD) +#define DRV_VERSION "1.2.0" MODULE_VERSION(DRV_VERSION); diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8b0eded6ef36..01f035f9330e 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -100,7 +100,8 @@ retry_trace: dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + if (get_order(trace_size) > MAX_PAGE_ORDER || + mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -118,8 +119,12 @@ retry_fw: diag_buffer->type = MPI3_DIAG_BUFFER_TYPE_FW; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_NOT_ALLOCATED; if ((mrioc->facts.diag_fw_sz < fw_size) && (fw_size >= fw_min_size)) { - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - fw_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (get_order(fw_size) <= MAX_PAGE_ORDER) { + diag_buffer->addr + = dma_alloc_coherent(&mrioc->pdev->dev, fw_size, + &diag_buffer->dma_addr, + GFP_KERNEL); + } if (!retry) dprint_init(mrioc, "%s:trying to allocate firmware diag buffer of size = %dKB\n", diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 69b14918de59..616894571c6a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3575,6 +3575,17 @@ static int mpi3mr_prepare_sg_scmd(struct mpi3mr_ioc *mrioc, scmd->sc_data_direction); priv->meta_sg_valid = 1; /* To unmap meta sg DMA */ } else { + /* + * Some firmware versions byte-swap the REPORT ZONES command + * reply from ATA-ZAC devices by directly accessing in the host + * buffer. This does not respect the default command DMA + * direction and causes IOMMU page faults on some architectures + * with an IOMMU enforcing write mappings (e.g. AMD hosts). + * Avoid such issue by making the REPORT ZONES buffer mapping + * bi-directional. + */ + if (scmd->cmnd[0] == ZBC_IN && scmd->cmnd[1] == ZI_REPORT_ZONES) + scmd->sc_data_direction = DMA_BIDIRECTIONAL; sg_scmd = scsi_sglist(scmd); sges_left = scsi_dma_map(scmd); } @@ -5223,6 +5234,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&mrioc->watchdog_lock); spin_lock_init(&mrioc->chain_buf_lock); spin_lock_init(&mrioc->sas_node_lock); + spin_lock_init(&mrioc->trigger_lock); INIT_LIST_HEAD(&mrioc->fwevt_list); INIT_LIST_HEAD(&mrioc->tgtdev_list); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b2bcf4a27ddc..b785a7e88b49 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2671,6 +2671,22 @@ _base_build_zero_len_sge_ieee(struct MPT3SAS_ADAPTER *ioc, void *paddr) _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1); } +static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd) +{ + /* + * Some firmware versions byte-swap the REPORT ZONES command reply from + * ATA-ZAC devices by directly accessing in the host buffer. This does + * not respect the default command DMA direction and causes IOMMU page + * faults on some architectures with an IOMMU enforcing write mappings + * (e.g. AMD hosts). Avoid such issue by making the report zones buffer + * mapping bi-directional. + */ + if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES) + cmd->sc_data_direction = DMA_BIDIRECTIONAL; + + return scsi_dma_map(cmd); +} + /** * _base_build_sg_scmd - main sg creation routine * pcie_device is unused here! @@ -2717,7 +2733,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc, sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; @@ -2861,7 +2877,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, } sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index adeaa8ab9951..699f4f9674d9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2711,8 +2711,6 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; - - sd_config_discard(sdkp, lim, SD_LBP_WS16); } sdkp->capacity = lba + 1; @@ -3365,8 +3363,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp, sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); - sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); - config_atomic: sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); @@ -3753,9 +3749,10 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); - sd_read_cpr(sdkp); } + sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); + sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); @@ -3809,6 +3806,14 @@ static int sd_revalidate_disk(struct gendisk *disk) return err; /* + * Query concurrent positioning ranges after + * queue_limits_commit_update() unlocked q->limits_lock to avoid + * deadlock with q->sysfs_dir_lock and q->sysfs_lock. + */ + if (sdkp->media_present && scsi_device_supports_vpd(sdp)) + sd_read_cpr(sdkp); + + /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk * capacity to 0. @@ -4205,6 +4210,8 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4221,13 +4228,12 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index a0d2556a27bb..089653018d32 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -431,7 +431,7 @@ int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed) struct packet_command cgc; /* avoid exceeding the max speed or overflowing integer bounds */ - speed = clamp(0, speed, 0xffff / 177); + speed = clamp(speed, 0, 0xffff / 177); if (speed == 0) speed = 0xffff; /* set to max */ diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 7e9074519ad2..4dc8aba33d9b 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2546,11 +2546,6 @@ release_lock: } EXPORT_SYMBOL(qman_delete_cgr); -struct cgr_comp { - struct qman_cgr *cgr; - struct completion completion; -}; - static void qman_delete_cgr_smp_call(void *p) { qman_delete_cgr((struct qman_cgr *)p); diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 32baa14dfd83..be261ac09df8 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -296,7 +296,7 @@ static void fsl_lpspi_set_watermark(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; - unsigned int perclk_rate, scldiv; + unsigned int perclk_rate, scldiv, div; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); @@ -313,8 +313,10 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) return -EINVAL; } + div = DIV_ROUND_UP(perclk_rate, config.speed_hz); + for (prescale = 0; prescale < 8; prescale++) { - scldiv = perclk_rate / config.speed_hz / (1 << prescale) - 2; + scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; break; diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 77e9738e42f6..16054695bdb0 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -481,6 +481,9 @@ static int hisi_spi_probe(struct platform_device *pdev) return -EINVAL; } + if (host->max_speed_hz == 0) + return dev_err_probe(dev, -EINVAL, "spi-max-frequency can't be 0\n"); + ret = device_property_read_u16(dev, "num-cs", &host->num_chipselect); if (ret) @@ -495,6 +498,7 @@ static int hisi_spi_probe(struct platform_device *pdev) host->transfer_one = hisi_spi_transfer_one; host->handle_err = hisi_spi_handle_err; host->dev.fwnode = dev->fwnode; + host->min_speed_hz = DIV_ROUND_UP(host->max_speed_hz, CLK_DIV_MAX); hisi_spi_hw_init(hs); diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c index 6246254e1dff..7c1a9a985373 100644 --- a/drivers/spi/spi-microchip-core.c +++ b/drivers/spi/spi-microchip-core.c @@ -75,6 +75,7 @@ #define REG_CONTROL (0x00) #define REG_FRAME_SIZE (0x04) +#define FRAME_SIZE_MASK GENMASK(5, 0) #define REG_STATUS (0x08) #define REG_INT_CLEAR (0x0c) #define REG_RX_DATA (0x10) @@ -89,6 +90,9 @@ #define REG_RIS (0x24) #define REG_CONTROL2 (0x28) #define REG_COMMAND (0x2c) +#define COMMAND_CLRFRAMECNT BIT(4) +#define COMMAND_TXFIFORST BIT(3) +#define COMMAND_RXFIFORST BIT(2) #define REG_PKTSIZE (0x30) #define REG_CMD_SIZE (0x34) #define REG_HWSTATUS (0x38) @@ -103,10 +107,11 @@ struct mchp_corespi { u8 *rx_buf; u32 clk_gen; /* divider for spi output clock generated by the controller */ u32 clk_mode; + u32 pending_slave_select; int irq; int tx_len; int rx_len; - int pending; + int n_bytes; }; static inline u32 mchp_corespi_read(struct mchp_corespi *spi, unsigned int reg) @@ -130,113 +135,126 @@ static inline void mchp_corespi_disable(struct mchp_corespi *spi) static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi) { - u8 data; - int fifo_max, i = 0; + while (spi->rx_len >= spi->n_bytes && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { + u32 data = mchp_corespi_read(spi, REG_RX_DATA); - fifo_max = min(spi->rx_len, FIFO_DEPTH); + spi->rx_len -= spi->n_bytes; - while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { - data = mchp_corespi_read(spi, REG_RX_DATA); + if (!spi->rx_buf) + continue; - if (spi->rx_buf) - *spi->rx_buf++ = data; - i++; + if (spi->n_bytes == 4) + *((u32 *)spi->rx_buf) = data; + else if (spi->n_bytes == 2) + *((u16 *)spi->rx_buf) = data; + else + *spi->rx_buf = data; + + spi->rx_buf += spi->n_bytes; } - spi->rx_len -= i; - spi->pending -= i; } static void mchp_corespi_enable_ints(struct mchp_corespi *spi) { - u32 control, mask = INT_ENABLE_MASK; - - mchp_corespi_disable(spi); - - control = mchp_corespi_read(spi, REG_CONTROL); - - control |= mask; - mchp_corespi_write(spi, REG_CONTROL, control); + u32 control = mchp_corespi_read(spi, REG_CONTROL); - control |= CONTROL_ENABLE; + control |= INT_ENABLE_MASK; mchp_corespi_write(spi, REG_CONTROL, control); } static void mchp_corespi_disable_ints(struct mchp_corespi *spi) { - u32 control, mask = INT_ENABLE_MASK; - - mchp_corespi_disable(spi); - - control = mchp_corespi_read(spi, REG_CONTROL); - control &= ~mask; - mchp_corespi_write(spi, REG_CONTROL, control); + u32 control = mchp_corespi_read(spi, REG_CONTROL); - control |= CONTROL_ENABLE; + control &= ~INT_ENABLE_MASK; mchp_corespi_write(spi, REG_CONTROL, control); } static inline void mchp_corespi_set_xfer_size(struct mchp_corespi *spi, int len) { u32 control; - u16 lenpart; + u32 lenpart; + u32 frames = mchp_corespi_read(spi, REG_FRAMESUP); /* - * Disable the SPI controller. Writes to transfer length have - * no effect when the controller is enabled. + * Writing to FRAMECNT in REG_CONTROL will reset the frame count, taking + * a shortcut requires an explicit clear. */ - mchp_corespi_disable(spi); + if (frames == len) { + mchp_corespi_write(spi, REG_COMMAND, COMMAND_CLRFRAMECNT); + return; + } /* * The lower 16 bits of the frame count are stored in the control reg * for legacy reasons, but the upper 16 written to a different register: * FRAMESUP. While both the upper and lower bits can be *READ* from the - * FRAMESUP register, writing to the lower 16 bits is a NOP + * FRAMESUP register, writing to the lower 16 bits is (supposedly) a NOP. + * + * The driver used to disable the controller while modifying the frame + * count, and mask off the lower 16 bits of len while writing to + * FRAMES_UP. When the driver was changed to disable the controller as + * infrequently as possible, it was discovered that the logic of + * lenpart = len & 0xffff_0000 + * write(REG_FRAMESUP, lenpart) + * would actually write zeros into the lower 16 bits on an mpfs250t-es, + * despite documentation stating these bits were read-only. + * Writing len unmasked into FRAMES_UP ensures those bits aren't zeroed + * on an mpfs250t-es and will be a NOP for the lower 16 bits on hardware + * that matches the documentation. */ lenpart = len & 0xffff; - control = mchp_corespi_read(spi, REG_CONTROL); control &= ~CONTROL_FRAMECNT_MASK; control |= lenpart << CONTROL_FRAMECNT_SHIFT; mchp_corespi_write(spi, REG_CONTROL, control); - - lenpart = len & 0xffff0000; - mchp_corespi_write(spi, REG_FRAMESUP, lenpart); - - control |= CONTROL_ENABLE; - mchp_corespi_write(spi, REG_CONTROL, control); + mchp_corespi_write(spi, REG_FRAMESUP, len); } static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi) { - u8 byte; int fifo_max, i = 0; - fifo_max = min(spi->tx_len, FIFO_DEPTH); + fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); mchp_corespi_set_xfer_size(spi, fifo_max); while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_TXFIFO_FULL)) { - byte = spi->tx_buf ? *spi->tx_buf++ : 0xaa; - mchp_corespi_write(spi, REG_TX_DATA, byte); + u32 word; + + if (spi->n_bytes == 4) + word = spi->tx_buf ? *((u32 *)spi->tx_buf) : 0xaa; + else if (spi->n_bytes == 2) + word = spi->tx_buf ? *((u16 *)spi->tx_buf) : 0xaa; + else + word = spi->tx_buf ? *spi->tx_buf : 0xaa; + + mchp_corespi_write(spi, REG_TX_DATA, word); + if (spi->tx_buf) + spi->tx_buf += spi->n_bytes; i++; } - spi->tx_len -= i; - spi->pending += i; + spi->tx_len -= i * spi->n_bytes; } static inline void mchp_corespi_set_framesize(struct mchp_corespi *spi, int bt) { + u32 frame_size = mchp_corespi_read(spi, REG_FRAME_SIZE); u32 control; + if ((frame_size & FRAME_SIZE_MASK) == bt) + return; + /* * Disable the SPI controller. Writes to the frame size have * no effect when the controller is enabled. */ - mchp_corespi_disable(spi); + control = mchp_corespi_read(spi, REG_CONTROL); + control &= ~CONTROL_ENABLE; + mchp_corespi_write(spi, REG_CONTROL, control); mchp_corespi_write(spi, REG_FRAME_SIZE, bt); - control = mchp_corespi_read(spi, REG_CONTROL); control |= CONTROL_ENABLE; mchp_corespi_write(spi, REG_CONTROL, control); } @@ -249,8 +267,18 @@ static void mchp_corespi_set_cs(struct spi_device *spi, bool disable) reg = mchp_corespi_read(corespi, REG_SLAVE_SELECT); reg &= ~BIT(spi_get_chipselect(spi, 0)); reg |= !disable << spi_get_chipselect(spi, 0); + corespi->pending_slave_select = reg; - mchp_corespi_write(corespi, REG_SLAVE_SELECT, reg); + /* + * Only deassert chip select immediately. Writing to some registers + * requires the controller to be disabled, which results in the + * output pins being tristated and can cause the SCLK and MOSI lines + * to transition. Therefore asserting the chip select is deferred + * until just before writing to the TX FIFO, to ensure the device + * doesn't see any spurious clock transitions whilst CS is enabled. + */ + if (((spi->mode & SPI_CS_HIGH) == 0) == disable) + mchp_corespi_write(corespi, REG_SLAVE_SELECT, reg); } static int mchp_corespi_setup(struct spi_device *spi) @@ -269,6 +297,7 @@ static int mchp_corespi_setup(struct spi_device *spi) if (spi->mode & SPI_CS_HIGH) { reg = mchp_corespi_read(corespi, REG_SLAVE_SELECT); reg |= BIT(spi_get_chipselect(spi, 0)); + corespi->pending_slave_select = reg; mchp_corespi_write(corespi, REG_SLAVE_SELECT, reg); } return 0; @@ -279,17 +308,13 @@ static void mchp_corespi_init(struct spi_controller *host, struct mchp_corespi * unsigned long clk_hz; u32 control = mchp_corespi_read(spi, REG_CONTROL); - control |= CONTROL_MASTER; + control &= ~CONTROL_ENABLE; + mchp_corespi_write(spi, REG_CONTROL, control); + control |= CONTROL_MASTER; control &= ~CONTROL_MODE_MASK; control |= MOTOROLA_MODE; - mchp_corespi_set_framesize(spi, DEFAULT_FRAMESIZE); - - /* max. possible spi clock rate is the apb clock rate */ - clk_hz = clk_get_rate(spi->clk); - host->max_speed_hz = clk_hz; - /* * The controller must be configured so that it doesn't remove Chip * Select until the entire message has been transferred, even if at @@ -298,11 +323,16 @@ static void mchp_corespi_init(struct spi_controller *host, struct mchp_corespi * * BIGFIFO mode is also enabled, which sets the fifo depth to 32 frames * for the 8 bit transfers that this driver uses. */ - control = mchp_corespi_read(spi, REG_CONTROL); control |= CONTROL_SPS | CONTROL_BIGFIFO; mchp_corespi_write(spi, REG_CONTROL, control); + mchp_corespi_set_framesize(spi, DEFAULT_FRAMESIZE); + + /* max. possible spi clock rate is the apb clock rate */ + clk_hz = clk_get_rate(spi->clk); + host->max_speed_hz = clk_hz; + mchp_corespi_enable_ints(spi); /* @@ -310,7 +340,8 @@ static void mchp_corespi_init(struct spi_controller *host, struct mchp_corespi * * select is relinquished to the hardware. SSELOUT is enabled too so we * can deal with active high targets. */ - mchp_corespi_write(spi, REG_SLAVE_SELECT, SSELOUT | SSEL_DIRECT); + spi->pending_slave_select = SSELOUT | SSEL_DIRECT; + mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); control = mchp_corespi_read(spi, REG_CONTROL); @@ -324,8 +355,6 @@ static inline void mchp_corespi_set_clk_gen(struct mchp_corespi *spi) { u32 control; - mchp_corespi_disable(spi); - control = mchp_corespi_read(spi, REG_CONTROL); if (spi->clk_mode) control |= CONTROL_CLKMODE; @@ -334,12 +363,12 @@ static inline void mchp_corespi_set_clk_gen(struct mchp_corespi *spi) mchp_corespi_write(spi, REG_CLK_GEN, spi->clk_gen); mchp_corespi_write(spi, REG_CONTROL, control); - mchp_corespi_write(spi, REG_CONTROL, control | CONTROL_ENABLE); } static inline void mchp_corespi_set_mode(struct mchp_corespi *spi, unsigned int mode) { - u32 control, mode_val; + u32 mode_val; + u32 control = mchp_corespi_read(spi, REG_CONTROL); switch (mode & SPI_MODE_X_MASK) { case SPI_MODE_0: @@ -357,12 +386,13 @@ static inline void mchp_corespi_set_mode(struct mchp_corespi *spi, unsigned int } /* - * Disable the SPI controller. Writes to the frame size have + * Disable the SPI controller. Writes to the frame protocol have * no effect when the controller is enabled. */ - mchp_corespi_disable(spi); - control = mchp_corespi_read(spi, REG_CONTROL); + control &= ~CONTROL_ENABLE; + mchp_corespi_write(spi, REG_CONTROL, control); + control &= ~(SPI_MODE_X_MASK << MODE_X_MASK_SHIFT); control |= mode_val; @@ -383,21 +413,18 @@ static irqreturn_t mchp_corespi_interrupt(int irq, void *dev_id) if (intfield == 0) return IRQ_NONE; - if (intfield & INT_TXDONE) { + if (intfield & INT_TXDONE) mchp_corespi_write(spi, REG_INT_CLEAR, INT_TXDONE); + if (intfield & INT_RXRDY) { + mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); + if (spi->rx_len) mchp_corespi_read_fifo(spi); - - if (spi->tx_len) - mchp_corespi_write_fifo(spi); - - if (!spi->rx_len) - finalise = true; } - if (intfield & INT_RXRDY) - mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); + if (!spi->rx_len && !spi->tx_len) + finalise = true; if (intfield & INT_RX_CHANNEL_OVERFLOW) { mchp_corespi_write(spi, REG_INT_CLEAR, INT_RX_CHANNEL_OVERFLOW); @@ -477,13 +504,17 @@ static int mchp_corespi_transfer_one(struct spi_controller *host, spi->rx_buf = xfer->rx_buf; spi->tx_len = xfer->len; spi->rx_len = xfer->len; - spi->pending = 0; + spi->n_bytes = roundup_pow_of_two(DIV_ROUND_UP(xfer->bits_per_word, BITS_PER_BYTE)); - mchp_corespi_set_xfer_size(spi, (spi->tx_len > FIFO_DEPTH) - ? FIFO_DEPTH : spi->tx_len); + mchp_corespi_set_framesize(spi, xfer->bits_per_word); - if (spi->tx_len) + mchp_corespi_write(spi, REG_COMMAND, COMMAND_RXFIFORST | COMMAND_TXFIFORST); + + mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); + + while (spi->tx_len) mchp_corespi_write_fifo(spi); + return 1; } @@ -493,7 +524,6 @@ static int mchp_corespi_prepare_message(struct spi_controller *host, struct spi_device *spi_dev = msg->spi; struct mchp_corespi *spi = spi_controller_get_devdata(host); - mchp_corespi_set_framesize(spi, DEFAULT_FRAMESIZE); mchp_corespi_set_mode(spi, spi_dev->mode); return 0; @@ -521,7 +551,7 @@ static int mchp_corespi_probe(struct platform_device *pdev) host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; host->use_gpio_descriptors = true; host->setup = mchp_corespi_setup; - host->bits_per_word_mask = SPI_BPW_MASK(8); + host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); host->transfer_one = mchp_corespi_transfer_one; host->prepare_message = mchp_corespi_prepare_message; host->set_cs = mchp_corespi_set_cs; diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 95fb5f1c91c1..5304728c68c2 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -700,6 +700,7 @@ static const struct class spidev_class = { }; static const struct spi_device_id spidev_spi_ids[] = { + { .name = "bh2228fv" }, { .name = "dh2228fv" }, { .name = "ltc2488" }, { .name = "sx1301" }, @@ -734,6 +735,7 @@ static const struct of_device_id spidev_dt_ids[] = { { .compatible = "lwn,bk4", .data = &spidev_of_check }, { .compatible = "menlo,m53cpld", .data = &spidev_of_check }, { .compatible = "micron,spi-authenta", .data = &spidev_of_check }, + { .compatible = "rohm,bh2228fv", .data = &spidev_of_check }, { .compatible = "rohm,dh2228fv", .data = &spidev_of_check }, { .compatible = "semtech,sx1301", .data = &spidev_of_check }, { .compatible = "silabs,em3581", .data = &spidev_of_check }, diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index f240fcc5a4e1..9ba9495fcc4b 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -398,7 +398,7 @@ static int pmic_arb_fmt_read_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, u8 sid, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -477,7 +477,7 @@ static int pmic_arb_fmt_write_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -1702,7 +1702,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "cnfg"); if (index < 0) { - dev_err(dev, "cnfg reg region missing"); + dev_err(dev, "cnfg reg region missing\n"); return -EINVAL; } @@ -1712,7 +1712,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "intr"); if (index < 0) { - dev_err(dev, "intr reg region missing"); + dev_err(dev, "intr reg region missing\n"); return -EINVAL; } @@ -1737,8 +1737,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, dev_dbg(&pdev->dev, "adding irq domain for bus %d\n", bus_index); - bus->domain = irq_domain_add_tree(dev->of_node, - &pmic_arb_irq_domain_ops, bus); + bus->domain = irq_domain_add_tree(node, &pmic_arb_irq_domain_ops, bus); if (!bus->domain) { dev_err(&pdev->dev, "unable to create irq_domain\n"); return -ENOMEM; diff --git a/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h b/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h index 7349943bba2b..160c496784b7 100644 --- a/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h +++ b/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h @@ -22,11 +22,6 @@ /* force a value to a lower even value */ #define EVEN_FLOOR(x) ((x) & ~1) -/* for preprocessor and array sizing use MIN and MAX - otherwise use min and max */ -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) - #define CEIL_DIV(a, b) (((b) != 0) ? ((a) + (b) - 1) / (b) : 0) #define CEIL_MUL(a, b) (CEIL_DIV(a, b) * (b)) #define CEIL_MUL2(a, b) (((a) + (b) - 1) & ~((b) - 1)) diff --git a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h index 961c61288083..aad860e54d3a 100644 --- a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h +++ b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h @@ -27,12 +27,16 @@ #include "ia_css_prbs.h" #include "ia_css_input_port.h" -/* Input modes, these enumerate all supported input modes. - * Note that not all ISP modes support all input modes. +/* + * Input modes, these enumerate all supported input modes. + * This enum is part of the atomisp firmware ABI and must + * NOT be changed! + * Note that not all ISP modes support all input modes. */ enum ia_css_input_mode { IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */ IA_CSS_INPUT_MODE_FIFO, /** data from input-fifo */ + IA_CSS_INPUT_MODE_TPG, /** data from test-pattern generator */ IA_CSS_INPUT_MODE_PRBS, /** data from pseudo-random bit stream */ IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */ IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */ diff --git a/drivers/staging/media/atomisp/pci/sh_css_internal.h b/drivers/staging/media/atomisp/pci/sh_css_internal.h index a2d972ea3fa0..959e7f549641 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_internal.h +++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h @@ -344,7 +344,14 @@ struct sh_css_sp_input_formatter_set { #define IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT (3) -/* SP configuration information */ +/* + * SP configuration information + * + * This struct is part of the atomisp firmware ABI and is directly copied + * to ISP DRAM by sh_css_store_sp_group_to_ddr() + * + * Do NOT change this struct's layout or remove seemingly unused fields! + */ struct sh_css_sp_config { u8 no_isp_sync; /* Signal host immediately after start */ u8 enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */ @@ -354,6 +361,10 @@ struct sh_css_sp_config { host (true) or when they are passed to the preview/video pipe (false). */ + /* + * Note the fields below are only used on the ISP2400 not on the ISP2401, + * sh_css_store_sp_group_to_ddr() skip copying these when run on the ISP2401. + */ struct { u8 a_changed; u8 b_changed; @@ -363,11 +374,13 @@ struct sh_css_sp_config { } input_formatter; sync_generator_cfg_t sync_gen; + tpg_cfg_t tpg; prbs_cfg_t prbs; input_system_cfg_t input_circuit; u8 input_circuit_cfg_changed; - u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; - u8 enable_isys_event_queue; + u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; + /* These last 2 fields are used on both the ISP2400 and the ISP2401 */ + u8 enable_isys_event_queue; u8 disable_cont_vf; }; diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 4a2e869b9538..daed67d19efb 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -13,6 +13,28 @@ #include "thermal_core.h" +static void bang_bang_set_instance_target(struct thermal_instance *instance, + unsigned int target) +{ + if (instance->target != 0 && instance->target != 1 && + instance->target != THERMAL_NO_TARGET) + pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", + instance->target, instance->name); + + /* + * Enable the fan when the trip is crossed on the way up and disable it + * when the trip is crossed on the way down. + */ + instance->target = target; + instance->initialized = true; + + dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + + mutex_lock(&instance->cdev->lock); + __thermal_cdev_update(instance->cdev); + mutex_unlock(&instance->cdev->lock); +} + /** * bang_bang_control - controls devices associated with the given zone * @tz: thermal_zone_device @@ -54,33 +76,60 @@ static void bang_bang_control(struct thermal_zone_device *tz, tz->temperature, trip->hysteresis); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; + if (instance->trip == trip) + bang_bang_set_instance_target(instance, crossed_up); + } +} + +static void bang_bang_manage(struct thermal_zone_device *tz) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; - if (instance->target != 0 && instance->target != 1 && - instance->target != THERMAL_NO_TARGET) - pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", - instance->target, instance->name); + /* If the code below has run already, nothing needs to be done. */ + if (tz->governor_data) + return; - /* - * Enable the fan when the trip is crossed on the way up and - * disable it when the trip is crossed on the way down. - */ - instance->target = crossed_up; + for_each_trip_desc(tz, td) { + const struct thermal_trip *trip = &td->trip; - dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + if (tz->temperature >= td->threshold || + trip->temperature == THERMAL_TEMP_INVALID || + trip->type == THERMAL_TRIP_CRITICAL || + trip->type == THERMAL_TRIP_HOT) + continue; - mutex_lock(&instance->cdev->lock); - instance->cdev->updated = false; /* cdev needs update */ - mutex_unlock(&instance->cdev->lock); + /* + * If the initial cooling device state is "on", but the zone + * temperature is not above the trip point, the core will not + * call bang_bang_control() until the zone temperature reaches + * the trip point temperature which may be never. In those + * cases, set the initial state of the cooling device to 0. + */ + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (!instance->initialized && instance->trip == trip) + bang_bang_set_instance_target(instance, 0); + } } - list_for_each_entry(instance, &tz->thermal_instances, tz_node) - thermal_cdev_update(instance->cdev); + tz->governor_data = (void *)true; +} + +static void bang_bang_update_tz(struct thermal_zone_device *tz, + enum thermal_notify_event reason) +{ + /* + * Let bang_bang_manage() know that it needs to walk trips after binding + * a new cdev and after system resume. + */ + if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME) + tz->governor_data = NULL; } static struct thermal_governor thermal_gov_bang_bang = { .name = "bang_bang", .trip_crossed = bang_bang_control, + .manage = bang_bang_manage, + .update_tz = bang_bang_update_tz, }; THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 114136893a59..006614921870 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -278,20 +278,32 @@ static struct thermal_zone_params tzone_params = { static bool msi_irq; +static void proc_thermal_free_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) +{ + int i; + + for (i = 0; i < MSI_THERMAL_MAX; i++) { + if (proc_thermal_msi_map[i]) + devm_free_irq(&pdev->dev, proc_thermal_msi_map[i], pci_info); + } + + pci_free_irq_vectors(pdev); +} + static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) { - int ret, i, irq; + int ret, i, irq, count; - ret = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); - if (ret < 0) { + count = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (count < 0) { dev_err(&pdev->dev, "Failed to allocate vectors!\n"); - return ret; + return count; } dev_info(&pdev->dev, "msi enabled:%d msix enabled:%d\n", pdev->msi_enabled, pdev->msix_enabled); - for (i = 0; i < MSI_THERMAL_MAX; i++) { + for (i = 0; i < count; i++) { irq = pci_irq_vector(pdev, i); ret = devm_request_threaded_irq(&pdev->dev, irq, proc_thermal_irq_handler, @@ -310,7 +322,7 @@ static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci return 0; err_free_msi_vectors: - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); return ret; } @@ -397,7 +409,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_ err_free_vectors: if (msi_irq) - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); err_ret_tzone: thermal_zone_device_unregister(pci_info->tzone); err_del_legacy: @@ -419,6 +431,9 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev) proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); + if (msi_irq) + proc_thermal_free_msi(pdev, pci_info); + thermal_zone_device_unregister(pci_info->tzone); proc_thermal_mmio_remove(pdev, pci_info->proc_priv); if (!pci_info->no_legacy) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index f6e700e48aad..e6669aeda1ff 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -272,6 +272,44 @@ static int __init thermal_register_governors(void) return ret; } +static int __thermal_zone_device_set_mode(struct thermal_zone_device *tz, + enum thermal_device_mode mode) +{ + if (tz->ops.change_mode) { + int ret; + + ret = tz->ops.change_mode(tz, mode); + if (ret) + return ret; + } + + tz->mode = mode; + + return 0; +} + +static void thermal_zone_broken_disable(struct thermal_zone_device *tz) +{ + struct thermal_trip_desc *td; + + dev_err(&tz->device, "Unable to get temperature, disabling!\n"); + /* + * This function only runs for enabled thermal zones, so no need to + * check for the current mode. + */ + __thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED); + thermal_notify_tz_disable(tz); + + for_each_trip_desc(tz, td) { + if (td->trip.type == THERMAL_TRIP_CRITICAL && + td->trip.temperature > THERMAL_TEMP_INVALID) { + dev_crit(&tz->device, + "Disabled thermal zone with critical trip point\n"); + return; + } + } +} + /* * Zone update section: main control loop applied to each zone while monitoring * in polling mode. The monitoring is done using a workqueue. @@ -292,6 +330,34 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, cancel_delayed_work(&tz->poll_queue); } +static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) +{ + if (error == -EAGAIN) { + thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY); + return; + } + + /* + * Print the message once to reduce log noise. It will be followed by + * another one if the temperature cannot be determined after multiple + * attempts. + */ + if (tz->recheck_delay_jiffies == THERMAL_RECHECK_DELAY) + dev_info(&tz->device, "Temperature check failed (%d)\n", error); + + thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies); + + tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL); + if (tz->recheck_delay_jiffies > THERMAL_MAX_RECHECK_DELAY) { + thermal_zone_broken_disable(tz); + /* + * Restore the original recheck delay value to allow the thermal + * zone to try to recover when it is reenabled by user space. + */ + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; + } +} + static void monitor_thermal_zone(struct thermal_zone_device *tz) { if (tz->mode != THERMAL_DEVICE_ENABLED) @@ -491,10 +557,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, ret = __thermal_zone_get_temp(tz, &temp); if (ret) { - if (ret != -EAGAIN) - dev_info(&tz->device, "Temperature check failed (%d)\n", ret); - - thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); + thermal_zone_recheck(tz, ret); return; } else if (temp <= THERMAL_TEMP_INVALID) { /* @@ -506,6 +569,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, goto monitor; } + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; + tz->last_temperature = tz->temperature; tz->temperature = temp; @@ -540,7 +605,7 @@ monitor: static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, enum thermal_device_mode mode) { - int ret = 0; + int ret; mutex_lock(&tz->lock); @@ -548,14 +613,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, if (mode == tz->mode) { mutex_unlock(&tz->lock); - return ret; + return 0; } - if (tz->ops.change_mode) - ret = tz->ops.change_mode(tz, mode); + ret = __thermal_zone_device_set_mode(tz, mode); + if (ret) { + mutex_unlock(&tz->lock); - if (!ret) - tz->mode = mode; + return ret; + } __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); @@ -566,7 +632,7 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, else thermal_notify_tz_disable(tz); - return ret; + return 0; } int thermal_zone_device_enable(struct thermal_zone_device *tz) @@ -1445,6 +1511,7 @@ thermal_zone_device_register_with_trips(const char *type, thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay); thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay); + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; /* sys I/F */ /* Add nodes that are always present via .groups */ @@ -1661,7 +1728,8 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_debug_tz_resume(tz); thermal_zone_device_init(tz); - __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + thermal_governor_update_tz(tz, THERMAL_TZ_RESUME); + __thermal_zone_device_update(tz, THERMAL_TZ_RESUME); complete(&tz->resume); tz->resuming = false; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index ba8e6fc807ca..4cf2b7230d04 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -67,6 +67,8 @@ struct thermal_governor { * @polling_delay_jiffies: number of jiffies to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) + * @recheck_delay_jiffies: delay after a failed attempt to determine the zone + * temperature before trying again * @temperature: current temperature. This is only for core code, * drivers should use thermal_zone_get_temp() to get the * current temperature @@ -108,6 +110,7 @@ struct thermal_zone_device { int num_trips; unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; + unsigned long recheck_delay_jiffies; int temperature; int last_temperature; int emul_temperature; @@ -137,10 +140,11 @@ struct thermal_zone_device { #define THERMAL_TEMP_INIT INT_MIN /* - * Default delay after a failing thermal zone temperature check before - * attempting to check it again. + * Default and maximum delay after a failed thermal zone temperature check + * before attempting to check it again (in jiffies). */ -#define THERMAL_RECHECK_DELAY_MS 250 +#define THERMAL_RECHECK_DELAY msecs_to_jiffies(250) +#define THERMAL_MAX_RECHECK_DELAY (120 * HZ) /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index c0b679b846b3..06a0554ddc38 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -88,10 +88,10 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) return; for_each_trip_desc(tz, td) { - if (td->threshold < tz->temperature && td->threshold > low) + if (td->threshold <= tz->temperature && td->threshold > low) low = td->threshold; - if (td->threshold > tz->temperature && td->threshold < high) + if (td->threshold >= tz->temperature && td->threshold < high) high = td->threshold; } diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 11185cc1db92..9ed4bb2e8d05 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -323,16 +323,17 @@ static ssize_t port_sb_regs_write(struct file *file, const char __user *user_buf if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(port, port_sb_regs, ARRAY_SIZE(port_sb_regs), USB4_SB_TARGET_ROUTER, 0, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&sw->dev); pm_runtime_put_autosuspend(&sw->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } @@ -355,16 +356,17 @@ static ssize_t retimer_sb_regs_write(struct file *file, if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(rt->port, retimer_sb_regs, ARRAY_SIZE(retimer_sb_regs), USB4_SB_TARGET_RETIMER, rt->index, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&rt->dev); pm_runtime_put_autosuspend(&rt->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 326433df5880..6a2116cbb06f 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *sw) tb_switch_remove(port->remote->sw); port->remote = NULL; } else if (port->xdomain) { + port->xdomain->is_unplugged = true; tb_xdomain_remove(port->xdomain); port->xdomain = NULL; } diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1af9aed99c65..afef1dd4ddf4 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -27,7 +27,6 @@ #include <linux/pm_wakeirq.h> #include <linux/dma-mapping.h> #include <linux/sys_soc.h> -#include <linux/pm_domain.h> #include "8250.h" @@ -119,12 +118,6 @@ #define UART_OMAP_TO_L 0x26 #define UART_OMAP_TO_H 0x27 -/* - * Copy of the genpd flags for the console. - * Only used if console suspend is disabled - */ -static unsigned int genpd_flags_console; - struct omap8250_priv { void __iomem *membase; int line; @@ -1655,7 +1648,6 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err = 0; serial8250_suspend_port(priv->line); @@ -1666,19 +1658,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - if (uart_console(&up->port)) { - if (console_suspend_enabled) - err = pm_runtime_force_suspend(dev); - else { - /* - * The pd shall not be powered-off (no console suspend). - * Make copy of genpd flags before to set it always on. - * The original value is restored during the resume. - */ - genpd_flags_console = genpd->flags; - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - } - } + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1688,16 +1669,12 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err; if (uart_console(&up->port) && console_suspend_enabled) { - if (console_suspend_enabled) { - err = pm_runtime_force_resume(dev); - if (err) - return err; - } else - genpd->flags = genpd_flags_console; + err = pm_runtime_force_resume(dev); + if (err) + return err; } serial8250_resume_port(priv->line); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 0a90964d6d10..09b246c9e389 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2514,7 +2514,7 @@ static const struct uart_ops atmel_pops = { }; static const struct serial_rs485 atmel_rs485_supported = { - .flags = SER_RS485_ENABLED | SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX, + .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 615291ea9b5e..77efa7ee6eda 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2923,6 +2923,7 @@ static int lpuart_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); ret = lpuart_global_reset(sport); if (ret) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index c79dcd7c8d1a..b4c1798a1df2 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -327,6 +327,7 @@ struct sc16is7xx_one { struct kthread_work reg_work; struct kthread_delayed_work ms_work; struct sc16is7xx_one_config config; + unsigned char buf[SC16IS7XX_FIFO_SIZE]; /* Rx buffer. */ unsigned int old_mctrl; u8 old_lcr; /* Value before EFR access. */ bool irda_mode; @@ -340,7 +341,6 @@ struct sc16is7xx_port { unsigned long gpio_valid_mask; #endif u8 mctrl_mask; - unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; struct sc16is7xx_one p[]; @@ -592,6 +592,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_MCR_CLKSEL_BIT, prescaler == 1 ? 0 : SC16IS7XX_MCR_CLKSEL_BIT); + mutex_lock(&one->efr_lock); + /* Backup LCR and access special register set (DLL/DLH) */ lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, @@ -606,24 +608,26 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Restore LCR and access to general register set */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&one->efr_lock); + return DIV_ROUND_CLOSEST((clk / prescaler) / 16, div); } static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, unsigned int iir) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lsr = 0, bytes_read, i; bool read_lsr = (iir == SC16IS7XX_IIR_RLSE_SRC) ? true : false; u8 ch, flag; - if (unlikely(rxlen >= sizeof(s->buf))) { + if (unlikely(rxlen >= sizeof(one->buf))) { dev_warn_ratelimited(port->dev, "ttySC%i: Possible RX FIFO overrun: %d\n", port->line, rxlen); port->icount.buf_overrun++; /* Ensure sanity of RX level */ - rxlen = sizeof(s->buf); + rxlen = sizeof(one->buf); } while (rxlen) { @@ -636,10 +640,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, lsr = 0; if (read_lsr) { - s->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); + one->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); bytes_read = 1; } else { - sc16is7xx_fifo_read(port, s->buf, rxlen); + sc16is7xx_fifo_read(port, one->buf, rxlen); bytes_read = rxlen; } @@ -672,7 +676,7 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, } for (i = 0; i < bytes_read; ++i) { - ch = s->buf[i]; + ch = one->buf[i]; if (uart_handle_sysrq_char(port, ch)) continue; @@ -690,10 +694,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, static void sc16is7xx_handle_tx(struct uart_port *port) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct tty_port *tport = &port->state->port; unsigned long flags; unsigned int txlen; + unsigned char *tail; if (unlikely(port->x_char)) { sc16is7xx_port_write(port, SC16IS7XX_THR_REG, port->x_char); @@ -718,8 +722,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) txlen = 0; } - txlen = uart_fifo_out(port, s->buf, txlen); - sc16is7xx_fifo_write(port, s->buf, txlen); + txlen = kfifo_out_linear_ptr(&tport->xmit_fifo, &tail, txlen); + sc16is7xx_fifo_write(port, tail, txlen); + uart_xmit_advance(port, txlen); uart_port_lock_irqsave(port, &flags); if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9a18d0b95a41..5bea3af46abc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -881,6 +881,14 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, new_flags = (__force upf_t)new_info->flags; old_custom_divisor = uport->custom_divisor; + if (!(uport->flags & UPF_FIXED_PORT)) { + unsigned int uartclk = new_info->baud_base * 16; + /* check needs to be done here before other settings made */ + if (uartclk == 0) { + retval = -EINVAL; + goto exit; + } + } if (!capable(CAP_SYS_ADMIN)) { retval = -EPERM; if (change_irq || change_port || diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 53f8c2329c30..14f8f00fdcf9 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -770,8 +770,6 @@ static void sysrq_of_get_keyreset_config(void) { u32 key; struct device_node *np; - struct property *prop; - const __be32 *p; np = of_find_node_by_path("/chosen/linux,sysrq-reset-seq"); if (!np) { @@ -782,7 +780,7 @@ static void sysrq_of_get_keyreset_config(void) /* Reset in case a __weak definition was present */ sysrq_reset_seq_len = 0; - of_property_for_each_u32(np, "keyset", prop, p, key) { + of_property_for_each_u32(np, "keyset", key) { if (key == KEY_RESERVED || key > KEY_MAX || sysrq_reset_seq_len == SYSRQ_KEY_RESET_MAX) break; diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index dc2177fec715..a931fcde7ad9 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -76,8 +76,7 @@ static void addpair(int fp, int un) int main(int argc, char *argv[]) { FILE *ctbl; - const char *tblname, *rel_tblname; - const char *abs_srctree; + const char *tblname; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -102,16 +101,6 @@ int main(int argc, char *argv[]) } } - abs_srctree = getenv("abs_srctree"); - if (abs_srctree && !strncmp(abs_srctree, tblname, strlen(abs_srctree))) - { - rel_tblname = tblname + strlen(abs_srctree); - while (*rel_tblname == '/') - ++rel_tblname; - } - else - rel_tblname = tblname; - /* For now we assume the default font is always 256 characters. */ fontlen = 256; @@ -255,16 +244,13 @@ int main(int argc, char *argv[]) printf("\ /*\n\ - * Do not edit this file; it was automatically generated by\n\ - *\n\ - * conmakehash %s > [this file]\n\ - *\n\ + * Automatically generated file; Do not edit.\n\ */\n\ \n\ #include <linux/types.h>\n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", rel_tblname, fontlen); +{\n\t", fontlen); for ( i = 0 ; i < fontlen ; i++ ) { diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index ce36154ce963..7aea8fbaeee8 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -316,6 +316,11 @@ static inline int ufshcd_rpm_get_sync(struct ufs_hba *hba) return pm_runtime_get_sync(&hba->ufs_device_wlun->sdev_gendev); } +static inline int ufshcd_rpm_get_if_active(struct ufs_hba *hba) +{ + return pm_runtime_get_if_active(&hba->ufs_device_wlun->sdev_gendev); +} + static inline int ufshcd_rpm_put_sync(struct ufs_hba *hba) { return pm_runtime_put_sync(&hba->ufs_device_wlun->sdev_gendev); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index dc757ba47522..0b3d0c8e0dda 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2416,7 +2416,17 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) return err; } + /* + * The UFSHCI 3.0 specification does not define MCQ_SUPPORT and + * LSDB_SUPPORT, but [31:29] as reserved bits with reset value 0s, which + * means we can simply read values regardless of version. + */ hba->mcq_sup = FIELD_GET(MASK_MCQ_SUPPORT, hba->capabilities); + /* + * 0h: legacy single doorbell support is available + * 1h: indicate that legacy single doorbell support has been removed + */ + hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); if (!hba->mcq_sup) return 0; @@ -4090,11 +4100,16 @@ static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba) min_sleep_time_us = MIN_DELAY_BEFORE_DME_CMDS_US - delta; else - return; /* no more delay required */ + min_sleep_time_us = 0; /* no more delay required */ } - /* allow sleep for extra 50us if needed */ - usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + if (min_sleep_time_us > 0) { + /* allow sleep for extra 50us if needed */ + usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + } + + /* update the last_dme_cmd_tstamp */ + hba->last_dme_cmd_tstamp = ktime_get(); } /** @@ -6553,7 +6568,8 @@ again: if (ufshcd_err_handling_should_stop(hba)) goto skip_err_handling; - if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) { + if ((hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) && + !hba->force_reset) { bool ret; spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -8211,7 +8227,10 @@ static void ufshcd_update_rtc(struct ufs_hba *hba) */ val = ts64.tv_sec - hba->dev_info.rtc_time_baseline; - ufshcd_rpm_get_sync(hba); + /* Skip update RTC if RPM state is not RPM_ACTIVE */ + if (ufshcd_rpm_get_if_active(hba) <= 0) + return; + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_SECONDS_PASSED, 0, 0, &val); ufshcd_rpm_put_sync(hba); @@ -10265,9 +10284,6 @@ int ufshcd_system_restore(struct device *dev) */ ufshcd_readl(hba, REG_UTP_TASK_REQ_LIST_BASE_H); - /* Resuming from hibernate, assume that link was OFF */ - ufshcd_set_link_off(hba); - return 0; } @@ -10496,6 +10512,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } if (!is_mcq_supported(hba)) { + if (!hba->lsdb_sup) { + dev_err(hba->dev, "%s: failed to initialize (legacy doorbell mode not supported)\n", + __func__); + err = -EINVAL; + goto out_disable; + } err = scsi_add_host(host, hba->dev); if (err) { dev_err(hba->dev, "scsi_add_host failed\n"); diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 16ad3528d80b..9ec318ef52bf 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1293,6 +1293,9 @@ static void exynos_ufs_fmp_resume(struct ufs_hba *hba) { struct arm_smccc_res res; + if (!(hba->caps & UFSHCD_CAP_CRYPTO)) + return; + arm_smccc_smc(SMC_CMD_FMP_SECURITY, 0, SMU_EMBEDDED, CFG_DESCTYPE_3, 0, 0, 0, 0, &res); if (res.a0) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d8b096859337..e0ceaa721949 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3734,11 +3734,9 @@ static int ffs_func_set_alt(struct usb_function *f, if (alt > MAX_ALT_SETTINGS) return -EINVAL; - if (alt != (unsigned)-1) { - intf = ffs_func_revmap_intf(func, interface); - if (intf < 0) - return intf; - } + intf = ffs_func_revmap_intf(func, interface); + if (intf < 0) + return intf; if (ffs->func) ffs_func_eps_disable(ffs->func); @@ -3753,12 +3751,6 @@ static int ffs_func_set_alt(struct usb_function *f, if (ffs->state != FFS_ACTIVE) return -ENODEV; - if (alt == (unsigned)-1) { - ffs->func = NULL; - ffs_event_add(ffs, FUNCTIONFS_DISABLE); - return 0; - } - ffs->func = func; ret = ffs_func_eps_enable(func); if (ret >= 0) { @@ -3770,7 +3762,23 @@ static int ffs_func_set_alt(struct usb_function *f, static void ffs_func_disable(struct usb_function *f) { - ffs_func_set_alt(f, 0, (unsigned)-1); + struct ffs_function *func = ffs_func_from_usb(f); + struct ffs_data *ffs = func->ffs; + + if (ffs->func) + ffs_func_eps_disable(ffs->func); + + if (ffs->state == FFS_DEACTIVATED) { + ffs->state = FFS_CLOSING; + INIT_WORK(&ffs->reset_work, ffs_reset_work); + schedule_work(&ffs->reset_work); + return; + } + + if (ffs->state == FFS_ACTIVE) { + ffs->func = NULL; + ffs_event_add(ffs, FUNCTIONFS_DISABLE); + } } static int ffs_func_setup(struct usb_function *f, diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 38e8ed3144f0..3f63253ad3e0 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -642,12 +642,21 @@ static void process_ump_stream_msg(struct f_midi2_ep *ep, const u32 *data) if (format) return; // invalid blk = (*data >> 8) & 0xff; - if (blk >= ep->num_blks) - return; - if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) - reply_ump_stream_fb_info(ep, blk); - if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) - reply_ump_stream_fb_name(ep, blk); + if (blk == 0xff) { + /* inquiry for all blocks */ + for (blk = 0; blk < ep->num_blks; blk++) { + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } + } else if (blk < ep->num_blks) { + /* only the specified block */ + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } return; } } diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 89af0feb7512..24299576972f 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -592,16 +592,25 @@ int u_audio_start_capture(struct g_audio *audio_dev) struct usb_ep *ep, *ep_fback; struct uac_rtd_params *prm; struct uac_params *params = &audio_dev->params; - int req_len, i; + int req_len, i, ret; prm = &uac->c_prm; dev_dbg(dev, "start capture with rate %d\n", prm->srate); ep = audio_dev->out_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for out_ep failed (%d)\n", ret); + return ret; + } + req_len = ep->maxpacket; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for out_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { @@ -629,9 +638,18 @@ int u_audio_start_capture(struct g_audio *audio_dev) return 0; /* Setup feedback endpoint */ - config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed in_ep_fback failed (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } + prm->fb_ep_enabled = true; - usb_ep_enable(ep_fback); + ret = usb_ep_enable(ep_fback); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep_fback (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } req_len = ep_fback->maxpacket; req_fback = usb_ep_alloc_request(ep_fback, GFP_ATOMIC); @@ -687,13 +705,17 @@ int u_audio_start_playback(struct g_audio *audio_dev) struct uac_params *params = &audio_dev->params; unsigned int factor; const struct usb_endpoint_descriptor *ep_desc; - int req_len, i; + int req_len, i, ret; unsigned int p_pktsize; prm = &uac->p_prm; dev_dbg(dev, "start playback with rate %d\n", prm->srate); ep = audio_dev->in_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for in_ep failed (%d)\n", ret); + return ret; + } ep_desc = ep->desc; /* @@ -720,7 +742,11 @@ int u_audio_start_playback(struct g_audio *audio_dev) uac->p_residue_mil = 0; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index eec7f7a2e40f..b394105e55d6 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1441,6 +1441,7 @@ void gserial_suspend(struct gserial *gser) spin_lock(&port->port_lock); spin_unlock(&serial_port_lock); port->suspended = true; + port->start_delayed = true; spin_unlock_irqrestore(&port->port_lock, flags); } EXPORT_SYMBOL_GPL(gserial_suspend); diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index b0a613758414..cf6478f97f4a 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -118,12 +118,10 @@ int usb_ep_enable(struct usb_ep *ep) goto out; /* UDC drivers can't handle endpoints with maxpacket size 0 */ - if (usb_endpoint_maxp(ep->desc) == 0) { - /* - * We should log an error message here, but we can't call - * dev_err() because there's no way to find the gadget - * given only ep. - */ + if (!ep->desc || usb_endpoint_maxp(ep->desc) == 0) { + WARN_ONCE(1, "%s: ep%d (%s) has %s\n", __func__, ep->address, ep->name, + (!ep->desc) ? "NULL descriptor" : "maxpacket 0"); + ret = -EINVAL; goto out; } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d7654f475daf..937ce5fd5809 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b7517c3c8059..4ea2c3e072a9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2910,6 +2910,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + return 0; check_endpoint_halted: if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code)) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0a8cf6c17f82..efdf4c228b8c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, xhci->num_active_eps); return -ENOMEM; } - if ((xhci->quirks & XHCI_SW_BW_CHECKING) && + if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) { if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) xhci_free_host_resources(xhci, ctrl_ctx); @@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); - if (!ret) - xhci_alloc_dev(hcd, udev); + if (!ret) { + if (xhci_alloc_dev(hcd, udev) == 1) + xhci_setup_addressable_virt_dev(xhci, udev); + } kfree(command->completion); kfree(command); return -EPROTO; diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 2d30fc1be306..1a8d5e80b9ae 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_gpio_hids[] = { { "INTC1096" }, { "INTC100B" }, { "INTC10D1" }, + { "INTC10B5" }, {}, }; diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index b98cda1cef73..e24cdb667307 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -382,11 +382,9 @@ static void usb251xb_get_ports_field(struct usb251xb *hub, bool ds_only, u8 *fld) { struct device *dev = hub->dev; - struct property *prop; - const __be32 *p; u32 port; - of_property_for_each_u32(dev->of_node, prop_name, prop, p, port) { + of_property_for_each_u32(dev->of_node, prop_name, port) { if ((port >= ds_only ? 1 : 0) && (port <= port_cnt)) *fld |= BIT(port); else diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 612bea504d7a..0870c6533f80 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -863,4 +863,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Winchiphead CH341 USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 670e942fdaaa..6d6ec7eed87c 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -104,7 +104,7 @@ struct garmin_packet { int seq; /* the real size of the data array, always > 0 */ int size; - __u8 data[]; + __u8 data[] __counted_by(size); }; /* structure used to keep the current state of the driver */ @@ -267,8 +267,7 @@ static int pkt_add(struct garmin_data *garmin_data_p, /* process only packets containing data ... */ if (data_length) { - pkt = kmalloc(sizeof(struct garmin_packet)+data_length, - GFP_ATOMIC); + pkt = kmalloc(struct_size(pkt, data, data_length), GFP_ATOMIC); if (!pkt) return 0; diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index 1f7bb3e4fcf2..942cb0153423 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -1315,4 +1315,5 @@ module_usb_serial_driver(serial_drivers, mxuport_idtable); MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); MODULE_AUTHOR("<support@moxa.com>"); +MODULE_DESCRIPTION("Moxa UPORT USB Serial driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index 20277c52dded..82791fd67c46 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -112,4 +112,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Navman USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/qcaux.c b/drivers/usb/serial/qcaux.c index 929ffba663f2..015bb7c5d19d 100644 --- a/drivers/usb/serial/qcaux.c +++ b/drivers/usb/serial/qcaux.c @@ -84,4 +84,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Qualcomm USB Auxiliary Serial Port driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 09a972a838ee..6b294bf8bc43 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -49,16 +49,6 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); -struct spcp8x5_usb_ctrl_arg { - u8 type; - u8 cmd; - u8 cmd_type; - u16 value; - u16 index; - u16 length; -}; - - /* spcp8x5 spec register define */ #define MCR_CONTROL_LINE_RTS 0x02 #define MCR_CONTROL_LINE_DTR 0x01 diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index d7f73ad6e778..9aabb087f733 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -190,4 +190,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Symbol USB barcode to serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 24b8772a345e..82f4f0b992aa 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -163,4 +163,5 @@ static const struct usb_device_id id_table[] = { MODULE_DEVICE_TABLE(usb, id_table); module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("USB Serial 'Simple' driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index 6934970f180d..61a8425b7762 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -76,6 +76,11 @@ static void usb_debug_process_read_urb(struct urb *urb) usb_serial_generic_process_read_urb(urb); } +static void usb_debug_init_termios(struct tty_struct *tty) +{ + tty->termios.c_lflag &= ~(ECHO | ECHONL); +} + static struct usb_serial_driver debug_device = { .driver = { .owner = THIS_MODULE, @@ -85,6 +90,7 @@ static struct usb_serial_driver debug_device = { .num_ports = 1, .bulk_out_size = USB_DEBUG_MAX_PACKET_SIZE, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -96,6 +102,7 @@ static struct usb_serial_driver dbc_device = { .id_table = dbc_id_table, .num_ports = 1, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -104,4 +111,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table_combined); +MODULE_DESCRIPTION("USB Debug cable driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/typec/mux/fsa4480.c b/drivers/usb/typec/mux/fsa4480.c index cb7cdf90cb0a..cd235339834b 100644 --- a/drivers/usb/typec/mux/fsa4480.c +++ b/drivers/usb/typec/mux/fsa4480.c @@ -13,6 +13,10 @@ #include <linux/usb/typec_dp.h> #include <linux/usb/typec_mux.h> +#define FSA4480_DEVICE_ID 0x00 + #define FSA4480_DEVICE_ID_VENDOR_ID GENMASK(7, 6) + #define FSA4480_DEVICE_ID_VERSION_ID GENMASK(5, 3) + #define FSA4480_DEVICE_ID_REV_ID GENMASK(2, 0) #define FSA4480_SWITCH_ENABLE 0x04 #define FSA4480_SWITCH_SELECT 0x05 #define FSA4480_SWITCH_STATUS1 0x07 @@ -251,6 +255,7 @@ static int fsa4480_probe(struct i2c_client *client) struct typec_switch_desc sw_desc = { }; struct typec_mux_desc mux_desc = { }; struct fsa4480 *fsa; + int val = 0; int ret; fsa = devm_kzalloc(dev, sizeof(*fsa), GFP_KERNEL); @@ -268,6 +273,15 @@ static int fsa4480_probe(struct i2c_client *client) if (IS_ERR(fsa->regmap)) return dev_err_probe(dev, PTR_ERR(fsa->regmap), "failed to initialize regmap\n"); + ret = regmap_read(fsa->regmap, FSA4480_DEVICE_ID, &val); + if (ret || !val) + return dev_err_probe(dev, -ENODEV, "FSA4480 not found\n"); + + dev_dbg(dev, "Found FSA4480 v%lu.%lu (Vendor ID = %lu)\n", + FIELD_GET(FSA4480_DEVICE_ID_VERSION_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_REV_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_VENDOR_ID, val)); + /* Safe mode */ fsa->cur_enable = FSA4480_ENABLE_DEVICE | FSA4480_ENABLE_USB; fsa->mode = TYPEC_STATE_SAFE; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index b862fdf3fe1d..3e3dcb983dde 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -67,7 +67,7 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) return regmap_raw_write(tcpci->regmap, reg, &val, sizeof(u16)); } -static bool tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) +static int tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) { unsigned int reg; int ret; diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 26f9006e95e1..4b02d6474259 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4515,7 +4515,7 @@ static inline enum tcpm_state hard_reset_state(struct tcpm_port *port) return ERROR_RECOVERY; if (port->pwr_role == TYPEC_SOURCE) return SRC_UNATTACHED; - if (port->state == SNK_WAIT_CAPABILITIES) + if (port->state == SNK_WAIT_CAPABILITIES_TIMEOUT) return SNK_READY; return SNK_UNATTACHED; } @@ -5655,7 +5655,6 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index ea768b19a7f1..dd51a25480bf 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1191,14 +1191,14 @@ static int tps6598x_apply_patch(struct tps6598x *tps) dev_info(tps->dev, "Firmware update succeeded\n"); release_fw: - release_firmware(fw); if (ret) { dev_err(tps->dev, "Failed to write patch %s of %zu bytes\n", firmware_name, fw->size); } + release_firmware(fw); return ret; -}; +} static int cd321x_init(struct tps6598x *tps) { diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dcd3765cc1f5..4039851551c1 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -137,7 +137,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (ret) return ret; - return err; + return err ?: UCSI_CCI_LENGTH(*cci); } static int ucsi_read_error(struct ucsi *ucsi, u8 connector_num) @@ -238,13 +238,10 @@ static int ucsi_send_command_common(struct ucsi *ucsi, u64 cmd, mutex_lock(&ucsi->ppm_lock); ret = ucsi_run_command(ucsi, cmd, &cci, data, size, conn_ack); - if (cci & UCSI_CCI_BUSY) { - ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false); - return ret ? ret : -EBUSY; - } - - if (cci & UCSI_CCI_ERROR) - return ucsi_read_error(ucsi, connector_num); + if (cci & UCSI_CCI_BUSY) + ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false) ?: -EBUSY; + else if (cci & UCSI_CCI_ERROR) + ret = ucsi_read_error(ucsi, connector_num); mutex_unlock(&ucsi->ppm_lock); return ret; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 82650c11e451..302a89aeb258 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -745,6 +745,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag * */ if (usb_pipedevice(urb->pipe) == 0) { + struct usb_device *old; __u8 type = usb_pipetype(urb->pipe); struct usb_ctrlrequest *ctrlreq = (struct usb_ctrlrequest *) urb->setup_packet; @@ -755,14 +756,15 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto no_need_xmit; } + old = vdev->udev; switch (ctrlreq->bRequest) { case USB_REQ_SET_ADDRESS: /* set_address may come when a device is reset */ dev_info(dev, "SetAddress Request (%d) to port %d\n", ctrlreq->wValue, vdev->rhport); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); spin_lock(&vdev->ud.lock); vdev->ud.status = VDEV_ST_USED; @@ -781,8 +783,8 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag usbip_dbg_vhci_hc( "Not yet?:Get_Descriptor to device 0 (get max pipe size)\n"); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); goto out; default: @@ -1067,6 +1069,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) static void vhci_device_reset(struct usbip_device *ud) { struct vhci_device *vdev = container_of(ud, struct vhci_device, ud); + struct usb_device *old = vdev->udev; unsigned long flags; spin_lock_irqsave(&ud->lock, flags); @@ -1074,8 +1077,8 @@ static void vhci_device_reset(struct usbip_device *ud) vdev->speed = 0; vdev->devid = 0; - usb_put_dev(vdev->udev); vdev->udev = NULL; + usb_put_dev(old); if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c index 7fa0491bb201..11bd76ae18cf 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c @@ -140,7 +140,7 @@ static int octep_process_mbox(struct octep_hw *oct_hw, u16 id, u16 qid, void *bu val = octep_read_sig(mbox); if ((val & 0xFFFF) != MBOX_RSP_SIG) { dev_warn(&pdev->dev, "Invalid Signature from mbox : %d response\n", id); - return ret; + return -EINVAL; } val = octep_read_sts(mbox); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e31ec9ebc4ce..478cd46a49ed 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1481,13 +1481,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) notify = ops->get_vq_notification(vdpa, index); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - PFN_DOWN(notify.addr), PAGE_SIZE, - vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } static const struct vm_operations_struct vhost_vdpa_vm_ops = { diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a9b93e99c23a..bc1f962e483b 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -305,15 +305,9 @@ static int virtio_dev_probe(struct device *_d) if (err) goto err; - if (dev->config->create_avq) { - err = dev->config->create_avq(dev); - if (err) - goto err; - } - err = drv->probe(dev); if (err) - goto err_probe; + goto err; /* If probe didn't do it, mark device DRIVER_OK ourselves. */ if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -326,9 +320,6 @@ static int virtio_dev_probe(struct device *_d) return 0; -err_probe: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -344,9 +335,6 @@ static void virtio_dev_remove(struct device *_d) drv->remove(dev); - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - /* Driver should have reset device. */ WARN_ON_ONCE(dev->config->get_status(dev)); @@ -524,9 +512,6 @@ int virtio_device_freeze(struct virtio_device *dev) } } - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - return 0; } EXPORT_SYMBOL_GPL(virtio_device_freeze); @@ -562,16 +547,10 @@ int virtio_device_restore(struct virtio_device *dev) if (ret) goto err; - if (dev->config->create_avq) { - ret = dev->config->create_avq(dev); - if (ret) - goto err; - } - if (drv->restore) { ret = drv->restore(dev); if (ret) - goto err_restore; + goto err; } /* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@ -582,9 +561,6 @@ int virtio_device_restore(struct virtio_device *dev) return 0; -err_restore: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7d82facafd75..c44d8ba00c02 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -46,12 +46,26 @@ bool vp_notify(struct virtqueue *vq) return true; } +/* Notify all slow path virtqueues on an interrupt. */ +static void vp_vring_slow_path_interrupt(int irq, + struct virtio_pci_device *vp_dev) +{ + struct virtio_pci_vq_info *info; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->slow_virtqueues, node) + vring_interrupt(irq, info->vq); + spin_unlock_irqrestore(&vp_dev->lock, flags); +} + /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); + vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } @@ -125,6 +139,9 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL)) goto error; + if (!per_vq_vectors) + desc = NULL; + if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ @@ -171,11 +188,17 @@ error: return err; } +static bool vp_is_slow_path_vector(u16 msix_vec) +{ + return msix_vec == VP_MSIX_CONFIG_VECTOR; +} + static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, - u16 msix_vec) + u16 msix_vec, + struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); @@ -194,13 +217,16 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); + if (!vp_is_slow_path_vector(msix_vec)) + list_add(&info->node, &vp_dev->virtqueues); + else + list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } - vp_dev->vqs[index] = info; + *p_info = info; return vq; out_info: @@ -236,13 +262,11 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->is_avq && vp_dev->is_avq(vdev, vq->index)) - continue; - if (vp_dev->per_vq_vectors) { int v = vp_dev->vqs[vq->index]->msix_vector; - if (v != VIRTIO_MSI_NO_VECTOR) { + if (v != VIRTIO_MSI_NO_VECTOR && + !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); @@ -284,21 +308,85 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->vqs = NULL; } +enum vp_vq_vector_policy { + VP_VQ_VECTOR_POLICY_EACH, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, + VP_VQ_VECTOR_POLICY_SHARED, +}; + +static struct virtqueue * +vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, + vq_callback_t *callback, const char *name, bool ctx, + bool slow_path, int *allocated_vectors, + enum vp_vq_vector_policy vector_policy, + struct virtio_pci_vq_info **p_info) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq; + u16 msix_vec; + int err; + + if (!callback) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || + (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && + !slow_path)) + msix_vec = (*allocated_vectors)++; + else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && + slow_path) + msix_vec = VP_MSIX_CONFIG_VECTOR; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, + p_info); + if (IS_ERR(vq)) + return vq; + + if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || + msix_vec == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(msix_vec)) + return vq; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), + "%s-%s", dev_name(&vp_dev->vdev.dev), name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], vq); + if (err) { + vp_del_vq(vq); + return ERR_PTR(err); + } + + return vq; +} + static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], - bool per_vq_vectors, + enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; - u16 msix_vec; int i, err, nvectors, allocated_vectors, queue_idx = 0; + struct virtqueue *vq; + bool per_vq_vectors; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto error_find; + } + + per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; + if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -307,13 +395,14 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, if (vqi->name && vqi->callback) ++nvectors; } + if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) + ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, - per_vq_vectors ? desc : NULL); + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; @@ -325,37 +414,27 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, vqs[i] = NULL; continue; } - - if (!vqi->callback) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, - vqi->name, vqi->ctx, msix_vec); + vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, + vqi->name, vqi->ctx, false, + &allocated_vectors, vector_policy, + &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } + } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), vqi->name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, + avq->name, false, true, &allocated_vectors, + vector_policy, &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto error_find; } + return 0; error_find: @@ -368,12 +447,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; + struct virtqueue *vq; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto out_del_vqs; + } + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) @@ -390,13 +478,24 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, - VIRTIO_MSI_NO_VECTOR); + VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, + false, VIRTIO_MSI_NO_VECTOR, + &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto out_del_vqs; + } + return 0; out_del_vqs: vp_del_vqs(vdev); @@ -411,11 +510,20 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, true, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_EACH, desc); + if (!err) + return 0; + /* Fallback: MSI-X with one shared vector for config and + * slow path queues, one vector per queue for the rest. + */ + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, false, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ @@ -466,7 +574,8 @@ const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || - vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR) + vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, @@ -574,6 +683,7 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); + INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 3c4bb2d6163a..1d9c49947f52 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -35,7 +35,7 @@ struct virtio_pci_vq_info { /* the actual virtqueue */ struct virtqueue *vq; - /* the list node for the virtqueues list */ + /* the list node for the virtqueues or slow_virtqueues list */ struct list_head node; /* MSI-X vector (or none) */ @@ -44,9 +44,9 @@ struct virtio_pci_vq_info { struct virtio_pci_admin_vq { /* Virtqueue info associated with this admin queue. */ - struct virtio_pci_vq_info info; - /* serializing admin commands execution and virtqueue deletion */ - struct mutex cmd_lock; + struct virtio_pci_vq_info *info; + /* Protects virtqueue access. */ + spinlock_t lock; u64 supported_cmds; /* Name of the admin queue: avq.$vq_index. */ char name[10]; @@ -66,9 +66,12 @@ struct virtio_pci_device { /* Where to read and clear interrupt */ u8 __iomem *isr; - /* a list of queues so we can dispatch IRQs */ + /* Lists of queues and potentially slow path queues + * so we can dispatch IRQs. + */ spinlock_t lock; struct list_head virtqueues; + struct list_head slow_virtqueues; /* Array of all virtqueues reported in the * PCI common config num_queues field @@ -102,7 +105,7 @@ struct virtio_pci_device { void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); - bool (*is_avq)(struct virtio_device *vdev, unsigned int index); + int (*avq_index)(struct virtio_device *vdev, u16 *index, u16 *num); }; /* Constants for MSI-X */ @@ -175,6 +178,7 @@ struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev); #define VIRTIO_ADMIN_CMD_BITMAP 0 #endif +void vp_modern_avq_done(struct virtqueue *vq); int vp_modern_admin_cmd_exec(struct virtio_device *vdev, struct virtio_admin_cmd *cmd); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 3b5b9499a53a..9193c30d640a 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -28,6 +28,21 @@ static u64 vp_get_features(struct virtio_device *vdev) return vp_modern_get_features(&vp_dev->mdev); } +static int vp_avq_index(struct virtio_device *vdev, u16 *index, u16 *num) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + *num = 0; + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return 0; + + *num = vp_modern_avq_num(&vp_dev->mdev); + if (!(*num)) + return -EINVAL; + *index = vp_modern_avq_index(&vp_dev->mdev); + return 0; +} + static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -38,17 +53,35 @@ static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) return index == vp_dev->admin_vq.vq_index; } +void vp_modern_avq_done(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&admin_vq->lock, flags); + do { + virtqueue_disable_cb(vq); + while ((cmd = virtqueue_get_buf(vq, &len))) + complete(&cmd->completion); + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&admin_vq->lock, flags); +} + static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, u16 opcode, struct scatterlist **sgs, unsigned int out_num, unsigned int in_num, - void *data) + struct virtio_admin_cmd *cmd) { struct virtqueue *vq; - int ret, len; + unsigned long flags; + int ret; - vq = admin_vq->info.vq; + vq = admin_vq->info->vq; if (!vq) return -EIO; @@ -57,21 +90,33 @@ static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, !((1ULL << opcode) & admin_vq->supported_cmds)) return -EOPNOTSUPP; - ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, data, GFP_KERNEL); - if (ret < 0) - return -EIO; + init_completion(&cmd->completion); - if (unlikely(!virtqueue_kick(vq))) +again: + if (virtqueue_is_broken(vq)) return -EIO; - while (!virtqueue_get_buf(vq, &len) && - !virtqueue_is_broken(vq)) - cpu_relax(); + spin_lock_irqsave(&admin_vq->lock, flags); + ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_KERNEL); + if (ret < 0) { + if (ret == -ENOSPC) { + spin_unlock_irqrestore(&admin_vq->lock, flags); + cpu_relax(); + goto again; + } + goto unlock_err; + } + if (!virtqueue_kick(vq)) + goto unlock_err; + spin_unlock_irqrestore(&admin_vq->lock, flags); - if (virtqueue_is_broken(vq)) - return -EIO; + wait_for_completion(&cmd->completion); - return 0; + return cmd->ret; + +unlock_err: + spin_unlock_irqrestore(&admin_vq->lock, flags); + return -EIO; } int vp_modern_admin_cmd_exec(struct virtio_device *vdev, @@ -122,12 +167,9 @@ int vp_modern_admin_cmd_exec(struct virtio_device *vdev, in_num++; } - mutex_lock(&vp_dev->admin_vq.cmd_lock); ret = virtqueue_exec_admin_cmd(&vp_dev->admin_vq, le16_to_cpu(cmd->opcode), - sgs, out_num, in_num, sgs); - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - + sgs, out_num, in_num, cmd); if (ret) { dev_err(&vdev->dev, "Failed to execute command on admin vq: %d\n.", ret); @@ -188,25 +230,29 @@ end: static void vp_modern_avq_activate(struct virtio_device *vdev) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_unbreak(admin_vq->info.vq); virtio_pci_admin_cmd_list_init(vdev); } -static void vp_modern_avq_deactivate(struct virtio_device *vdev) +static void vp_modern_avq_cleanup(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + struct virtqueue *vq; if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_break(admin_vq->info.vq); + vq = vp_dev->vqs[vp_dev->admin_vq.vq_index]->vq; + if (!vq) + return; + + while ((cmd = virtqueue_detach_unused_buf(vq))) { + cmd->ret = -EIO; + complete(&cmd->completion); + } } static void vp_transport_features(struct virtio_device *vdev, u64 features) @@ -403,7 +449,7 @@ static void vp_reset(struct virtio_device *vdev) while (vp_modern_get_status(mdev)) msleep(1); - vp_modern_avq_deactivate(vdev); + vp_modern_avq_cleanup(vdev); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); @@ -552,8 +598,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (index >= vp_modern_get_num_queues(mdev) && !is_avq) return ERR_PTR(-EINVAL); - num = is_avq ? - VIRTIO_AVQ_SGS_MAX : vp_modern_get_queue_size(mdev, index); + num = vp_modern_get_queue_size(mdev, index); /* Check if queue is either not available or already active. */ if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); @@ -580,12 +625,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, goto err; } - if (is_avq) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = vq; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - return vq; err: @@ -620,12 +659,6 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - if (vp_is_avq(&vp_dev->vdev, vq->index)) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = NULL; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - if (vp_dev->msix_enabled) vp_modern_queue_vector(mdev, vq->index, VIRTIO_MSI_NO_VECTOR); @@ -735,45 +768,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, return true; } -static int vp_modern_create_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *avq; - struct virtqueue *vq; - u16 admin_q_num; - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return 0; - - admin_q_num = vp_modern_avq_num(&vp_dev->mdev); - if (!admin_q_num) - return -EINVAL; - - avq = &vp_dev->admin_vq; - avq->vq_index = vp_modern_avq_index(&vp_dev->mdev); - sprintf(avq->name, "avq.%u", avq->vq_index); - vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL, - avq->name, NULL, VIRTIO_MSI_NO_VECTOR); - if (IS_ERR(vq)) { - dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld", - PTR_ERR(vq)); - return PTR_ERR(vq); - } - - vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true); - return 0; -} - -static void vp_modern_destroy_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return; - - vp_dev->del_vq(&vp_dev->admin_vq.info); -} - static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get = NULL, .set = NULL, @@ -792,8 +786,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -814,8 +806,6 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; /* the PCI probing function */ @@ -839,11 +829,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; - vp_dev->is_avq = vp_is_avq; + vp_dev->avq_index = vp_avq_index; vp_dev->isr = mdev->isr; vp_dev->vdev.id = mdev->id; - mutex_init(&vp_dev->admin_vq.cmd_lock); + spin_lock_init(&vp_dev->admin_vq.lock); return 0; } @@ -851,6 +841,5 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) { struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - mutex_destroy(&vp_dev->admin_vq.cmd_lock); vp_modern_remove(mdev); } diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index a97ceb105cd8..24fdc74caeba 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, err ?: total, false); } diff --git a/fs/afs/file.c b/fs/afs/file.c index c3f0c45ae9a9..ec1be0091fdb 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op) req->error = error; if (subreq) { - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, error ?: req->actual_len, false); req->subreq = NULL; } else if (req->done) { diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index a7b425d3c8a0..331a17f3f113 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct posix_acl *acl = NULL; + + if (rcu) + return ERR_PTR(-ECHILD); + + struct btree_trans *trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 27e7eec0f278..fe730a6bf0c1 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t); #ifdef CONFIG_BCACHEFS_POSIX_ACL -struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); +struct posix_acl *bch2_get_acl(struct inode *, int, bool); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708..fd3a2522bc3e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); for (unsigned i = 0; i < 2; i++) bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", a.v->io_time[i], LRU_TIME_MAX); @@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->cached_sectors || a.v->stripe, - c, err, alloc_key_empty_but_have_data, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, a.v->dirty_sectors, @@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_parity: bkey_fsck_err_on(!a.v->dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_type_str(a.v->data_type)); break; @@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || stripe_sectors || a.v->stripe, - c, err, alloc_key_cached_inconsistency, + c, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: @@ -829,7 +824,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & BTREE_TRIGGER_gc)); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8d2b62c9588e..fd790b03fbe1 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket, bucket_data_type(bucket) != bucket_data_type(ptr); } +/* + * It is my general preference to use unsigned types for unsigned quantities - + * however, these helpers are used in disk accounting calculations run by + * triggers where the output will be negated and added to an s64. unsigned is + * right out even though all these quantities will fit in 32 bits, since it + * won't be sign extended correctly; u64 will negate "correctly", but s64 is the + * simpler option here. + */ static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a) { return a.stripe_sectors + a.dirty_sectors + a.cached_sectors; @@ -142,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ @@ -166,8 +176,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, * avoid overflowing LRU_TIME_BITS on a corrupted fs, when * bucket_sectors_dirty is (much) bigger than bucket_size */ - u64 d = min(bch2_bucket_sectors_dirty(a), - ca->mi.bucket_size); + u64 d = min_t(s64, bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } @@ -232,52 +242,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 618d2ff0292e..8563c2d26847 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope prt_newline(out); } -void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) +void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_dev *ca) { struct open_bucket *ob; @@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); - if (ob->valid && !ob->on_partial_list) + if (ob->valid && !ob->on_partial_list && + (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } @@ -1738,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - bch2_dev_usage_to_text(out, &stats); + bch2_dev_usage_to_text(out, ca, &stats); prt_newline(out); @@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); } -void bch2_print_allocator_stuck(struct bch_fs *c) +static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n"); + prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", + c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); printbuf_indent_add(&buf, 2); @@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c) bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } + +static inline unsigned allocator_wait_timeout(struct bch_fs *c) +{ + if (c->allocator_last_stuck && + time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies)) + return 0; + + return c->opts.allocator_stuck_timeout * HZ; +} + +void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + unsigned t = allocator_wait_timeout(c); + + if (t && closure_sync_timeout(cl, t)) { + c->allocator_last_stuck = jiffies; + bch2_print_allocator_stuck(c); + } + + closure_sync(cl); +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 6da9e7e29026..386d231ceca3 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); -void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); +void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); @@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *); void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *); -void bch2_print_allocator_stuck(struct bch_fs *); +void __bch2_wait_on_allocator(struct bch_fs *, struct closure *); +static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + if (cl->closure_get_happened) + __bch2_wait_on_allocator(c, cl); +} #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a982..d4da6343efa9 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; @@ -763,27 +761,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98..7daecadb764e 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91361a167dcd..0c7086e00d18 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ @@ -893,6 +894,8 @@ struct bch_fs { struct bch_fs_usage_base __percpu *usage; u64 __percpu *online_reserved; + unsigned long allocator_last_stuck; + struct io_clock io_clock[2]; /* JOURNAL SEQ BLACKLIST */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 74a60b1a4ddf..c75f2e0f32bb 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,7 +675,9 @@ struct bch_sb_field_ext { x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ - x(disk_accounting_v2, BCH_VERSION(1, 9)) + x(disk_accounting_v2, BCH_VERSION(1, 9)) \ + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -836,6 +838,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0..e34cb2bf329c 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c..88d8958281e8 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb..3df3dd2723a1 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a947..eb3002c4eae7 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4a..56ea9a77cd4a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1767,6 +1744,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { @@ -1952,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 36872207f09b..2e84d22e17bd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; @@ -1921,6 +1922,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bch2_trans_verify_not_in_restart(trans); bch2_btree_iter_verify(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (ret) + goto err; + + struct btree_path *path = btree_iter_path(trans, iter); /* already at end? */ diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c..dca62375d7d3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -600,23 +600,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b..79954490627c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1..51d6289b8dee 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,27 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; +} + +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; } int bch2_btree_key_cache_journal_flush(struct journal *, diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 001107226377..b28c649c6838 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9..a0101d9c5d83 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -927,7 +883,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); @@ -973,9 +929,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; @@ -1060,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1084,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 31ee50184be2..b3454d4619e8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } @@ -1364,18 +1364,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2650a0d24663..be2bbd248631 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +void bch2_dev_usage_to_text(struct printbuf *out, + struct bch_dev *ca, + struct bch_dev_usage *usage) { prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); for (unsigned i = 0; i < BCH_DATA_NR; i++) { bch2_prt_data_type(out, i); prt_printf(out, "\t%llu\r%llu\r%llu\r\n", - usage->d[i].buckets, - usage->d[i].sectors, - usage->d[i].fragmented); + usage->d[i].buckets, + usage->d[i].sectors, + usage->d[i].fragmented); } + + prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); } static int bch2_check_fix_ptr(struct btree_trans *trans, @@ -806,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans, ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); if (ret) return ret; + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + replicas_sectors, + }; + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); + if (ret) + return ret; } if (bch2_bkey_rebalance_opts(k)) { @@ -897,7 +915,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -907,7 +924,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", @@ -1028,13 +1045,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1045,7 +1067,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2d35eeb24a2d..edbdffd508fc 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d..f70eb2127d32 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,7 +106,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { @@ -115,7 +115,14 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead..6a854c918496 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -250,10 +250,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5..45aec1afdb0e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); - - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308e..32bfdf19289a 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a09..8945145865c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index dcdd59249c23..e972e2bca546 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,11 +114,73 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +static inline bool is_zero(char *start, char *end) { - return 0; + BUG_ON(start > end); + + for (; start < end; start++) + if (*start) + return false; + return true; +} + +#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) +{ + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_nr_inodes: + end = field_end(acc_k, nr_inodes); + break; + case BCH_DISK_ACCOUNTING_persistent_reserved: + end = field_end(acc_k, persistent_reserved); + break; + case BCH_DISK_ACCOUNTING_replicas: + bkey_fsck_err_on(!acc_k.replicas.nr_devs, + c, accounting_key_replicas_nr_devs_0, + "accounting key replicas entry with nr_devs=0"); + + bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || + (acc_k.replicas.nr_required > 1 && + acc_k.replicas.nr_required == acc_k.replicas.nr_devs), + c, accounting_key_replicas_nr_required_bad, + "accounting key replicas entry with bad nr_required"); + + for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], + c, accounting_key_replicas_devs_unsorted, + "accounting key replicas entry with unsorted devs"); + + end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + end = field_end(acc_k, dev_data_type); + break; + case BCH_DISK_ACCOUNTING_compression: + end = field_end(acc_k, compression); + break; + case BCH_DISK_ACCOUNTING_snapshot: + end = field_end(acc_k, snapshot); + break; + case BCH_DISK_ACCOUNTING_btree: + end = field_end(acc_k, btree); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + end = field_end(acc_k, rebalance_work); + break; + } + + bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), + c, accounting_key_junk_at_end, + "junk at end of accounting key"); +fsck_err: + return ret; } void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k) @@ -465,6 +527,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -501,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -530,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && @@ -697,6 +762,15 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -712,9 +786,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69..f29fd0dd9581 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ @@ -107,41 +106,20 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx; - - EBUG_ON(gc && !acc->gc_running); - - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); - if (ret) - return ret; - } - - struct accounting_mem_entry *e = &acc->k.data[idx]; - - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); - - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) - this_cpu_add(e->v[gc][i], a.v->d[i]); - return 0; -} - /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -162,13 +140,31 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - return __bch2_accounting_mem_mod(c, a, gc); + struct bch_accounting_mem *acc = &c->accounting; + unsigned idx; + + EBUG_ON(gc && !acc->gc_running); + + while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { + int ret = bch2_accounting_mem_insert(c, a, gc); + if (ret) + return ret; + } + + struct accounting_mem_entry *e = &acc->k.data[idx]; + + EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + + for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + this_cpu_add(e->v[gc][i], a.v->d[i]); + return 0; } static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index cba417060b33..7b6e6c97e6aa 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -124,20 +125,23 @@ struct bch_dev_data_type { __u8 data_type; }; -struct bch_dev_stripe_buckets { - __u8 dev; -}; - struct bch_acct_compression { __u8 type; }; struct bch_acct_snapshot { __u32 id; -}; +} __packed; struct bch_acct_btree { __u32 id; +} __packed; + +struct bch_acct_inum { + __u64 inum; +} __packed; + +struct bch_acct_rebalance_work { }; struct disk_accounting_pos { @@ -149,12 +153,13 @@ struct disk_accounting_pos { struct bch_persistent_reserved persistent_reserved; struct bch_replicas_entry_v1 replicas; struct bch_dev_data_type dev_data_type; - struct bch_dev_stripe_buckets dev_stripe_buckets; struct bch_acct_compression compression; struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; - }; - }; + struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; + } __packed; + } __packed; struct bpos _pad; }; }; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 9b5b5c9a6c63..141a4c63142f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -1809,6 +1808,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); BUG_ON(v->nr_redundant != h->s->nr_parity); + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); if (i < h->s->nr_data) @@ -2235,6 +2237,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) mutex_unlock(&c->ec_stripes_heap_lock); } +static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c, + struct ec_stripe_new *s) +{ + prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs", + s->idx, s->nr_data, s->nr_parity, + bitmap_weight(s->blocks_allocated, s->nr_data), + atomic_read(&s->ref[STRIPE_REF_io]), + atomic_read(&s->ref[STRIPE_REF_stripe]), + bch2_watermarks[s->h->watermark]); + + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i; + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) + prt_printf(out, " %u", s->blocks[i]); + prt_newline(out); +} + void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) { struct ec_stripe_head *h; @@ -2247,23 +2266,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) bch2_watermarks[h->watermark]); if (h->s) - prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", - h->s->idx, h->s->nr_data, h->s->nr_parity, - bitmap_weight(h->s->blocks_allocated, - h->s->nr_data)); + bch2_new_stripe_to_text(out, c, h->s); } mutex_unlock(&c->ec_stripe_head_lock); prt_printf(out, "in flight:\n"); mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", - s->idx, s->nr_data, s->nr_parity, - atomic_read(&s->ref[STRIPE_REF_io]), - atomic_read(&s->ref[STRIPE_REF_stripe]), - bch2_watermarks[s->h->watermark]); - } + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb6249..90962b3c0130 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bf..ab5a7adece10 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820..95afa7bf2020 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad..2f1b86978f36 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <linux/printk.h> +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35f..4419ad3e454e 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1102,14 +1099,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1123,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1158,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1192,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1244,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec..1a6ddee48041 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3a5f49affa0a..94c392abef65 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new @@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1..2be6be33afa3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a745099..f1fcb4c58039 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 2cf6297756f8..177ed331c00b 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -126,11 +126,7 @@ err_noprint: if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); - - if (closure_sync_timeout(&cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&cl); - } + bch2_wait_on_allocator(c, &cl); } return ret; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4531c9ab3e12..7ee3b75480df 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_slots); retry: + bch2_trans_begin(trans); rbio->bio.bi_status = 0; k = bch2_btree_iter_peek_slot(&iter); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d31c8d006d97..1d4761d15002 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1503,10 +1503,7 @@ err: if ((op->flags & BCH_WRITE_SYNC) || (!(op->flags & BCH_WRITE_SUBMITTED) && !(op->flags & BCH_WRITE_IN_WORKER))) { - if (closure_sync_timeout(&op->cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&op->cl); - } + bch2_wait_on_allocator(c, &op->cl); __bch2_write_index(op); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63..7664b68e6a15 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb371..96f2f4f8c397 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11..e6a7d8241bb8 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 60b93018501f..cda1725702ea 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -391,6 +391,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ + x(allocator_stuck_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U16_MAX), \ + BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \ + NULL, "Default timeout in seconds for stuck allocator messages")\ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0a..c32a05e252e2 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218..a62abcc5332a 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525..e59c0abb4772 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67a..51afe11d8ed6 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 10c96cb2047a..1223b710755d 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index dfbbd33c8731..650a1f77ca40 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -61,6 +61,20 @@ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_bkey_version_in_future, \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ + BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ @@ -79,6 +93,21 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_fs_usage_hidden_wrong, \ + BCH_FSCK_ERR_fs_usage_btree_wrong, \ + BCH_FSCK_ERR_fs_usage_data_wrong, \ + BCH_FSCK_ERR_fs_usage_cached_wrong, \ + BCH_FSCK_ERR_fs_usage_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ + BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d1b2f2aa397a..d3a498617303 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -287,7 +287,11 @@ enum bch_fsck_flags { x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ - x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ + x(accounting_key_junk_at_end, 277, 0) \ + x(accounting_key_replicas_nr_devs_0, 278, 0) \ + x(accounting_key_replicas_nr_required_bad, 279, 0) \ + x(accounting_key_replicas_devs_unsorted, 280, 0) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5..8b18a9b483a4 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e962..eb5ef64221d6 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b55862..dbe834cb349f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb2..a8299ba2cab2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8bc819832790..c8c2ccbdfbb5 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 && + !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb)) + SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30); } for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0455a1001fec..e7fa2de35014 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); - kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1c0d1fb20276..f393023a3ae2 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -367,7 +367,7 @@ SHOW(bch2_fs) bch2_stripes_heap_to_text(out, c); if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c); + bch2_open_buckets_to_text(out, c, NULL); if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); @@ -811,6 +811,9 @@ SHOW(bch2_dev) if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c, ca); + return 0; } @@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, + &sysfs_open_buckets, NULL }; diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b4..dfad1d06633d 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb..c62f00322d1e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c..f2b4c17a0307 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c85..c188a5ad64ce 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c26545d71d39..cd6d5bbb4b9d 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -72,8 +72,10 @@ #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET #define DATA_START_OFFSET_WORDS (0) +#define MAX_SHARED_LIBS_UPDATE (0) #else #define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS) +#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS) #endif struct lib_info { @@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm) return res; /* Update data segment pointers for all libraries */ - for (i = 0; i < MAX_SHARED_LIBS; i++) { + for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) { if (!libinfo.lib_list[i].loaded) continue; for (j = 0; j < MAX_SHARED_LIBS; j++) { diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 498442d0c216..2e49d978f504 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c8568b1a61c4..75fa563e4cac 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,7 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 2ac9296edccb..06a9e0542d70 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(delayed_refs, bytenr, false); } +static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent) +{ + int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY; + + if (type < entry->type) + return -1; + if (type > entry->type) + return 1; + + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (root < entry->ref_root) + return -1; + if (root > entry->ref_root) + return 1; + } else { + if (parent < entry->parent) + return -1; + if (parent > entry->parent) + return 1; + } + return 0; +} + +/* + * Check to see if a given root/parent reference is attached to the head. This + * only checks for BTRFS_ADD_DELAYED_REF references that match, as that + * indicates the reference exists for the given root or parent. This is for + * tree blocks only. + * + * @head: the head of the bytenr we're searching. + * @root: the root objectid of the reference if it is a normal reference. + * @parent: the parent if this is a shared backref. + */ +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent) +{ + struct rb_node *node; + bool found = false; + + lockdep_assert_held(&head->mutex); + + spin_lock(&head->lock); + node = head->ref_tree.rb_root.rb_node; + while (node) { + struct btrfs_delayed_ref_node *entry; + int ret; + + entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); + ret = find_comp(entry, root, parent); + if (ret < 0) { + node = node->rb_left; + } else if (ret > 0) { + node = node->rb_right; + } else { + /* + * We only want to count ADD actions, as drops mean the + * ref doesn't exist. + */ + if (entry->action == BTRFS_ADD_DELAYED_REF) + found = true; + break; + } + } + spin_unlock(&head->lock); + return found; +} + void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ef15e998be03..05f634eb472d 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent); static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) { diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index f9fb2db6a1e4..67adbe9d294a 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -856,21 +856,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -897,10 +913,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d77498e7671c..feec49e6f9c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); @@ -5471,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 parent, int level) { + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *head; struct btrfs_path *path; struct btrfs_extent_inline_ref *iref; int ret; + bool exists = false; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: ret = lookup_extent_backref(trans, path, &iref, bytenr, root->fs_info->nodesize, parent, btrfs_root_id(root), level, 0); + if (ret != -ENOENT) { + /* + * If we get 0 then we found our reference, return 1, else + * return the error if it's not -ENOENT; + */ + btrfs_free_path(path); + return (ret < 0 ) ? ret : 1; + } + + /* + * We could have a delayed ref with this reference, so look it up while + * we're holding the path open to make sure we don't race with the + * delayed ref running. + */ + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); + if (!head) + goto out; + if (!mutex_trylock(&head->mutex)) { + /* + * We're contended, means that the delayed ref is running, get a + * reference and wait for the ref head to be complete and then + * try again. + */ + refcount_inc(&head->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref_head(head); + goto again; + } + + exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent); + mutex_unlock(&head->mutex); +out: + spin_unlock(&delayed_refs->lock); btrfs_free_path(path); - if (ret == -ENOENT) - return 0; - if (ret < 0) - return ret; - return 1; + return exists ? 1 : 0; } /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aa7f8148cd0d..c73cd4f89015 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, free_extent_map(em); em = NULL; + /* + * Although the PageDirty bit might be cleared before entering + * this function, subpage dirty bit is not cleared. + * So clear subpage dirty bit here so next time we won't submit + * page for range already written to disk. + */ + btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); btrfs_set_range_writeback(inode, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(inode->root->fs_info, @@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, page->index, cur, end); } - /* - * Although the PageDirty bit is cleared before entering this - * function, subpage dirty bit is not cleared. - * So clear subpage dirty bit here so next time we won't submit - * page for range already written to disk. - */ - btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); submit_extent_page(bio_ctrl, disk_bytenr, page, iosize, cur - page_offset(page)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81558f90ee80..10ac5f657e38 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, start_diff = start - em->start; em->start = start; em->len = end - start; - if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em)) + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) em->offset += start_diff; return add_extent_mapping(inode, em, 0); } @@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c return 0; /* - * We want to be fast because we can be called from any path trying to - * allocate memory, so if the lock is busy we don't want to spend time + * We want to be fast so if the lock is busy we don't want to spend time * waiting for it - either some task is about to do IO for the inode or * we may have another task shrinking extent maps, here in this code, so * skip this inode. @@ -1191,9 +1190,7 @@ next: /* * Stop if we need to reschedule or there's contention on the * lock. This is to avoid slowing other tasks trying to take the - * lock and because the shrinker might be called during a memory - * allocation path and we want to avoid taking a very long time - * and slowing down all sorts of tasks. + * lock. */ if (need_resched() || rwlock_needbreak(&tree->lock)) break; @@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx if (ctx->scanned >= ctx->nr_to_scan) break; - /* - * We may be called from memory allocation paths, so we don't - * want to take too much time and slowdown tasks. - */ - if (need_resched()) - break; + cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) ctx.last_ino); } - /* - * We may be called from memory allocation paths, so we don't want to - * take too much time and slowdown tasks, so stop if we need reschedule. - */ - while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { + while (ctx.scanned < ctx.nr_to_scan) { struct btrfs_root *root; unsigned long count; + cond_resched(); + spin_lock(&fs_info->fs_roots_radix_lock); count = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)&root, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 21381de906f6..9914419f3b7d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&inode->i_mmap_lock); + else + btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } @@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); @@ -1857,7 +1868,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3f9b7507543a..eaa1dbd31352 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + bool initial; u64 reclaimable_unusable; - WARN_ON(!initial && offset + size > block_group->zone_capacity); + spin_lock(&block_group->lock); + initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + WARN_ON(!initial && offset + size > block_group->zone_capacity); if (!initial) bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); - spin_lock(&ctl->tree_lock); if (!used) to_free = size; else if (initial) @@ -2718,18 +2719,19 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_free = offset + size - block_group->alloc_offset; to_unusable = size - to_free; + spin_lock(&ctl->tree_lock); ctl->free_space += to_free; + spin_unlock(&ctl->tree_lock); /* * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; - spin_unlock(&ctl->tree_lock); + WARN_ON(block_group->zone_unusable > block_group->length); + } if (!used) { - spin_lock(&block_group->lock); block_group->alloc_offset -= size; - spin_unlock(&block_group->lock); } reclaimable_unusable = block_group->zone_unusable - @@ -2743,6 +2745,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, btrfs_mark_bg_to_reclaim(block_group); } + spin_unlock(&block_group->lock); + return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01eab6955647..b1b6564ab68f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -714,8 +714,9 @@ out: return ret; } -static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, - u64 end, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, + struct page *locked_page, + u64 offset, u64 end, size_t compressed_size, int compress_type, struct folio *compressed_folio, @@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, return ret; } - extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached, + if (ret == 0) + locked_page = NULL; + + extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached, clear_flags, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1043,10 +1047,10 @@ again: * extent for the subpage case. */ if (total_in < actual_end) - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, NULL, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); else - ret = cow_file_range_inline(inode, start, end, total_compressed, + ret = cow_file_range_inline(inode, NULL, start, end, total_compressed, compress_type, folios[0], false); if (ret <= 0) { if (ret < 0) @@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (!no_inline) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, locked_page, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); if (ret <= 0) { /* @@ -1581,6 +1585,7 @@ out_unlock: locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1594,6 +1599,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } return ret; } @@ -2255,6 +2261,7 @@ error: EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); return ret; @@ -4188,6 +4195,7 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); + inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); ret = btrfs_update_inode(trans, dir); @@ -5660,7 +5668,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; @@ -7198,6 +7206,12 @@ static void wait_subpage_spinlock(struct page *page) spin_unlock_irq(&subpage->lock); } +static int btrfs_launder_folio(struct folio *folio) +{ + return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), + PAGE_SIZE, NULL); +} + static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) { if (try_release_extent_mapping(&folio->page, gfp_flags)) { @@ -10133,6 +10147,7 @@ static const struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readahead = btrfs_readahead, .invalidate_folio = btrfs_invalidate_folio, + .launder_folio = btrfs_launder_folio, .release_folio = btrfs_release_folio, .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32dcea662da3..fc821aa446f0 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,7 +14,7 @@ struct root_name_map { u64 id; - char name[16]; + const char *name; }; static const struct root_name_map root_map[] = { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 14a8d7100018..0de9162ff481 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static u32 stripe_length(const struct scrub_stripe *stripe) +{ + ASSERT(stripe->bg); + + return min(BTRFS_STRIPE_LEN, + stripe->bg->start + stripe->bg->length - stripe->logical); +} + static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1871,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = stripe->physical + stripe_length(stripe); + spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: @@ -2139,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ + spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; + spin_unlock(&sctx->stat_lock); ret = 0; break; } @@ -2336,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, + physical_end); + spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ca711a773ef..619fa0b8b3f6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -347,7 +347,7 @@ struct name_cache_entry { int ret; int need_later_update; int name_len; - char name[]; + char name[] __counted_by(name_len); }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ @@ -6157,25 +6157,51 @@ static int send_write_or_clone(struct send_ctx *sctx, u64 offset = key->offset; u64 end; u64 bs = sctx->send_root->fs_info->sectorsize; + struct btrfs_file_extent_item *ei; + u64 disk_byte; + u64 data_offset; + u64 num_bytes; + struct btrfs_inode_info info = { 0 }; end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); if (offset >= end) return 0; - if (clone_root && IS_ALIGNED(end, bs)) { - struct btrfs_file_extent_item *ei; - u64 disk_byte; - u64 data_offset; + num_bytes = end - offset; - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); - data_offset = btrfs_file_extent_offset(path->nodes[0], ei); - ret = clone_range(sctx, path, clone_root, disk_byte, - data_offset, offset, end - offset); - } else { - ret = send_extent_data(sctx, path, offset, end - offset); - } + if (!clone_root) + goto write_data; + + if (IS_ALIGNED(end, bs)) + goto clone_data; + + /* + * If the extent end is not aligned, we can clone if the extent ends at + * the i_size of the inode and the clone range ends at the i_size of the + * source inode, otherwise the clone operation fails with -EINVAL. + */ + if (end != sctx->cur_inode_size) + goto write_data; + + ret = get_inode_info(clone_root->root, clone_root->ino, &info); + if (ret < 0) + return ret; + + if (clone_root->offset + num_bytes == info.size) + goto clone_data; + +write_data: + ret = send_extent_data(sctx, path, offset, num_bytes); + sctx->cur_inode_next_write_offset = end; + return ret; + +clone_data: + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); + data_offset = btrfs_file_extent_offset(path->nodes[0], ei); + ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset, + num_bytes); sctx->cur_inode_next_write_offset = end; return ret; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 9ac94d3119e8..68e14fd48638 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); @@ -583,8 +583,7 @@ again: spin_lock(&cache->lock); avail = cache->length - cache->used - cache->pinned - - cache->reserved - cache->delalloc_bytes - - cache->bytes_super - cache->zone_unusable; + cache->reserved - cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", cache->start, cache->length, cache->used, cache->pinned, diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 4db8a0267c16..88b44221ce97 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 08d33cb372fb..98fa0f382480 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,6 +28,7 @@ #include <linux/btrfs.h> #include <linux/security.h> #include <linux/fs_parser.h> +#include <linux/swap.h> #include "messages.h" #include "delayed-inode.h" #include "ctree.h" @@ -683,8 +684,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { - if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) + if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); + btrfs_warn(info, +"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); + } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } @@ -2398,7 +2402,13 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro trace_btrfs_extent_map_shrinker_count(fs_info, nr); - return nr; + /* + * Only report the real number for DEBUG builds, as there are reports of + * serious performance degradation caused by too frequent shrinks. + */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) + return nr; + return 0; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) @@ -2406,6 +2416,15 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan); struct btrfs_fs_info *fs_info = btrfs_sb(sb); + /* + * We may be called from any task trying to allocate memory and we don't + * want to slow it down with scanning and dropping extent maps. It would + * also cause heavy lock contention if many tasks concurrently enter + * here. Therefore only allow kswapd tasks to scan and drop extent maps. + */ + if (!current_is_kswapd()) + return 0; + return btrfs_free_extent_maps(fs_info, nr_to_scan); } diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index ebec4ab361b8..56e61ac1cc64 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -900,6 +900,102 @@ out: return ret; } +/* + * Test a regression for compressed extent map adjustment when we attempt to + * add an extent map that is partially overlapped by another existing extent + * map. The resulting extent map offset was left unchanged despite having + * incremented its start offset. + */ +static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + int ret; + int ret2; + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + return -ENOMEM; + } + + /* Compressed extent for the file range [120K, 128K). */ + em->start = SZ_1K * 120; + em->len = SZ_8K; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_8K; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [120K, 128K)"); + goto out; + } + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + ret = -ENOMEM; + goto out; + } + + /* + * Compressed extent for the file range [108K, 144K), which overlaps + * with the [120K, 128K) we previously inserted. + */ + em->start = SZ_1K * 108; + em->len = SZ_1K * 36; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_1K * 36; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + + /* + * Try to add the extent map but with a search range of [140K, 144K), + * this should succeed and adjust the extent map to the range + * [128K, 144K), with a length of 16K and an offset of 20K. + * + * This simulates a scenario where in the subvolume tree of an inode we + * have a compressed file extent item for the range [108K, 144K) and we + * have an overlapping compressed extent map for the range [120K, 128K), + * which was created by an encoded write, but its ordered extent was not + * yet completed, so the subvolume tree doesn't have yet the file extent + * item for that range - we only have the extent map in the inode's + * extent map tree. + */ + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [108K, 144K)"); + goto out; + } + + if (em->start != SZ_128K) { + test_err("unexpected extent map start %llu (should be 128K)", em->start); + ret = -EINVAL; + goto out; + } + if (em->len != SZ_16K) { + test_err("unexpected extent map length %llu (should be 16K)", em->len); + ret = -EINVAL; + goto out; + } + if (em->offset != SZ_1K * 20) { + test_err("unexpected extent map offset %llu (should be 20K)", em->offset); + ret = -EINVAL; + goto out; + } +out: + ret2 = free_extent_map_tree(inode); + if (ret == 0) + ret = ret2; + + return ret; +} + struct rmap_test_vector { u64 raid_type; u64 physical_start; @@ -1078,6 +1174,9 @@ int btrfs_test_extent_map(void) ret = test_case_7(fs_info, BTRFS_I(inode)); if (ret) goto out; + ret = test_case_8(fs_info, BTRFS_I(inode)); + if (ret) + goto out; test_msg("running rmap tests"); for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) { diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6388786fd8b5..634d69964fe4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -569,9 +569,10 @@ static int check_dir_item(struct extent_buffer *leaf, /* dir type check */ dir_type = btrfs_dir_ftype(leaf, di); - if (unlikely(dir_type >= BTRFS_FT_MAX)) { + if (unlikely(dir_type <= BTRFS_FT_UNKNOWN || + dir_type >= BTRFS_FT_MAX)) { dir_item_err(leaf, slot, - "invalid dir item type, have %u expect [0, %u)", + "invalid dir item type, have %u expect (0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; } @@ -634,7 +635,7 @@ static int check_dir_item(struct extent_buffer *leaf, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); @@ -1289,6 +1290,19 @@ static void extent_err(const struct extent_buffer *eb, int slot, va_end(args); } +static bool is_valid_dref_root(u64 rootid) +{ + /* + * The following tree root objectids are allowed to have a data backref: + * - subvolume trees + * - data reloc tree + * - tree root + * For v1 space cache + */ + return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || + rootid == BTRFS_ROOT_TREE_OBJECTID; +} + static int check_extent_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) @@ -1441,6 +1455,8 @@ static int check_extent_item(struct extent_buffer *leaf, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u64 seq; + u64 dref_root; + u64 dref_objectid; u64 dref_offset; u64 inline_offset; u8 inline_type; @@ -1484,11 +1500,26 @@ static int check_extent_item(struct extent_buffer *leaf, */ case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); + dref_root = btrfs_extent_data_ref_root(leaf, dref); + dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); seq = hash_extent_data_ref( btrfs_extent_data_ref_root(leaf, dref), btrfs_extent_data_ref_objectid(leaf, dref), btrfs_extent_data_ref_offset(leaf, dref)); + if (unlikely(!is_valid_dref_root(dref_root))) { + extent_err(leaf, slot, + "invalid data ref root value %llu", + dref_root); + return -EUCLEAN; + } + if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID || + dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid data ref objectid value %llu", + dref_root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, @@ -1627,6 +1658,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { + u64 root; + u64 objectid; u64 offset; /* @@ -1634,7 +1667,22 @@ static int check_extent_data_ref(struct extent_buffer *leaf, * overflow from the leaf due to hash collisions. */ dref = (struct btrfs_extent_data_ref *)ptr; + root = btrfs_extent_data_ref_root(leaf, dref); + objectid = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); + if (unlikely(!is_valid_dref_root(root))) { + extent_err(leaf, slot, + "invalid extent data backref root value %llu", + root); + return -EUCLEAN; + } + if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID || + objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid extent data backref objectid value %llu", + root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", @@ -1716,6 +1764,72 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf, return 0; } +static int check_dev_extent_item(const struct extent_buffer *leaf, + const struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) +{ + struct btrfs_dev_extent *de; + const u32 sectorsize = leaf->fs_info->sectorsize; + + de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + /* Basic fixed member checks. */ + if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != + BTRFS_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk tree id, has %llu expect %llu", + btrfs_dev_extent_chunk_tree(leaf, de), + BTRFS_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk objectid, has %llu expect %llu", + btrfs_dev_extent_chunk_objectid(leaf, de), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + /* Alignment check. */ + if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent key.offset, has %llu not aligned to %u", + key->offset, sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent chunk offset, has %llu not aligned to %u", + btrfs_dev_extent_chunk_objectid(leaf, de), + sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent length, has %llu not aligned to %u", + btrfs_dev_extent_length(leaf, de), sectorsize); + return -EUCLEAN; + } + /* Overlap check with previous dev extent. */ + if (slot && prev_key->objectid == key->objectid && + prev_key->type == key->type) { + struct btrfs_dev_extent *prev_de; + u64 prev_len; + + prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); + prev_len = btrfs_dev_extent_length(leaf, prev_de); + if (unlikely(prev_key->offset + prev_len > key->offset)) { + generic_err(leaf, slot, + "dev extent overlap, prev offset %llu len %llu current offset %llu", + prev_key->objectid, prev_len, key->offset); + return -EUCLEAN; + } + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -1752,6 +1866,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(leaf, key, slot, prev_key); + break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8c16bc5250ef..c4744a02db75 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) if (err >= 0) { if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); - if (err < subreq->len) + if (err < subreq->len && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { err = ceph_fscrypt_decrypt_extents(inode, @@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); if (subreq->start >= inode->i_size) @@ -424,6 +426,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) struct ceph_netfs_request_data *priv; int ret = 0; + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); + if (rreq->origin != NETFS_READAHEAD) return 0; @@ -498,6 +503,11 @@ const struct netfs_request_ops ceph_netfs_ops = { }; #ifdef CONFIG_CEPH_FSCACHE +static void ceph_set_page_fscache(struct page *page) +{ + folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ +} + static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) { struct inode *inode = priv; @@ -515,6 +525,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b ceph_fscache_write_terminated, inode, true, caching); } #else +static inline void ceph_set_page_fscache(struct page *page) +{ +} + static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { } @@ -706,6 +720,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = wlen; set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { @@ -789,6 +805,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) return AOP_WRITEPAGE_ACTIVATE; } + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ + err = writepage_nounlock(page, wbc); if (err == -ERESTARTSYS) { /* direct memory reclaimer was killed by SIGKILL. return 0 @@ -1062,7 +1080,8 @@ get_more_pages: unlock_page(page); break; } - if (PageWriteback(page)) { + if (PageWriteback(page) || + PagePrivate2(page) /* [DEPRECATED] */) { if (wbc->sync_mode == WB_SYNC_NONE) { doutc(cl, "%p under writeback\n", page); unlock_page(page); @@ -1070,6 +1089,7 @@ get_more_pages: } doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ } if (!clear_page_dirty_for_io(page)) { @@ -1254,6 +1274,8 @@ new_request: } set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); len += thp_size(page); } ceph_fscache_write_to_cache(inode, offset, len, caching); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c4941ba245ac..808c9c048276 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2097,7 +2099,7 @@ retry: } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2105,7 +2107,8 @@ retry: ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may @@ -2180,6 +2183,11 @@ retry: queue_writeback = true; } + if (flags & CHECK_CAPS_FLUSH_FORCE) { + doutc(cl, "force to flush caps\n"); + goto ack; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -3067,10 +3075,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, flags, &_got); WARN_ON_ONCE(ret == -EAGAIN); if (!ret) { +#ifdef CONFIG_DEBUG_FS struct ceph_mds_client *mdsc = fsc->mdsc; struct cap_wait cw; +#endif DEFINE_WAIT_FUNC(wait, woken_wake_function); +#ifdef CONFIG_DEBUG_FS cw.ino = ceph_ino(inode); cw.tgid = current->tgid; cw.need = need; @@ -3079,6 +3090,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, spin_lock(&mdsc->caps_list_lock); list_add(&cw.list, &mdsc->cap_wait_list); spin_unlock(&mdsc->caps_list_lock); +#endif /* make sure used fmode not timeout */ ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS); @@ -3097,9 +3109,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, remove_wait_queue(&ci->i_cap_wq, &wait); ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS); +#ifdef CONFIG_DEBUG_FS spin_lock(&mdsc->caps_list_lock); list_del(&cw.list); spin_unlock(&mdsc->caps_list_lock); +#endif if (ret == -EAGAIN) continue; @@ -3504,6 +3518,8 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + bool revoke_wait = false; + int flags = 0; /* * If there is at least one crypto block then we'll trust @@ -3699,16 +3715,18 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && - (revoking & used & CEPH_CAP_FILE_BUFFER)) + (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ - else if (queue_invalidate && + revoke_wait = true; + } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) - ; /* do nothing yet, invalidation will be queued */ - else if (cap == ci->i_auth_cap) + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { + revoke_wait = true; /* do nothing yet, invalidation will be queued */ + } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ - else + } else { check_caps = 2; /* check all caps */ + } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; @@ -3735,8 +3753,9 @@ static void handle_cap_grant(struct inode *inode, BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3792,9 +3811,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5aadc56e0cc0..18c72b305858 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) } spin_lock(&mdsc->dentry_list_lock); - __dentry_dir_lease_touch(mdsc, di), + __dentry_dir_lease_touch(mdsc, di); spin_unlock(&mdsc->dentry_list_lock); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8f8de8f33abb..71cd70514efa 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); spin_lock_init(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c2157f6e0c69..276e34ab3e2c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work) } mutex_unlock(&mdsc->mutex); + ceph_flush_cap_releases(mdsc, s); + mutex_lock(&s->s_mutex); if (renew_caps) send_renew_caps(mdsc, s); @@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; INIT_LIST_HEAD(&mdsc->cap_delay_list); +#ifdef CONFIG_DEBUG_FS INIT_LIST_HEAD(&mdsc->cap_wait_list); +#endif spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); INIT_LIST_HEAD(&mdsc->snap_flush_list); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index cfa18cf915a0..9bcc7f181bfe 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -416,6 +416,8 @@ struct ceph_quotarealm_inode { struct inode *inode; }; +#ifdef CONFIG_DEBUG_FS + struct cap_wait { struct list_head list; u64 ino; @@ -424,6 +426,8 @@ struct cap_wait { int want; }; +#endif + enum { CEPH_MDSC_STOPPING_BEGIN = 1, CEPH_MDSC_STOPPING_FLUSHING = 2, @@ -512,7 +516,9 @@ struct ceph_mds_client { spinlock_t caps_list_lock; struct list_head caps_list; /* unused (reserved or unreserved) */ +#ifdef CONFIG_DEBUG_FS struct list_head cap_wait_list; +#endif int caps_total_count; /* total caps allocated */ int caps_use_count; /* in use */ int caps_use_max; /* max used caps */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 885cb5d4e771..0cdf84cd1791 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -961,7 +961,8 @@ static int __init init_caches(void) if (!ceph_mds_request_cachep) goto bad_mds_req; - ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, + (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *)); if (!ceph_wb_pagevec_pool) goto bad_pagevec_pool; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b63b4cd9b5b6..6e817bf1337c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 8be60797ea2f..1b7eba38ba1e 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -21,38 +21,32 @@ void erofs_put_metabuf(struct erofs_buf *buf) if (!buf->page) return; erofs_unmap_metabuf(buf); - put_page(buf->page); + folio_put(page_folio(buf->page)); buf->page = NULL; } -/* - * Derive the block size from inode->i_blkbits to make compatible with - * anonymous inode in fscache mode. - */ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, enum erofs_kmap_type type) { pgoff_t index = offset >> PAGE_SHIFT; - struct page *page = buf->page; - struct folio *folio; - unsigned int nofs_flag; + struct folio *folio = NULL; - if (!page || page->index != index) { + if (buf->page) { + folio = page_folio(buf->page); + if (folio_file_page(folio, index) != buf->page) + erofs_unmap_metabuf(buf); + } + if (!folio || !folio_contains(folio, index)) { erofs_put_metabuf(buf); - - nofs_flag = memalloc_nofs_save(); - folio = read_cache_folio(buf->mapping, index, NULL, NULL); - memalloc_nofs_restore(nofs_flag); + folio = read_mapping_folio(buf->mapping, index, NULL); if (IS_ERR(folio)) return folio; - - /* should already be PageUptodate, no need to lock page */ - page = folio_file_page(folio, index); - buf->page = page; } + buf->page = folio_file_page(folio, index); + if (buf->kmap_type == EROFS_NO_KMAP) { if (type == EROFS_KMAP) - buf->base = kmap_local_page(page); + buf->base = kmap_local_page(buf->page); buf->kmap_type = type; } else if (buf->kmap_type != type) { DBG_BUGON(1); diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 06a722b85a45..40666815046f 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -188,7 +188,7 @@ again: !rq->partial_decoding); buf.in_size = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in); rq->inputsize -= buf.in_size; - buf.in = dctx.kin + rq->pageofs_in, + buf.in = dctx.kin + rq->pageofs_in; dctx.bounce = strm->bounce; do { dctx.avail_out = buf.out_size - buf.out_pos; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2193a6710c8f..c3b90abdee37 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -8,19 +8,15 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, void *dentry_blk, struct erofs_dirent *de, - unsigned int nameoff, unsigned int maxsize) + unsigned int nameoff0, unsigned int maxsize) { - const struct erofs_dirent *end = dentry_blk + nameoff; + const struct erofs_dirent *end = dentry_blk + nameoff0; while (de < end) { - const char *de_name; + unsigned char d_type = fs_ftype_to_dtype(de->file_type); + unsigned int nameoff = le16_to_cpu(de->nameoff); + const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - unsigned char d_type; - - d_type = fs_ftype_to_dtype(de->file_type); - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; /* the last dirent in the block? */ if (de + 1 >= end) @@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; unsigned long bsz = sb->s_blocksize; - const size_t dirsize = i_size_read(dir); - unsigned int i = erofs_blknr(sb, ctx->pos); unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; buf.mapping = dir->i_mapping; - while (ctx->pos < dirsize) { + while (ctx->pos < dir->i_size) { + erofs_off_t dbstart = ctx->pos - ofs; struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP); + de = erofs_bread(&buf, dbstart, EROFS_KMAP); if (IS_ERR(de)) { erofs_err(sb, "fail to readdir of logical block %u of nid %llu", - i, EROFS_I(dir)->nid); + erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; } @@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } - maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); - + maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = erofs_pos(sb, i) + ofs; - if (ofs >= nameoff) - goto skip_this; + ctx->pos = dbstart + ofs; } err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); if (err) break; -skip_this: - ctx->pos = erofs_pos(sb, i) + maxsize; - ++i; + ctx->pos = dbstart + maxsize; ofs = 0; } erofs_put_metabuf(&buf); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 5f6439a63af7..419432be3223 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } + mapping_set_large_folios(inode->i_mapping); if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, erofs_info, inode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; -#endif +#else err = -EOPNOTSUPP; - goto out_unlock; - } - inode->i_mapping->a_ops = &erofs_raw_access_aops; - mapping_set_large_folios(inode->i_mapping); +#endif + } else { + inode->i_mapping->a_ops = &erofs_raw_access_aops; #ifdef CONFIG_EROFS_FS_ONDEMAND - if (erofs_is_fscache_mode(inode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; #endif - + } out_unlock: erofs_put_metabuf(&buf); return err; @@ -334,14 +332,29 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, unsigned int query_flags) { struct inode *const inode = d_inode(path->dentry); + bool compressed = + erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout); - if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) + if (compressed) stat->attributes |= STATX_ATTR_COMPRESSED; - stat->attributes |= STATX_ATTR_IMMUTABLE; stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE); + /* + * Return the DIO alignment restrictions if requested. + * + * In EROFS, STATX_DIOALIGN is not supported in ondemand mode and + * compressed files, so in these cases we report no DIO support. + */ + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { + stat->result_mask |= STATX_DIOALIGN; + if (!erofs_is_fscache_mode(inode->i_sb) && !compressed) { + stat->dio_mem_align = + bdev_logical_block_size(inode->i_sb->s_bdev); + stat->dio_offset_align = stat->dio_mem_align; + } + } generic_fillattr(idmap, request_mask, inode, stat); return 0; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 736607675396..45dc15ebd870 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -220,7 +220,7 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits) +#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 35268263aaed..6cb5c8916174 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode) kmem_cache_free(erofs_inode_cachep, vi); } -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *dsb) -{ - const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - - EROFS_SB(sb)->feature_incompat = feature; - - /* check if current kernel meets all mandatory requirements */ - if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", - feature & ~EROFS_ALL_FEATURE_INCOMPAT); - return false; - } - return true; -} - /* read variable-sized metadata, offset will be aligned by 4-byte */ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) @@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; void *data; @@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb) return PTR_ERR(data); } - sbi = EROFS_SB(sb); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); - ret = -EINVAL; if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { erofs_err(sb, "cannot find valid erofs superblock"); @@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb) } ret = -EINVAL; - if (!check_layout_compatibility(sb, dsb)) + sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat); + if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) { + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", + sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT); goto out; + } sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) { @@ -576,6 +562,21 @@ static const struct export_operations erofs_export_ops = { .get_parent = erofs_get_parent, }; +static void erofs_set_sysfs_name(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + if (erofs_is_fscache_mode(sb)) { + if (sbi->domain_id) + super_set_sysfs_name_generic(sb, "%s,%s",sbi->domain_id, + sbi->fsid); + else + super_set_sysfs_name_generic(sb, "%s", sbi->fsid); + return; + } + super_set_sysfs_name_id(sb); +} + static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; @@ -643,6 +644,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_flags |= SB_POSIXACL; else sb->s_flags &= ~SB_POSIXACL; + erofs_set_sysfs_name(sb); #ifdef CONFIG_EROFS_FS_ZIP xa_init(&sbi->managed_pslots); diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index b80f612867c2..37afe2024840 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -38,11 +38,13 @@ void *z_erofs_get_gbuf(unsigned int requiredpages) { struct z_erofs_gbuf *gbuf; + migrate_disable(); gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; spin_lock(&gbuf->lock); /* check if the buffer is too small */ if (requiredpages > gbuf->nrpages) { spin_unlock(&gbuf->lock); + migrate_enable(); /* (for sparse checker) pretend gbuf->lock is still taken */ __acquire(gbuf->lock); return NULL; @@ -57,6 +59,7 @@ void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock) gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; DBG_BUGON(gbuf->ptr != ptr); spin_unlock(&gbuf->lock); + migrate_enable(); } int z_erofs_gbuf_growsize(unsigned int nrpages) @@ -108,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) - if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) + if (tmp_pages[j] && (j >= gbuf->nrpages || + tmp_pages[j] != gbuf->pages[j])) __free_page(tmp_pages[j]); kfree(tmp_pages); } diff --git a/fs/exec.c b/fs/exec.c index a126e3d1cacb..50e76cc633c4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1692,6 +1692,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) unsigned int mode; vfsuid_t vfsuid; vfsgid_t vfsgid; + int err; if (!mnt_may_suid(file->f_path.mnt)) return; @@ -1708,12 +1709,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) /* Be careful if suid/sgid is set */ inode_lock(inode); - /* reload atomically mode/uid/gid now that lock held */ + /* Atomically reload and check mode/uid/gid now that lock held. */ mode = inode->i_mode; vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); + err = inode_permission(idmap, inode, MAY_EXEC); inode_unlock(inode); + /* Did the exec bit vanish out from under us? Give up. */ + if (err) + return; + /* We ignore suid/sgid if there are no mappings for them in the ns */ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) || !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid)) diff --git a/fs/file.c b/fs/file.c index a3b72aa64f11..655338effe9c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu) #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) +#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ -static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, - unsigned int count) +static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, + unsigned int copy_words) { - unsigned int cpy, set; - - cpy = count / BITS_PER_BYTE; - set = (nfdt->max_fds - count) / BITS_PER_BYTE; - memcpy(nfdt->open_fds, ofdt->open_fds, cpy); - memset((char *)nfdt->open_fds + cpy, 0, set); - memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); - memset((char *)nfdt->close_on_exec + cpy, 0, set); - - cpy = BITBIT_SIZE(count); - set = BITBIT_SIZE(nfdt->max_fds) - cpy; - memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); - memset((char *)nfdt->full_fds_bits + cpy, 0, set); + unsigned int nwords = fdt_words(nfdt); + + bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits, + copy_words, nwords); } /* @@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); + copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt)); } /* @@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int open_files = sane_fdtable_size(old_fdt, max_fds); } - copy_fd_bitmaps(new_fdt, old_fdt, open_files); + copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG); old_fds = old_fdt->fd; new_fds = new_fdt->fd; @@ -1248,6 +1244,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de..7146038b2fe7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1618,9 +1618,11 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_SIZE || file_size == end)) + if (!PageUptodate(page) && !err && offset == 0 && + (this_num == PAGE_SIZE || file_size == end)) { + zero_user_segment(page, this_num, PAGE_SIZE); SetPageUptodate(page); + } unlock_page(page); put_page(page); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 777c1f77ff58..22df574ca99e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -17,6 +17,7 @@ #include <linux/writeback.h> #include <linux/mount.h> #include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/namei.h> #include "hostfs.h" #include <init.h> @@ -929,7 +930,6 @@ static const struct inode_operations hostfs_link_iops = { static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct hostfs_fs_info *fsi = sb->s_fs_info; - const char *host_root = fc->source; struct inode *root_inode; int err; @@ -943,15 +943,6 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - /* NULL is printed as '(null)' by printf(): avoid that. */ - if (fc->source == NULL) - host_root = ""; - - fsi->host_root_path = - kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); - if (fsi->host_root_path == NULL) - return -ENOMEM; - root_inode = hostfs_iget(sb, fsi->host_root_path); if (IS_ERR(root_inode)) return PTR_ERR(root_inode); @@ -977,6 +968,58 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) return 0; } +enum hostfs_parma { + Opt_hostfs, +}; + +static const struct fs_parameter_spec hostfs_param_specs[] = { + fsparam_string_empty("hostfs", Opt_hostfs), + {} +}; + +static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct hostfs_fs_info *fsi = fc->s_fs_info; + struct fs_parse_result result; + char *host_root; + int opt; + + opt = fs_parse(fc, hostfs_param_specs, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_hostfs: + host_root = param->string; + if (!*host_root) + host_root = ""; + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) + return -ENOMEM; + break; + } + + return 0; +} + +static int hostfs_parse_monolithic(struct fs_context *fc, void *data) +{ + struct hostfs_fs_info *fsi = fc->s_fs_info; + char *host_root = (char *)data; + + /* NULL is printed as '(null)' by printf(): avoid that. */ + if (host_root == NULL) + host_root = ""; + + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) + return -ENOMEM; + + return 0; +} + static int hostfs_fc_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, hostfs_fill_super); @@ -994,6 +1037,8 @@ static void hostfs_fc_free(struct fs_context *fc) } static const struct fs_context_operations hostfs_context_ops = { + .parse_monolithic = hostfs_parse_monolithic, + .parse_param = hostfs_parse_param, .get_tree = hostfs_fc_get_tree, .free = hostfs_fc_free, }; diff --git a/fs/inode.c b/fs/inode.c index 86670941884b..10c4619faeef 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -488,6 +488,39 @@ static void inode_lru_list_del(struct inode *inode) this_cpu_dec(nr_unused); } +static void inode_pin_lru_isolating(struct inode *inode) +{ + lockdep_assert_held(&inode->i_lock); + WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); + inode->i_state |= I_LRU_ISOLATING; +} + +static void inode_unpin_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); + inode->i_state &= ~I_LRU_ISOLATING; + smp_mb(); + wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); +} + +static void inode_wait_for_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_state & I_LRU_ISOLATING) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); + __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode->i_lock); + WARN_ON(inode->i_state & I_LRU_ISOLATING); + } + spin_unlock(&inode->i_lock); +} + /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add @@ -657,6 +690,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_wait_for_lru_isolating(inode); + /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since @@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, * be under pressure before the cache inside the highmem zone. */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { - __iget(inode); + inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); if (remove_inode_buffers(inode)) { @@ -867,7 +902,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, __count_vm_events(PGINODESTEAL, reap); mm_account_reclaimed_pages(reap); } - iput(inode); + inode_unpin_lru_isolating(inode); spin_lock(lru_lock); return LRU_RETRY; } diff --git a/fs/libfs.c b/fs/libfs.c index 8aa34870449f..02602d00939e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx) mtree_destroy(&octx->mt); } +static int offset_dir_open(struct inode *inode, struct file *file) +{ + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + + file->private_data = (void *)ctx->next_offset; + return 0; +} + /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated @@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx) */ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { + struct inode *inode = file->f_inode; + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + switch (whence) { case SEEK_CUR: offset += file->f_pos; @@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) } /* In this case, ->private_data is protected by f_pos_lock */ - file->private_data = NULL; + if (!offset) + file->private_data = (void *)ctx->next_offset; return vfs_setpos(file, offset, LONG_MAX); } @@ -507,7 +519,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; @@ -515,17 +527,21 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) - return ERR_PTR(-ENOENT); + return; + + if (dentry2offset(dentry) >= last_index) { + dput(dentry); + return; + } if (!offset_dir_emit(ctx, dentry)) { dput(dentry); - break; + return; } ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } - return NULL; } /** @@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; + long last_index = (long)file->private_data; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; - /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == DIR_OFFSET_MIN) - file->private_data = NULL; - else if (file->private_data == ERR_PTR(-ENOENT)) - return 0; - file->private_data = offset_iterate_dir(d_inode(dir), ctx); + offset_iterate_dir(d_inode(dir), ctx, last_index); return 0; } const struct file_operations simple_offset_dir_operations = { + .open = offset_dir_open, .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, diff --git a/fs/locks.c b/fs/locks.c index 9afb16e0683f..e45cad40f8b6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2984,7 +2984,7 @@ static int __init filelock_init(void) filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); - filelease_cache = kmem_cache_create("file_lock_cache", + filelease_cache = kmem_cache_create("file_lease_cache", sizeof(struct file_lease), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) { diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index 1b78e8b65ebc..7701c037c328 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -24,7 +24,7 @@ config NETFS_STATS config NETFS_DEBUG bool "Enable dynamic debugging netfslib and FS-Cache" - depends on NETFS + depends on NETFS_SUPPORT help This permits debugging to be dynamically enabled in the local caching management module. If this is set, the debugging output may be diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a688d4c75d99..27c750d39476 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -10,6 +10,97 @@ #include "internal.h" /* + * [DEPRECATED] Unlock the folios in a read operation for when the filesystem + * is using PG_private_2 and direct writing to the cache from here rather than + * marking the page for writeback. + * + * Note that we don't touch folio->private in this code. + */ +static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq, + size_t *account) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t start_page = rreq->start / PAGE_SIZE; + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + bool subreq_failed = false; + + XA_STATE(xas, &rreq->mapping->i_pages, start_page); + + /* Walk through the pagecache and the I/O request lists simultaneously. + * We may have a mixture of cached and uncached sections and we only + * really want to write out the uncached sections. This is slightly + * complicated by the possibility that we might have huge pages with a + * mixture inside. + */ + subreq = list_first_entry(&rreq->subrequests, + struct netfs_io_subrequest, rreq_link); + subreq_failed = (subreq->error < 0); + + trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2); + + rcu_read_lock(); + xas_for_each(&xas, folio, last_page) { + loff_t pg_end; + bool pg_failed = false; + bool folio_started = false; + + if (xas_retry(&xas, folio)) + continue; + + pg_end = folio_pos(folio) + folio_size(folio) - 1; + + for (;;) { + loff_t sreq_end; + + if (!subreq) { + pg_failed = true; + break; + } + + if (!folio_started && + test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) && + fscache_operation_valid(&rreq->cache_resources)) { + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); + folio_start_private_2(folio); + folio_started = true; + } + + pg_failed |= subreq_failed; + sreq_end = subreq->start + subreq->len - 1; + if (pg_end < sreq_end) + break; + + *account += subreq->transferred; + if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + subreq = list_next_entry(subreq, rreq_link); + subreq_failed = (subreq->error < 0); + } else { + subreq = NULL; + subreq_failed = false; + } + + if (pg_end == sreq_end) + break; + } + + if (!pg_failed) { + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { + if (folio->index == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) + _debug("no unlock"); + else + folio_unlock(folio); + } + } + rcu_read_unlock(); +} + +/* * Unlock the folios in a read operation. We need to set PG_writeback on any * folios we're going to write back before we unlock them. * @@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } } + /* Handle deprecated PG_private_2 case. */ + if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { + netfs_rreq_unlock_folios_pgpriv2(rreq, &account); + goto out; + } + /* Walk through the pagecache and the I/O request lists simultaneously. * We may have a mixture of cached and uncached sections and we only * really want to write out the uncached sections. This is slightly @@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) loff_t pg_end; bool pg_failed = false; bool wback_to_cache = false; - bool folio_started = false; if (xas_retry(&xas, folio)) continue; @@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) pg_failed = true; break; } - if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { - if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, - &subreq->flags)) { - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); - folio_start_private_2(folio); - folio_started = true; - } - } else { - wback_to_cache |= - test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); - } + + wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); pg_failed |= subreq_failed; sreq_end = subreq->start + subreq->len - 1; if (pg_end < sreq_end) @@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } rcu_read_unlock(); +out: task_io_account_read(account); if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -395,7 +483,7 @@ zero_out: } /** - * netfs_write_begin - Helper to prepare for writing + * netfs_write_begin - Helper to prepare for writing [DEPRECATED] * @ctx: The netfs context * @file: The file to read from * @mapping: The mapping to read from @@ -426,6 +514,9 @@ zero_out: * inode before calling this. * * This is usable whether or not caching is enabled. + * + * Note that this should be considered deprecated and netfs_perform_write() + * used instead. */ int netfs_write_begin(struct netfs_inode *ctx, struct file *file, struct address_space *mapping, @@ -466,7 +557,7 @@ retry: if (!netfs_is_cache_enabled(ctx) && netfs_skip_folio_read(folio, pos, len, false)) { netfs_stat(&netfs_n_rh_write_zskip); - goto have_folio; + goto have_folio_no_wait; } rreq = netfs_alloc_request(mapping, file, @@ -507,6 +598,10 @@ retry: netfs_put_request(rreq, false, netfs_rreq_trace_put_return); have_folio: + ret = folio_wait_private_2_killable(folio); + if (ret < 0) + goto error; +have_folio_no_wait: *_folio = folio; _leave(" = 0"); return 0; diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 4726c315453c..ca53c5d1622e 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -184,7 +184,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; - size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; + size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0..d4d4b3a8b106 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -741,6 +741,10 @@ again_locked: spin_lock(&cookie->lock); } if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) { + if (atomic_read(&cookie->n_accesses) != 0) + /* still being accessed: postpone it */ + break; + __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_LRU_DISCARDING); wake = true; diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b98368..5367caf3fa28 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -99,6 +99,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async) } /* + * [DEPRECATED] Deal with the completion of writing the data to the cache. We + * have to clear the PG_fscache bits on the folios involved and release the + * caller's ref. + * + * May be called in softirq mode and we inherit a ref from the caller. + */ +static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, + bool was_async) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t unlocked = 0; + bool have_unlocked = false; + + rcu_read_lock(); + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); + + xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + if (xas_retry(&xas, folio)) + continue; + + /* We might have multiple writes from the same huge + * folio, but we mustn't unlock a folio more than once. + */ + if (have_unlocked && folio->index <= unlocked) + continue; + unlocked = folio_next_index(folio) - 1; + trace_netfs_folio(folio, netfs_folio_trace_end_copy); + folio_end_private_2(folio); + have_unlocked = true; + } + } + + rcu_read_unlock(); + netfs_rreq_completed(rreq, was_async); +} + +static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, + bool was_async) /* [DEPRECATED] */ +{ + struct netfs_io_subrequest *subreq = priv; + struct netfs_io_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) { + netfs_stat(&netfs_n_rh_write_failed); + trace_netfs_failure(rreq, subreq, transferred_or_error, + netfs_fail_copy_to_cache); + } else { + netfs_stat(&netfs_n_rh_write_done); + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); + + /* If we decrement nr_copy_ops to 0, the ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, was_async); + + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); +} + +/* + * [DEPRECATED] Perform any outstanding writes to the cache. We inherit a ref + * from the caller. + */ +static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct netfs_io_subrequest *subreq, *next, *p; + struct iov_iter iter; + int ret; + + trace_netfs_rreq(rreq, netfs_rreq_trace_copy); + + /* We don't want terminating writes trying to wake us up whilst we're + * still going through the list. + */ + atomic_inc(&rreq->nr_copy_ops); + + list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { + list_del_init(&subreq->rreq_link); + netfs_put_subrequest(subreq, false, + netfs_sreq_trace_put_no_copy); + } + } + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + /* Amalgamate adjacent writes */ + while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + next = list_next_entry(subreq, rreq_link); + if (next->start != subreq->start + subreq->len) + break; + subreq->len += next->len; + list_del_init(&next->rreq_link); + netfs_put_subrequest(next, false, + netfs_sreq_trace_put_merged); + } + + ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, + subreq->len, rreq->i_size, true); + if (ret < 0) { + trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); + trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); + continue; + } + + iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, + subreq->start, subreq->len); + + atomic_inc(&rreq->nr_copy_ops); + netfs_stat(&netfs_n_rh_write); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); + trace_netfs_sreq(subreq, netfs_sreq_trace_write); + cres->ops->write(cres, subreq->start, &iter, + netfs_rreq_copy_terminated, subreq); + } + + /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, false); +} + +static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */ +{ + struct netfs_io_request *rreq = + container_of(work, struct netfs_io_request, work); + + netfs_rreq_do_write_to_cache(rreq); +} + +static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */ +{ + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); +} + +/* * Handle a short read. */ static void netfs_rreq_short_read(struct netfs_io_request *rreq, @@ -275,6 +415,10 @@ again: clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); + if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) && + test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) + return netfs_rreq_write_to_cache(rreq); + netfs_rreq_completed(rreq, was_async); } @@ -386,7 +530,8 @@ incomplete: if (transferred_or_error == 0) { if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { - subreq->error = -ENODATA; + if (rreq->origin != NETFS_DIO_READ) + subreq->error = -ENODATA; goto failed; } } else { @@ -457,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, } if (subreq->len > ictx->zero_point - subreq->start) subreq->len = ictx->zero_point - subreq->start; + + /* We limit buffered reads to the EOF, but let the + * server deal with larger-than-EOF DIO/unbuffered + * reads. + */ + if (subreq->len > rreq->i_size - subreq->start) + subreq->len = rreq->i_size - subreq->start; } - if (subreq->len > rreq->i_size - subreq->start) - subreq->len = rreq->i_size - subreq->start; if (rreq->rsize && subreq->len > rreq->rsize) subreq->len = rreq->rsize; @@ -595,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) do { _debug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); - if (rreq->origin == NETFS_DIO_READ && - rreq->start + rreq->submitted >= rreq->i_size) - break; if (!netfs_rreq_submit_slice(rreq, &io_iter)) break; + if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags)) + break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) break; diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index f4a642727479..0294df70c3ff 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, struct netfs_io_request *rreq; mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; struct kmem_cache *cache = mempool->pool_data; - bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || - origin == NETFS_DIO_READ || - origin == NETFS_DIO_WRITE); - bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); int ret; for (;;) { @@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, refcount_set(&rreq->ref, 1); __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - if (cached) { - __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); - if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) - /* Filesystem uses deprecated PG_private_2 marking. */ - __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); - } if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 9258d30cffe3..3f7e37e50c7d 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, { struct netfs_io_request *wreq; struct netfs_inode *ictx; + bool is_buffered = (origin == NETFS_WRITEBACK || + origin == NETFS_WRITETHROUGH); wreq = netfs_alloc_request(mapping, file, start, 0, origin); if (IS_ERR(wreq)) @@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, _enter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) + if (is_buffered && netfs_is_cache_enabled(ictx)) fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); wreq->contiguity = wreq->start; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 7202ce84d0eb..7a558dea75c4 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi { rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); return 0; } @@ -361,7 +363,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) return; sreq = netfs->sreq; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); if (hdr->error) diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index fbed0027996f..e8adae1bc260 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9e0ea6fc2aa3..34eb2c2cbcde 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) continue; } - ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, - SVC_SOCK_ANONYMOUS, + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 0131d83b912d..c034080c334b 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -51,12 +51,21 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). + */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } memset(bh->b_data, 0, i_blocksize(inode)); bh->b_bdev = inode->i_sb->s_bdev; @@ -67,6 +76,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) folio_unlock(bh->b_folio); folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, @@ -217,8 +232,8 @@ retry: } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index a139970e4804..862bdf23120e 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); - if (!bh) - return -ENOMEM; + if (IS_ERR(bh)) + return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c71ae5c04306..4a20e92474b2 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) static void cifs_security_flags_handle_must_flags(unsigned int *flags) { - unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; + unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL); if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) *flags = CIFSSEC_MUST_KRB5; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index c92937bed133..2c4b357d85e2 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1894,12 +1894,12 @@ init_cifs(void) WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); if (!serverclose_wq) { rc = -ENOMEM; - goto out_destroy_serverclose_wq; + goto out_destroy_deferredclose_wq; } rc = cifs_init_inodecache(); if (rc) - goto out_destroy_deferredclose_wq; + goto out_destroy_serverclose_wq; rc = cifs_init_netfs(); if (rc) @@ -1967,6 +1967,8 @@ out_destroy_netfs: cifs_destroy_netfs(); out_destroy_inodecache: cifs_destroy_inodecache(); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_destroy_deferredclose_wq: destroy_workqueue(deferredclose_wq); out_destroy_cifsoplockd_wq: @@ -1977,8 +1979,6 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); -out_destroy_serverclose_wq: - destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 62d5fee3e5eb..ca2bd204bcc5 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -147,6 +147,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 49 -#define CIFS_VERSION "2.49" +#define SMB3_PRODUCT_BUILD 50 +#define CIFS_VERSION "2.50" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8e86fec7dcd2..5c9b3e6cd95f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -345,7 +345,7 @@ struct smb_version_operations { /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); - /* close tree connecion */ + /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, @@ -816,7 +816,7 @@ struct TCP_Server_Info { * Protected by @refpath_lock and @srv_lock. The @refpath_lock is * mostly used for not requiring a copy of @leaf_fullpath when getting * cached or new DFS referrals (which might also sleep during I/O). - * While @srv_lock is held for making string and NULL comparions against + * While @srv_lock is held for making string and NULL comparisons against * both fields as in mount(2) and cache refresh. * * format: \\HOST\SHARE[\OPTIONAL PATH] @@ -1471,29 +1471,6 @@ struct cifs_io_parms { struct TCP_Server_Info *server; }; -struct cifs_aio_ctx { - struct kref refcount; - struct list_head list; - struct mutex aio_mutex; - struct completion done; - struct iov_iter iter; - struct kiocb *iocb; - struct cifsFileInfo *cfile; - struct bio_vec *bv; - loff_t pos; - unsigned int nr_pinned_pages; - ssize_t rc; - unsigned int len; - unsigned int total_len; - unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */ - bool should_dirty; - /* - * Indicates if this aio_ctx is for direct_io, - * If yes, iter is a copy of the user passed iov_iter - */ - bool direct_io; -}; - struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; @@ -1904,7 +1881,7 @@ static inline bool is_replayable_error(int error) #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 -#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ +#define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 @@ -1914,11 +1891,11 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else -#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ -#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ +#define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) @@ -2010,7 +1987,6 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled - * cifs_aio_ctx->aio_mutex cifs_aio_ctx cifs_aio_ctx_alloc ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c15bb5ee7eb7..497bf3c447bc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -619,8 +619,6 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); -struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); -void cifs_aio_ctx_release(struct kref *refcount); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 7a16e12f5da8..d2307162a2de 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2614,6 +2614,13 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions\n"); rc = -EOPNOTSUPP; goto out_fail; + } else if (ses->server->vals->protocol_id == SMB10_PROT_ID) + if (cap_unix(ses)) + cifs_dbg(FYI, "Unix Extensions requested on SMB1 mount\n"); + else { + cifs_dbg(VFS, "SMB1 Unix Extensions not supported by server\n"); + rc = -EOPNOTSUPP; + goto out_fail; } else { cifs_dbg(VFS, "Check vers= mount option. SMB3.11 disabled but required for POSIX extensions\n"); @@ -3686,6 +3693,7 @@ error: } #endif +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* * Issue a TREE_CONNECT request. */ @@ -3807,11 +3815,25 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, else tcon->Flags = 0; cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags); - } + /* + * reset_cifs_unix_caps calls QFSInfo which requires + * need_reconnect to be false, but we would not need to call + * reset_caps if this were not a reconnect case so must check + * need_reconnect flag here. The caller will also clear + * need_reconnect when tcon was successful but needed to be + * cleared earlier in the case of unix extensions reconnect + */ + if (tcon->need_reconnect && tcon->unix_ext) { + cifs_dbg(FYI, "resetting caps for %s\n", tcon->tree_name); + tcon->need_reconnect = false; + reset_cifs_unix_caps(xid, tcon, NULL, NULL); + } + } cifs_buf_release(smb_buffer); return rc; } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ static void delayed_free(struct rcu_head *p) { diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b2405dd4d4d4..1fc66bcf49eb 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -217,7 +217,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) goto out; } - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); rc = rdata->server->ops->async_readv(rdata); out: @@ -315,7 +316,7 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } - if (rdata->credits.value != 0) + if (rdata->credits.value != 0) { trace_smb3_rw_credits(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->credits.value, @@ -323,8 +324,12 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) rdata->server ? rdata->server->in_flight : 0, -rdata->credits.value, cifs_trace_rw_credits_free_subreq); + if (rdata->server) + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); + else + rdata->credits.value = 0; + } - add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); } @@ -2749,6 +2754,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2765,12 +2771,16 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, - NULL, CIFS_WRITE_OP)) - rc = netfs_buffered_write_iter_locked(iocb, from, NULL); - else + NULL, CIFS_WRITE_OP))) { rc = -EACCES; + goto out; + } + + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); + out: up_read(&cinode->lock_sem); netfs_end_io_write(inode); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 4a8aa1de9522..dd0afa23734c 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1042,13 +1042,26 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse.tag = tag)) { - case 0: /* SMB1 symlink */ + data->reparse.tag = tag; + if (!data->reparse.tag) { if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, &data->symlink_target); } + if (rc == -EOPNOTSUPP) + data->reparse.tag = IO_REPARSE_TAG_INTERNAL; + } + + switch (data->reparse.tag) { + case 0: /* SMB1 symlink */ + break; + case IO_REPARSE_TAG_INTERNAL: + rc = 0; + if (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY) { + cifs_create_junction_fattr(fattr, sb); + goto out; + } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 855ac5a62edf..44dbaf9929a4 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -170,7 +170,10 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, static int cifs_shutdown(struct super_block *sb, unsigned long arg) { struct cifs_sb_info *sbi = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *tcon; __u32 flags; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -178,14 +181,21 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; - if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) - return -EINVAL; + tlink = cifs_sb_tlink(sbi); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + + trace_smb3_shutdown_enter(flags, tcon->tid); + if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) { + rc = -EINVAL; + goto shutdown_out_err; + } if (cifs_forced_shutdown(sbi)) - return 0; + goto shutdown_good; cifs_dbg(VFS, "shut down requested (%d)", flags); -/* trace_cifs_shutdown(sb, flags);*/ /* * see: @@ -201,7 +211,8 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_DEFAULT: cifs_dbg(FYI, "shutdown with default flag not supported\n"); - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; /* * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not * data) but metadata writes are not cached on the client, so can treat @@ -210,11 +221,18 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; - return 0; + goto shutdown_good; default: - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; } + +shutdown_good: + trace_smb3_shutdown_done(flags, tcon->tid); return 0; +shutdown_out_err: + trace_smb3_shutdown_err(rc, flags, tcon->tid); + return rc; } static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 07c468ddb88a..c6f11e6f9eb9 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) * on simple responses (wct, bcc both zero) * in particular have seen this on * ulogoffX and FindClose. This leaves - * one byte of bcc potentially unitialized + * one byte of bcc potentially uninitialized */ /* zero rest of bcc */ tmp[sizeof(struct smb_hdr)+1] = 0; @@ -995,60 +995,6 @@ parse_DFS_referrals_exit: return rc; } -struct cifs_aio_ctx * -cifs_aio_ctx_alloc(void) -{ - struct cifs_aio_ctx *ctx; - - /* - * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io - * to false so that we know when we have to unreference pages within - * cifs_aio_ctx_release() - */ - ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - INIT_LIST_HEAD(&ctx->list); - mutex_init(&ctx->aio_mutex); - init_completion(&ctx->done); - kref_init(&ctx->refcount); - return ctx; -} - -void -cifs_aio_ctx_release(struct kref *refcount) -{ - struct cifs_aio_ctx *ctx = container_of(refcount, - struct cifs_aio_ctx, refcount); - - cifsFileInfo_put(ctx->cfile); - - /* - * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly - * which means that iov_iter_extract_pages() was a success and thus - * that we may have references or pins on pages that we need to - * release. - */ - if (ctx->bv) { - if (ctx->should_dirty || ctx->bv_need_unpin) { - unsigned int i; - - for (i = 0; i < ctx->nr_pinned_pages; i++) { - struct page *page = ctx->bv[i].bv_page; - - if (ctx->should_dirty) - set_page_dirty(page); - if (ctx->bv_need_unpin) - unpin_user_page(page); - } - } - kvfree(ctx->bv); - } - - kfree(ctx); -} - /** * cifs_alloc_hash - allocate hash and hash context together * @name: The name of the crypto hash algo @@ -1288,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, const char *full_path, bool *islink) { + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_ses *ses = tcon->ses; size_t len; char *path; @@ -1304,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, !is_tcon_dfs(tcon)) return 0; - spin_lock(&tcon->tc_lock); - if (!tcon->origin_fullpath) { - spin_unlock(&tcon->tc_lock); + spin_lock(&server->srv_lock); + if (!server->leaf_fullpath) { + spin_unlock(&server->srv_lock); return 0; } - spin_unlock(&tcon->tc_lock); + spin_unlock(&server->srv_lock); /* * Slow path - tcon is DFS and @full_path has prefix path, so attempt diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index a0ffbda90733..689d8a506d45 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -505,6 +505,10 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, } switch (tag) { + case IO_REPARSE_TAG_INTERNAL: + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + fallthrough; case IO_REPARSE_TAG_DFS: case IO_REPARSE_TAG_DFSR: case IO_REPARSE_TAG_MOUNT_POINT: diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 6b55d1df9e2f..2c0644bc4e65 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -12,6 +12,12 @@ #include "fs_context.h" #include "cifsglob.h" +/* + * Used only by cifs.ko to ignore reparse points from files when client or + * server doesn't support FSCTL_GET_REPARSE_POINT. + */ +#define IO_REPARSE_TAG_INTERNAL ((__u32)~0U) + static inline dev_t reparse_nfs_mkdev(struct reparse_posix_data *buf) { u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); @@ -78,10 +84,19 @@ static inline u32 reparse_mode_wsl_tag(mode_t mode) static inline bool reparse_inode_match(struct inode *inode, struct cifs_fattr *fattr) { + struct cifsInodeInfo *cinode = CIFS_I(inode); struct timespec64 ctime = inode_get_ctime(inode); - return (CIFS_I(inode)->cifsAttrs & ATTR_REPARSE) && - CIFS_I(inode)->reparse_tag == fattr->cf_cifstag && + /* + * Do not match reparse tags when client or server doesn't support + * FSCTL_GET_REPARSE_POINT. @fattr->cf_cifstag should contain correct + * reparse tag from query dir response but the client won't be able to + * read the reparse point data anyway. This spares us a revalidation. + */ + if (cinode->reparse_tag != IO_REPARSE_TAG_INTERNAL && + cinode->reparse_tag != fattr->cf_cifstag) + return false; + return (cinode->cifsAttrs & ATTR_REPARSE) && timespec64_equal(&ctime, &fattr->cf_ctime); } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5c02a12251c8..9f5bc41433c1 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -930,6 +930,8 @@ int smb2_query_path_info(const unsigned int xid, switch (rc) { case 0: + rc = parse_create_response(data, cifs_sb, &out_iov[0]); + break; case -EOPNOTSUPP: /* * BB TODO: When support for special files added to Samba @@ -948,7 +950,8 @@ int smb2_query_path_info(const unsigned int xid, cmds[num_cmds++] = SMB2_OP_GET_REPARSE; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, - FILE_READ_ATTRIBUTES | FILE_READ_EA, + FILE_READ_ATTRIBUTES | + FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, create_options | OPEN_REPARSE_POINT, ACL_NO_MODE); cifs_get_readable_path(tcon, full_path, &cfile); @@ -1256,7 +1259,8 @@ int smb2_query_reparse_point(const unsigned int xid, cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); cifs_get_readable_path(tcon, full_path, &cfile); - oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES, + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7fe59235f090..322cabc69c6f 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1812,6 +1812,10 @@ smb2_copychunk_range(const unsigned int xid, tcon = tlink_tcon(trgtfile->tlink); + trace_smb3_copychunk_enter(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); + while (len > 0) { pcchunk->SourceOffset = cpu_to_le64(src_off); pcchunk->TargetOffset = cpu_to_le64(dest_off); @@ -1863,6 +1867,9 @@ smb2_copychunk_range(const unsigned int xid, le32_to_cpu(retbuf->ChunksWritten), le32_to_cpu(retbuf->ChunkBytesWritten), bytes_written); + trace_smb3_copychunk_done(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); } else if (rc == -EINVAL) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) goto cchunk_out; @@ -2046,7 +2053,9 @@ smb2_duplicate_extents(const unsigned int xid, dup_ext_buf.ByteCount = cpu_to_le64(len); cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n", src_off, dest_off, len); - + trace_smb3_clone_enter(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); inode = d_inode(trgtfile->dentry); if (inode->i_size < dest_off + len) { rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); @@ -2075,6 +2084,15 @@ smb2_duplicate_extents(const unsigned int xid, cifs_dbg(FYI, "Non-zero response length in duplicate extents\n"); duplicate_extents_out: + if (rc) + trace_smb3_clone_err(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, + tcon->tid, tcon->ses->Suid, src_off, + dest_off, len, rc); + else + trace_smb3_clone_done(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); return rc; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9fc5b11c0b6c..83facb54276a 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon) if (tcon->seal && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) return 1; + if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) && + (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + return 1; return 0; } @@ -1562,8 +1565,14 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) cifs_small_buf_release(sess_data->iov[0].iov_base); if (rc == 0) sess_data->ses->expired_pwd = false; - else if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) + else if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) { + if (sess_data->ses->expired_pwd == false) + trace_smb3_key_expired(sess_data->server->hostname, + sess_data->ses->user_name, + sess_data->server->conn_id, + &sess_data->server->dstaddr, rc); sess_data->ses->expired_pwd = true; + } memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec)); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d74e829de51c..7bcc379014ca 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work) else response = get_empty_queue_buffer(info); if (!response) { - /* now switch to emtpy packet queue */ + /* now switch to empty packet queue */ if (use_receive_queue) { use_receive_queue = 0; continue; @@ -618,7 +618,7 @@ out: /* * Test if FRWR (Fast Registration Work Requests) is supported on the device - * This implementation requries FRWR on RDMA read/write + * This implementation requires FRWR on RDMA read/write * return value: true if it is supported */ static bool frwr_is_supported(struct ib_device_attr *attrs) @@ -2177,7 +2177,7 @@ cleanup_entries: * MR available in the list. It may access the list while the * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock * as they never modify the same places. However, there may be several CPUs - * issueing I/O trying to get MR at the same time, mr_list_lock is used to + * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. */ static struct smbd_mr *get_mr(struct smbd_connection *info) @@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* * There is no need for waiting for complemtion on ib_post_send * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution - * on the next ib_post_send when we actaully send I/O to remote peer + * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); if (!rc) diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 36d5295c2a6f..0f0c10c7ada7 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -206,6 +206,116 @@ DEFINE_SMB3_OTHER_ERR_EVENT(query_dir_err); DEFINE_SMB3_OTHER_ERR_EVENT(zero_err); DEFINE_SMB3_OTHER_ERR_EVENT(falloc_err); +/* + * For logging errors in reflink and copy_range ops e.g. smb2_copychunk_range + * and smb2_duplicate_extents + */ +DECLARE_EVENT_CLASS(smb3_copy_range_err_class, + TP_PROTO(unsigned int xid, + __u64 src_fid, + __u64 target_fid, + __u32 tid, + __u64 sesid, + __u64 src_offset, + __u64 target_offset, + __u32 len, + int rc), + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len, rc), + TP_STRUCT__entry( + __field(unsigned int, xid) + __field(__u64, src_fid) + __field(__u64, target_fid) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, src_offset) + __field(__u64, target_offset) + __field(__u32, len) + __field(int, rc) + ), + TP_fast_assign( + __entry->xid = xid; + __entry->src_fid = src_fid; + __entry->target_fid = target_fid; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->src_offset = src_offset; + __entry->target_offset = target_offset; + __entry->len = len; + __entry->rc = rc; + ), + TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", + __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, + __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc) +) + +#define DEFINE_SMB3_COPY_RANGE_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_copy_range_err_class, smb3_##name, \ + TP_PROTO(unsigned int xid, \ + __u64 src_fid, \ + __u64 target_fid, \ + __u32 tid, \ + __u64 sesid, \ + __u64 src_offset, \ + __u64 target_offset, \ + __u32 len, \ + int rc), \ + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len, rc)) + +DEFINE_SMB3_COPY_RANGE_ERR_EVENT(clone_err); +/* TODO: Add SMB3_COPY_RANGE_ERR_EVENT(copychunk_err) */ + +DECLARE_EVENT_CLASS(smb3_copy_range_done_class, + TP_PROTO(unsigned int xid, + __u64 src_fid, + __u64 target_fid, + __u32 tid, + __u64 sesid, + __u64 src_offset, + __u64 target_offset, + __u32 len), + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len), + TP_STRUCT__entry( + __field(unsigned int, xid) + __field(__u64, src_fid) + __field(__u64, target_fid) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, src_offset) + __field(__u64, target_offset) + __field(__u32, len) + ), + TP_fast_assign( + __entry->xid = xid; + __entry->src_fid = src_fid; + __entry->target_fid = target_fid; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->src_offset = src_offset; + __entry->target_offset = target_offset; + __entry->len = len; + ), + TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", + __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, + __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len) +) + +#define DEFINE_SMB3_COPY_RANGE_DONE_EVENT(name) \ +DEFINE_EVENT(smb3_copy_range_done_class, smb3_##name, \ + TP_PROTO(unsigned int xid, \ + __u64 src_fid, \ + __u64 target_fid, \ + __u32 tid, \ + __u64 sesid, \ + __u64 src_offset, \ + __u64 target_offset, \ + __u32 len), \ + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len)) + +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(copychunk_enter); +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(clone_enter); +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(copychunk_done); +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(clone_done); + /* For logging successful read or write */ DECLARE_EVENT_CLASS(smb3_rw_done_class, @@ -1171,6 +1281,46 @@ DEFINE_EVENT(smb3_connect_err_class, smb3_##name, \ DEFINE_SMB3_CONNECT_ERR_EVENT(connect_err); +DECLARE_EVENT_CLASS(smb3_sess_setup_err_class, + TP_PROTO(char *hostname, char *username, __u64 conn_id, + const struct __kernel_sockaddr_storage *dst_addr, int rc), + TP_ARGS(hostname, username, conn_id, dst_addr, rc), + TP_STRUCT__entry( + __string(hostname, hostname) + __string(username, username) + __field(__u64, conn_id) + __array(__u8, dst_addr, sizeof(struct sockaddr_storage)) + __field(int, rc) + ), + TP_fast_assign( + struct sockaddr_storage *pss = NULL; + + __entry->conn_id = conn_id; + __entry->rc = rc; + pss = (struct sockaddr_storage *)__entry->dst_addr; + *pss = *dst_addr; + __assign_str(hostname); + __assign_str(username); + ), + TP_printk("rc=%d user=%s conn_id=0x%llx server=%s addr=%pISpsfc", + __entry->rc, + __get_str(username), + __entry->conn_id, + __get_str(hostname), + __entry->dst_addr) +) + +#define DEFINE_SMB3_SES_SETUP_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_sess_setup_err_class, smb3_##name, \ + TP_PROTO(char *hostname, \ + char *username, \ + __u64 conn_id, \ + const struct __kernel_sockaddr_storage *addr, \ + int rc), \ + TP_ARGS(hostname, username, conn_id, addr, rc)) + +DEFINE_SMB3_SES_SETUP_ERR_EVENT(key_expired); + DECLARE_EVENT_CLASS(smb3_reconnect_class, TP_PROTO(__u64 currmid, __u64 conn_id, @@ -1238,7 +1388,7 @@ DECLARE_EVENT_CLASS(smb3_ioctl_class, __entry->command = command; ), TP_printk("xid=%u fid=0x%llx ioctl cmd=0x%x", - __entry->xid, __entry->fid, __entry->command) + __entry->xid, __entry->fid, __entry->command) ) #define DEFINE_SMB3_IOCTL_EVENT(name) \ @@ -1250,9 +1400,58 @@ DEFINE_EVENT(smb3_ioctl_class, smb3_##name, \ DEFINE_SMB3_IOCTL_EVENT(ioctl); +DECLARE_EVENT_CLASS(smb3_shutdown_class, + TP_PROTO(__u32 flags, + __u32 tid), + TP_ARGS(flags, tid), + TP_STRUCT__entry( + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("flags=0x%x tid=0x%x", + __entry->flags, __entry->tid) +) + +#define DEFINE_SMB3_SHUTDOWN_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_class, smb3_##name, \ + TP_PROTO(__u32 flags, \ + __u32 tid), \ + TP_ARGS(flags, tid)) + +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_enter); +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_done); +DECLARE_EVENT_CLASS(smb3_shutdown_err_class, + TP_PROTO(int rc, + __u32 flags, + __u32 tid), + TP_ARGS(rc, flags, tid), + TP_STRUCT__entry( + __field(int, rc) + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->rc = rc; + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("rc=%d flags=0x%x tid=0x%x", + __entry->rc, __entry->flags, __entry->tid) +) +#define DEFINE_SMB3_SHUTDOWN_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_err_class, smb3_##name, \ + TP_PROTO(int rc, \ + __u32 flags, \ + __u32 tid), \ + TP_ARGS(rc, flags, tid)) +DEFINE_SMB3_SHUTDOWN_ERR_EVENT(shutdown_err); DECLARE_EVENT_CLASS(smb3_credit_class, TP_PROTO(__u64 currmid, diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index adfe0d058701..6e68aaf5bd20 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, out: /* * This will dequeue all mids. After this it is important that the - * demultiplex_thread will not process any of these mids any futher. + * demultiplex_thread will not process any of these mids any further. * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c3ee42188d25..c769f9dbc0b4 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1216,6 +1216,8 @@ struct create_context { ); __u8 Buffer[]; } __packed; +static_assert(offsetof(struct create_context, Buffer) == sizeof(struct create_context_hdr), + "struct member likely outside of __struct_group()"); struct smb2_create_req { struct smb2_hdr hdr; diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 09e1e7771592..7889df8112b4 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) up_read(&conn_list_lock); } -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn) { wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); } +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id) +{ + struct ksmbd_conn *conn; + int rc, retry_count = 0, max_timeout = 120; + int rcount = 1; + +retry_idle: + if (retry_count >= max_timeout) + return -EIO; + + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) { + if (conn->binding || xa_load(&conn->sessions, sess_id)) { + if (conn == curr_conn) + rcount = 2; + if (atomic_read(&conn->req_running) >= rcount) { + rc = wait_event_timeout(conn->req_running_q, + atomic_read(&conn->req_running) < rcount, + HZ); + if (!rc) { + up_read(&conn_list_lock); + retry_count++; + goto retry_idle; + } + } + } + } + up_read(&conn_list_lock); + + return 0; +} + int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 5c2845e47cf2..5b947175c048 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -145,7 +145,8 @@ extern struct list_head conn_list; extern struct rw_semaphore conn_list_lock; bool ksmbd_conn_alive(struct ksmbd_conn *conn); -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id); +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index e0a6b758094f..d8d03070ae44 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -15,6 +15,7 @@ #include "share_config.h" #include "user_config.h" #include "user_session.h" +#include "../connection.h" #include "../transport_ipc.h" #include "../misc.h" @@ -120,12 +121,13 @@ static int parse_veto_list(struct ksmbd_share_config *share, return 0; } -static struct ksmbd_share_config *share_config_request(struct unicode_map *um, +static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config_response *resp; struct ksmbd_share_config *share = NULL; struct ksmbd_share_config *lookup; + struct unicode_map *um = work->conn->um; int ret; resp = ksmbd_ipc_share_config_request(name); @@ -181,7 +183,14 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, KSMBD_SHARE_CONFIG_VETO_LIST(resp), resp->veto_list_sz); if (!ret && share->path) { + if (__ksmbd_override_fsids(work, share)) { + kill_share(share); + share = NULL; + goto out; + } + ret = kern_path(share->path, 0, &share->vfs_path); + ksmbd_revert_fsids(work); if (ret) { ksmbd_debug(SMB, "failed to access '%s'\n", share->path); @@ -214,7 +223,7 @@ out: return share; } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config *share; @@ -227,7 +236,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, if (share) return share; - return share_config_request(um, name); + return share_config_request(work, name); } bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, diff --git a/fs/smb/server/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h index 5f591751b923..d4ac2dd4de20 100644 --- a/fs/smb/server/mgmt/share_config.h +++ b/fs/smb/server/mgmt/share_config.h @@ -11,6 +11,8 @@ #include <linux/path.h> #include <linux/unicode.h> +struct ksmbd_work; + struct ksmbd_share_config { char *name; char *path; @@ -68,7 +70,7 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share) __ksmbd_share_config_put(share); } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name); bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, const char *filename); diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index d2c81a8a11dd..94a52a75014a 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -16,17 +16,18 @@ #include "user_session.h" struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name) +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name) { struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; struct ksmbd_tree_connect_response *resp = NULL; struct ksmbd_share_config *sc; struct ksmbd_tree_connect *tree_conn = NULL; struct sockaddr *peer_addr; + struct ksmbd_conn *conn = work->conn; + struct ksmbd_session *sess = work->sess; int ret; - sc = ksmbd_share_config_get(conn->um, share_name); + sc = ksmbd_share_config_get(work, share_name); if (!sc) return status; @@ -61,7 +62,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, struct ksmbd_share_config *new_sc; ksmbd_share_config_del(sc); - new_sc = ksmbd_share_config_get(conn->um, share_name); + new_sc = ksmbd_share_config_get(work, share_name); if (!new_sc) { pr_err("Failed to update stale share config\n"); status.ret = -ESTALE; diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h index 6377a70b811c..a42cdd051041 100644 --- a/fs/smb/server/mgmt/tree_connect.h +++ b/fs/smb/server/mgmt/tree_connect.h @@ -13,6 +13,7 @@ struct ksmbd_share_config; struct ksmbd_user; struct ksmbd_conn; +struct ksmbd_work; enum { TREE_NEW = 0, @@ -50,8 +51,7 @@ static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn, struct ksmbd_session; struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name); +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name); void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon); int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 162a12685d2c..99416ce9f501 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, { struct ksmbd_session *prev_sess; struct ksmbd_user *prev_user; + int err; down_write(&sessions_table_lock); down_write(&conn->session_lock); @@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn, memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) goto out; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); + err = ksmbd_conn_wait_idle_sess_id(conn, id); + if (err) { + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + goto out; + } + ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 37a39ab4ee65..0bc9edf22ba4 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work, } sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, + spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } @@ -1955,7 +1958,7 @@ int smb2_tree_connect(struct ksmbd_work *work) ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n", name, treename); - status = ksmbd_tree_conn_connect(conn, sess, name); + status = ksmbd_tree_conn_connect(work, name); if (status.ret == KSMBD_TREE_CONN_STATUS_OK) rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id); else @@ -2210,7 +2213,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_conn_unlock(conn); ksmbd_close_session_fds(work); - ksmbd_conn_wait_idle(conn, sess_id); + ksmbd_conn_wait_idle(conn); /* * Re-lookup session to validate if session is deleted @@ -5357,7 +5360,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, "NTFS", PATH_MAX, conn->local_nls, 0); len = len * 2; info->FileSystemNameLen = cpu_to_le32(len); - sz = sizeof(struct filesystem_attribute_info) - 2 + len; + sz = sizeof(struct filesystem_attribute_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5383,7 +5386,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, len = len * 2; info->VolumeLabelSize = cpu_to_le32(len); info->Reserved = 0; - sz = sizeof(struct filesystem_vol_info) - 2 + len; + sz = sizeof(struct filesystem_vol_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5596,6 +5599,11 @@ int smb2_query_info(struct ksmbd_work *work) ksmbd_debug(SMB, "GOT query info request\n"); + if (ksmbd_override_fsids(work)) { + rc = -ENOMEM; + goto err_out; + } + switch (req->InfoType) { case SMB2_O_INFO_FILE: ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n"); @@ -5614,6 +5622,7 @@ int smb2_query_info(struct ksmbd_work *work) req->InfoType); rc = -EOPNOTSUPP; } + ksmbd_revert_fsids(work); if (!rc) { rsp->StructureSize = cpu_to_le16(9); @@ -5623,6 +5632,7 @@ int smb2_query_info(struct ksmbd_work *work) le32_to_cpu(rsp->OutputBufferLength)); } +err_out: if (rc < 0) { if (rc == -EACCES) rsp->hdr.Status = STATUS_ACCESS_DENIED; diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 474dadf6b7b8..13818ecb6e1b 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -732,10 +732,10 @@ bool is_asterisk(char *p) return p && p[0] == '*'; } -int ksmbd_override_fsids(struct ksmbd_work *work) +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share) { struct ksmbd_session *sess = work->sess; - struct ksmbd_share_config *share = work->tcon->share_conf; struct cred *cred; struct group_info *gi; unsigned int uid; @@ -775,6 +775,11 @@ int ksmbd_override_fsids(struct ksmbd_work *work) return 0; } +int ksmbd_override_fsids(struct ksmbd_work *work) +{ + return __ksmbd_override_fsids(work, work->tcon->share_conf); +} + void ksmbd_revert_fsids(struct ksmbd_work *work) { const struct cred *cred; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index f1092519c0c2..cc1d6dfe29d5 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -213,7 +213,7 @@ struct filesystem_attribute_info { __le32 Attributes; __le32 MaxPathNameComponentLength; __le32 FileSystemNameLen; - __le16 FileSystemName[1]; /* do not have to save this - get subset? */ + __le16 FileSystemName[]; /* do not have to save this - get subset? */ } __packed; struct filesystem_device_info { @@ -226,7 +226,7 @@ struct filesystem_vol_info { __le32 SerialNumber; __le32 VolumeLabelSize; __le16 Reserved; - __le16 VolumeLabel[1]; + __le16 VolumeLabel[]; } __packed; struct filesystem_info { @@ -447,6 +447,8 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command); int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp); +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share); int ksmbd_override_fsids(struct ksmbd_work *work); void ksmbd_revert_fsids(struct ksmbd_work *work); diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 16bd693d0b3a..d5918eba27e3 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino) if (err < 0) goto failed_read; - set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + if (inode->i_size > PAGE_SIZE) { + ERROR("Corrupted symlink\n"); + return -EINVAL; + } + + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_op = &squashfs_symlink_inode_ops; inode_nohighmem(inode); inode->i_data.a_ops = &squashfs_symlink_aops; diff --git a/fs/super.c b/fs/super.c index 095ba793e10c..38d72a3cf6fc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -736,6 +736,17 @@ struct super_block *sget_fc(struct fs_context *fc, struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns; int err; + /* + * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is + * not set, as the filesystem is likely unprepared to handle it. + * This can happen when fsconfig() is called from init_user_ns with + * an fs_fd opened in another user namespace. + */ + if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) { + errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed"); + return ERR_PTR(-EPERM); + } + retry: spin_lock(&sb_lock); if (test) { diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 5d88c184f0fc..01e99e98457d 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -112,7 +112,7 @@ static void release_ei(struct kref *ref) entry->release(entry->name, ei->data); } - call_rcu(&ei->rcu, free_ei_rcu); + call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) @@ -736,7 +736,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { cleanup_ei(ei); - ei = NULL; + ei = ERR_PTR(-EBUSY); } return ei; } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1028ab6d9a74..1748dff58c3b 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) struct tracefs_inode *ti; unsigned long flags; - ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); + ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; @@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) return &ti->vfs_inode; } -static void tracefs_free_inode_rcu(struct rcu_head *rcu) +static void tracefs_free_inode(struct inode *inode) { - struct tracefs_inode *ti; + struct tracefs_inode *ti = get_tracefs(inode); - ti = container_of(rcu, struct tracefs_inode, rcu); kmem_cache_free(tracefs_inode_cachep, ti); } -static void tracefs_free_inode(struct inode *inode) +static void tracefs_destroy_inode(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); unsigned long flags; @@ -69,8 +68,6 @@ static void tracefs_free_inode(struct inode *inode) spin_lock_irqsave(&tracefs_inode_lock, flags); list_del_rcu(&ti->list); spin_unlock_irqrestore(&tracefs_inode_lock, flags); - - call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -437,6 +434,7 @@ static int tracefs_drop_inode(struct inode *inode) static const struct super_operations tracefs_super_operations = { .alloc_inode = tracefs_alloc_inode, .free_inode = tracefs_free_inode, + .destroy_inode = tracefs_destroy_inode, .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, .show_options = tracefs_show_options, diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index f704d8348357..d83c2a25f288 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -10,10 +10,7 @@ enum { }; struct tracefs_inode { - union { - struct inode vfs_inode; - struct rcu_head rcu; - }; + struct inode vfs_inode; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ struct list_head list; unsigned long flags; diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 75461777c466..0b48cbab8a3d 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -82,6 +82,7 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /** * ubifs_compress - compress data. + * @c: UBIFS file-system description object * @in_buf: data to compress * @in_len: length of the data to compress * @out_buf: output buffer where compressed data should be stored @@ -140,6 +141,7 @@ no_compr: /** * ubifs_decompress - decompress data. + * @c: UBIFS file-system description object * @in_buf: data to decompress * @in_len: length of the data to decompress * @out_buf: output buffer where decompressed data should diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ac77ac1fd73e..d91cec93d968 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2827,9 +2827,9 @@ void dbg_debugfs_init_fs(struct ubifs_info *c) const char *fname; struct ubifs_debug_info *d = c->dbg; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n > UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ return; } diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index ed966108da80..d425861e6b82 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -19,10 +19,11 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, /* * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + * + 1 for "_" and 2 for UBI device numbers and 3 for volume number and 1 for + * the trailing zero byte. */ #define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2 + 3 + 1) /** * ubifs_debug_info - per-FS debugging information. @@ -103,7 +104,7 @@ struct ubifs_debug_info { unsigned int chk_fs:1; unsigned int tst_rcvry:1; - char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index eac0fef801f1..c77ea57fe696 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -71,8 +71,13 @@ static int inherit_flags(const struct inode *dir, umode_t mode) * @is_xattr: whether the inode is xattr inode * * This function finds an unused inode number, allocates new inode and - * initializes it. Returns new inode in case of success and an error code in - * case of failure. + * initializes it. Non-xattr new inode may be written with xattrs(selinux/ + * encryption) before writing dentry, which could cause inconsistent problem + * when powercut happens between two operations. To deal with it, non-xattr + * new inode is initialized with zero-nlink and added into orphan list, caller + * should make sure that inode is relinked later, and make sure that orphan + * removing and journal writing into an committing atomic operation. Returns + * new inode in case of success and an error code in case of failure. */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode, bool is_xattr) @@ -163,9 +168,25 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, ui->creat_sqnum = ++c->max_sqnum; spin_unlock(&c->cnt_lock); + if (!is_xattr) { + set_nlink(inode, 0); + err = ubifs_add_orphan(c, inode->i_ino); + if (err) { + ubifs_err(c, "ubifs_add_orphan failed: %i", err); + goto out_iput; + } + down_read(&c->commit_sem); + ui->del_cmtno = c->cmt_no; + up_read(&c->commit_sem); + } + if (encrypted) { err = fscrypt_set_context(inode, NULL); if (err) { + if (!is_xattr) { + set_nlink(inode, 1); + ubifs_delete_orphan(c, inode->i_ino); + } ubifs_err(c, "fscrypt_set_context failed: %i", err); goto out_iput; } @@ -320,12 +341,13 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -340,8 +362,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -386,7 +408,6 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) return inode; out_inode: - make_bad_inode(inode); iput(inode); out_free: ubifs_err(c, "cannot create whiteout file, error %d", err); @@ -470,6 +491,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&ui->ui_mutex); insert_inode_hash(inode); d_tmpfile(file, inode); @@ -479,7 +501,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, mutex_unlock(&ui->ui_mutex); lock_2_inodes(dir, inode); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -492,7 +514,6 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, out_cancel: unlock_2_inodes(dir, inode); out_inode: - make_bad_inode(inode); if (!instantiated) iput(inode); out_budg: @@ -760,10 +781,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); - /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ - if (inode->i_nlink == 0) - ubifs_delete_orphan(c, inode->i_ino); - inc_nlink(inode); ihold(inode); inode_set_ctime_current(inode); @@ -771,7 +788,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, inode->i_nlink == 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -785,8 +802,6 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); - if (inode->i_nlink == 0) - ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); @@ -846,7 +861,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -950,7 +965,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -1017,6 +1032,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); insert_inode_hash(inode); inc_nlink(inode); @@ -1025,7 +1041,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) { ubifs_err(c, "cannot create directory, error %d", err); goto out_cancel; @@ -1042,8 +1058,8 @@ out_cancel: dir_ui->ui_size = dir->i_size; drop_nlink(dir); mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1102,22 +1118,25 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) { + kfree(dev); + goto out_inode; + } + init_special_inode(inode, inode->i_mode, rdev); inode->i_size = ubifs_inode(inode)->ui_size = devlen; ui = ubifs_inode(inode); ui->data = dev; ui->data_len = devlen; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1132,10 +1151,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - /* Free inode->i_link before inode is marked as bad. */ - fscrypt_free_inode(inode); - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1186,6 +1203,10 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + ui = ubifs_inode(inode); ui->data = kmalloc(disk_link.len, GFP_NOFS); if (!ui->data) { @@ -1210,17 +1231,14 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, */ ui->data_len = disk_link.len - 1; inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1234,10 +1252,10 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: /* Free inode->i_link before inode is marked as bad. */ fscrypt_free_inode(inode); - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1405,14 +1423,10 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, */ err = ubifs_budget_space(c, &wht_req); if (err) { - /* - * Whiteout inode can not be written on flash by - * ubifs_jnl_write_inode(), because it's neither - * dirty nor zero-nlink. - */ iput(whiteout); goto out_release; } + set_nlink(whiteout, 1); /* Add the old_dentry size to the old_dir size. */ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); @@ -1491,7 +1505,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, } err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, - new_inode, &new_nm, whiteout, sync); + new_inode, &new_nm, whiteout, sync, !!whiteout); if (err) goto out_cancel; @@ -1544,6 +1558,7 @@ out_cancel: unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); if (whiteout) { ubifs_release_budget(c, &wht_req); + set_nlink(whiteout, 0); iput(whiteout); } out_release: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a1f46919934c..68e104423a48 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1027,7 +1027,7 @@ static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc, /* Is the folio fully inside i_size? */ if (folio_pos(folio) + len <= i_size) { - if (folio_pos(folio) >= synced_i_size) { + if (folio_pos(folio) + len > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_redirty; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 6ebf3c04ac5f..643718906b9f 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -73,7 +73,7 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -340,7 +340,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -581,7 +581,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -773,7 +773,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 74aee92433d7..4a35f9e8f668 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -359,7 +359,7 @@ static void wake_up_reservation(struct ubifs_info *c) } /** - * wake_up_reservation - add current task in queue or start queuing. + * add_or_start_queue - add current task in queue or start queuing. * @c: UBIFS file-system description object * * This function starts queuing if queuing is not started, otherwise adds @@ -643,6 +643,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) * @inode: inode to update * @deletion: indicates a directory entry deletion i.e unlink or rmdir * @xent: non-zero if the directory entry is an extended attribute entry + * @in_orphan: indicates whether the @inode is in orphan list * * This function updates an inode by writing a directory entry (or extended * attribute entry), the inode itself, and the parent directory inode (or the @@ -664,7 +665,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent) + int deletion, int xent, int in_orphan) { int err, dlen, ilen, len, lnum, ino_offs, dent_offs, orphan_added = 0; int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); @@ -750,7 +751,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_release; - if (last_reference) { + if (last_reference && !in_orphan) { err = ubifs_add_orphan(c, inode->i_ino); if (err) { release_head(c, BASEHD); @@ -806,6 +807,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_ro; + if (in_orphan && inode->i_nlink) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; @@ -1336,6 +1340,7 @@ out_free: * @new_nm: new name of the new directory entry * @whiteout: whiteout inode * @sync: non-zero if the write-buffer has to be synchronized + * @delete_orphan: indicates an orphan entry deletion for @whiteout * * This function implements the re-name operation which may involve writing up * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) @@ -1348,7 +1353,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync) + const struct inode *whiteout, int sync, int delete_orphan) { void *p; union ubifs_key key; @@ -1565,6 +1570,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, goto out_ro; } + if (delete_orphan) + ubifs_delete_orphan(c, whiteout->i_ino); + finish_reservation(c); if (new_inode) { mark_inode_clean(c, new_ui); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index a11c3dab7e16..8788740ec57f 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1005,7 +1005,7 @@ out: * @c: the UBIFS file-system description object * @lp: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @lst: lprops statistics to update + * @arg: lprops statistics to update * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 778a22bf9a92..441d0beca4cf 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1918,6 +1918,7 @@ out_err: * @pnode: where to keep a pnode * @cnode: where to keep a cnode * @in_tree: is the node in the tree in memory + * @ptr: union of node pointers * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in * the tree * @ptr.pnode: ditto for pnode diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 7adc37c10b6a..a148760fa49e 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -67,10 +67,13 @@ static int mst_node_check_hash(const struct ubifs_info *c, { u8 calc[UBIFS_MAX_HASH_LEN]; const void *node = mst; + int ret; - crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), + ret = crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc); + if (ret) + return ret; if (ubifs_check_hash(c, expected, calc)) return -EPERM; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 4909321d84cf..fb957d963ba6 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -42,24 +42,30 @@ static int dbg_check_orphans(struct ubifs_info *c); -static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, - struct ubifs_orphan *parent_orphan) +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) { struct ubifs_orphan *orphan, *o; struct rb_node **p, *parent = NULL; orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); if (!orphan) - return ERR_PTR(-ENOMEM); + return -ENOMEM; orphan->inum = inum; orphan->new = 1; - INIT_LIST_HEAD(&orphan->child_list); spin_lock(&c->orphan_lock); if (c->tot_orphans >= c->max_orphans) { spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-ENFILE); + return -ENFILE; } p = &c->orph_tree.rb_node; while (*p) { @@ -73,7 +79,7 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, ubifs_err(c, "orphaned twice"); spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-EINVAL); + return -EINVAL; } } c->tot_orphans += 1; @@ -83,14 +89,9 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, list_add_tail(&orphan->list, &c->orph_list); list_add_tail(&orphan->new_list, &c->orph_new); - if (parent_orphan) { - list_add_tail(&orphan->child_list, - &parent_orphan->child_list); - } - spin_unlock(&c->orphan_lock); dbg_gen("ino %lu", (unsigned long)inum); - return orphan; + return 0; } static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum) @@ -135,6 +136,7 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) if (orph->cmt) { orph->del = 1; + rb_erase(&orph->rb, &c->orph_tree); orph->dnext = c->orph_dnext; c->orph_dnext = orph; dbg_gen("delete later ino %lu", (unsigned long)orph->inum); @@ -145,59 +147,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) } /** - * ubifs_add_orphan - add an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * Add an orphan. This function is called when an inodes link count drops to - * zero. - */ -int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) -{ - int err = 0; - ino_t xattr_inum; - union ubifs_key key; - struct ubifs_dent_node *xent, *pxent = NULL; - struct fscrypt_name nm = {0}; - struct ubifs_orphan *xattr_orphan; - struct ubifs_orphan *orphan; - - orphan = orphan_add(c, inum, NULL); - if (IS_ERR(orphan)) - return PTR_ERR(orphan); - - lowest_xent_key(c, &key, inum); - while (1) { - xent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(xent)) { - err = PTR_ERR(xent); - if (err == -ENOENT) - break; - kfree(pxent); - return err; - } - - fname_name(&nm) = xent->name; - fname_len(&nm) = le16_to_cpu(xent->nlen); - xattr_inum = le64_to_cpu(xent->inum); - - xattr_orphan = orphan_add(c, xattr_inum, orphan); - if (IS_ERR(xattr_orphan)) { - kfree(pxent); - kfree(xent); - return PTR_ERR(xattr_orphan); - } - - kfree(pxent); - pxent = xent; - key_read(c, &xent->key, &key); - } - kfree(pxent); - - return 0; -} - -/** * ubifs_delete_orphan - delete an orphan. * @c: UBIFS file-system description object * @inum: orphan inode number @@ -206,7 +155,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) */ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) { - struct ubifs_orphan *orph, *child_orph, *tmp_o; + struct ubifs_orphan *orph; spin_lock(&c->orphan_lock); @@ -219,11 +168,6 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) return; } - list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { - list_del(&child_orph->child_list); - orphan_delete(c, child_orph); - } - orphan_delete(c, orph); spin_unlock(&c->orphan_lock); @@ -518,7 +462,6 @@ static void erase_deleted(struct ubifs_info *c) dnext = orphan->dnext; ubifs_assert(c, !orphan->new); ubifs_assert(c, orphan->del); - rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); @@ -571,51 +514,6 @@ int ubifs_clear_orphans(struct ubifs_info *c) } /** - * insert_dead_orphan - insert an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * This function is a helper to the 'do_kill_orphans()' function. The orphan - * must be kept until the next commit, so it is added to the rb-tree and the - * deletion list. - */ -static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) -{ - struct ubifs_orphan *orphan, *o; - struct rb_node **p, *parent = NULL; - - orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); - if (!orphan) - return -ENOMEM; - orphan->inum = inum; - - p = &c->orph_tree.rb_node; - while (*p) { - parent = *p; - o = rb_entry(parent, struct ubifs_orphan, rb); - if (inum < o->inum) - p = &(*p)->rb_left; - else if (inum > o->inum) - p = &(*p)->rb_right; - else { - /* Already added - no problem */ - kfree(orphan); - return 0; - } - } - c->tot_orphans += 1; - rb_link_node(&orphan->rb, parent, p); - rb_insert_color(&orphan->rb, &c->orph_tree); - list_add_tail(&orphan->list, &c->orph_list); - orphan->del = 1; - orphan->dnext = c->orph_dnext; - c->orph_dnext = orphan; - dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, - c->new_orphans, c->tot_orphans); - return 0; -} - -/** * do_kill_orphans - remove orphan inodes from the index. * @c: UBIFS file-system description object * @sleb: scanned LEB @@ -691,12 +589,12 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; for (i = 0; i < n; i++) { - union ubifs_key key1, key2; + union ubifs_key key; inum = le64_to_cpu(orph->inos[i]); - ino_key_init(c, &key1, inum); - err = ubifs_tnc_lookup(c, &key1, ino); + ino_key_init(c, &key, inum); + err = ubifs_tnc_lookup(c, &key, ino); if (err && err != -ENOENT) goto out_free; @@ -708,17 +606,10 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, dbg_rcvry("deleting orphaned inode %lu", (unsigned long)inum); - lowest_ino_key(c, &key1, inum); - highest_ino_key(c, &key2, inum); - - err = ubifs_tnc_remove_range(c, &key1, &key2); + err = ubifs_tnc_remove_ino(c, inum); if (err) goto out_ro; } - - err = insert_dead_orphan(c, inum); - if (err) - goto out_free; } *last_cmt_no = cmt_no; @@ -925,8 +816,12 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, inum = key_inum(c, &zbr->key); if (inum != ci->last_ino) { - /* Lowest node type is the inode node, so it comes first */ - if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + /* + * Lowest node type is the inode node or xattr entry(when + * selinux/encryption is enabled), so it comes first + */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY && + key_type(c, &zbr->key) != UBIFS_XENT_KEY) ubifs_err(c, "found orphan node ino %lu, type %d", (unsigned long)inum, key_type(c, &zbr->key)); ci->last_ino = inum; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 17da28d6247a..a950c5f2560e 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -29,6 +29,7 @@ * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @hash: node hash * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number * @list: links the replay list diff --git a/fs/ubifs/sysfs.c b/fs/ubifs/sysfs.c index 1c958148bb87..aae32222f11b 100644 --- a/fs/ubifs/sysfs.c +++ b/fs/ubifs/sysfs.c @@ -91,17 +91,17 @@ static struct kset ubifs_kset = { int ubifs_sysfs_register(struct ubifs_info *c) { int ret, n; - char dfs_dir_name[UBIFS_DFS_DIR_LEN+1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; c->stats = kzalloc(sizeof(struct ubifs_stats_info), GFP_KERNEL); if (!c->stats) { ret = -ENOMEM; goto out_last; } - n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n > UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ ret = -EINVAL; goto out_free; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1f3ea879d93a..d69a5a42d693 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -158,13 +158,6 @@ #endif /* - * The UBIFS sysfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. - */ -#define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) - -/* * Lockdep classes for UBIFS inode @ui_mutex. */ enum { @@ -923,8 +916,6 @@ struct ubifs_budget_req { * @rb: rb-tree node of rb-tree of orphans sorted by inode number * @list: list head of list of orphans in order added * @new_list: list head of list of orphans added since the last commit - * @child_list: list of xattr children if this orphan hosts xattrs, list head - * if this orphan is a xattr, not used otherwise. * @cnext: next orphan to commit * @dnext: next orphan to delete * @inum: inode number @@ -936,7 +927,6 @@ struct ubifs_orphan { struct rb_node rb; struct list_head list; struct list_head new_list; - struct list_head child_list; struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; @@ -1803,7 +1793,7 @@ int ubifs_consolidate_log(struct ubifs_info *c); /* journal.c */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent); + int deletion, int xent, int in_orphan); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, const union ubifs_key *key, const void *buf, int len); int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); @@ -1820,7 +1810,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync); + const struct inode *whiteout, int sync, int delete_orphan); int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, loff_t old_size, loff_t new_size); int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 0847db521984..f734588b224a 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -149,7 +149,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, if (strcmp(fname_name(nm), UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT) == 0) host_ui->flags |= UBIFS_CRYPT_FL; - err = ubifs_jnl_update(c, host, nm, inode, 0, 1); + err = ubifs_jnl_update(c, host, nm, inode, 0, 1, 0); if (err) goto out_cancel; ubifs_set_inode_flags(host); diff --git a/fs/unicode/mkutf8data.c b/fs/unicode/mkutf8data.c index bc1a7c8b5c8d..77b685db8275 100644 --- a/fs/unicode/mkutf8data.c +++ b/fs/unicode/mkutf8data.c @@ -3352,6 +3352,7 @@ static void write_file(void) fprintf(file, "};\n"); fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);"); fprintf(file, "\n"); + fprintf(file, "MODULE_DESCRIPTION(\"UTF8 data table\");\n"); fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n"); fclose(file); } diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c index eb2bbdd688d7..600e15efe9ed 100644 --- a/fs/unicode/utf8-selftest.c +++ b/fs/unicode/utf8-selftest.c @@ -14,8 +14,8 @@ #include "utf8n.h" -unsigned int failed_tests; -unsigned int total_tests; +static unsigned int failed_tests; +static unsigned int total_tests; /* Tests will be based on this version. */ #define UTF8_LATEST UNICODE_AGE(12, 1, 0) @@ -307,4 +307,5 @@ module_init(init_test_ucd); module_exit(exit_test_ucd); MODULE_AUTHOR("Gabriel Krisman Bertazi <krisman@collabora.co.uk>"); +MODULE_DESCRIPTION("Kernel module for testing utf-8 support"); MODULE_LICENSE("GPL"); diff --git a/fs/unicode/utf8data.c_shipped b/fs/unicode/utf8data.c_shipped index d9b62901aa96..dafa5fed761d 100644 --- a/fs/unicode/utf8data.c_shipped +++ b/fs/unicode/utf8data.c_shipped @@ -4120,4 +4120,5 @@ struct utf8data_table utf8_data_table = { .utf8data = utf8data, }; EXPORT_SYMBOL_GPL(utf8_data_table); +MODULE_DESCRIPTION("UTF8 data table"); MODULE_LICENSE("GPL v2"); diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index cb035da3f990..fb05f44f6c75 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t; * And, of course, we also need to take into account the dquot log format item * used to describe each dquot. */ -#define XFS_DQUOT_LOGRES(mp) \ +#define XFS_DQUOT_LOGRES \ ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 3dc8f785bf29..45aaf169806a 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -338,11 +338,11 @@ xfs_calc_write_reservation( blksz); t1 += adj; t3 += adj; - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 1); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -410,11 +410,11 @@ xfs_calc_itruncate_reservation( xfs_refcountbt_block_count(mp, 4), blksz); - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 2); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -466,7 +466,7 @@ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -577,7 +577,7 @@ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -641,7 +641,7 @@ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -729,7 +729,7 @@ xfs_calc_icreate_reservation( struct xfs_mount *mp) { struct xfs_trans_resv *resp = M_RES(mp); - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; unsigned int t1, t2, t3 = 0; t1 = xfs_calc_icreate_resv_alloc(mp); @@ -747,7 +747,7 @@ STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { - uint res = XFS_DQUOT_LOGRES(mp); + uint res = XFS_DQUOT_LOGRES; res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); @@ -829,7 +829,7 @@ STATIC uint xfs_calc_ifree_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + @@ -846,7 +846,7 @@ STATIC uint xfs_calc_ichange_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); @@ -955,7 +955,7 @@ STATIC uint xfs_calc_addafork_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + @@ -1003,7 +1003,7 @@ STATIC uint xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); @@ -1043,7 +1043,7 @@ STATIC uint xfs_calc_attrrm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + max((xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)) + diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0dbc484b182f..2f98d90d7fd6 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -696,7 +696,7 @@ xrep_agfl_init_header( * step. */ xagb_bitmap_init(&af.used_extents); - af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), + af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp); xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); if (error) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 24a15bf784f1..5ab2ac53c920 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -938,7 +938,13 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) + /* + * "attr" means that an attr fork was created at some point in + * the life of this filesystem. "attr2" means that inodes have + * variable-sized data/attr fork areas. Hence we only check + * attr here. + */ + if (!xfs_has_attr(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 733c410a2279..91e7b51ce068 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -799,7 +799,7 @@ xchk_parent_pptr( } if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out_pp; + goto out_names; /* * Complain if the number of parent pointers doesn't match the link diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 92ef4cdc486e..c886d5d0eb02 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -959,18 +959,16 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; __entry->ino = file_inode(xf->file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(xf->file, pathname, sizeof(pathname) - 1); + path = file_path(xf->file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("xfino 0x%lx path '%s'", __entry->ino, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 5c947e5ce8b8..7db386304875 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -139,7 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ - sbp->value = &sfe->nameval[sfe->namelen], + sbp->value = &sfe->nameval[sfe->namelen]; sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4e933db75b12..6b13666d4e96 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -483,6 +483,17 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; + + /* + * If S_DAX is enabled on this file, we can only switch the + * device if both support fsdax. We can't update S_DAX because + * there might be other threads walking down the access paths. + */ + if (IS_DAX(VFS_I(ip)) && + (mp->m_ddev_targp->bt_daxdev == NULL || + (mp->m_rtdev_targp && + mp->m_rtdev_targp->bt_daxdev == NULL))) + return -EINVAL; } if (rtflag) { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5646d300b286..180ce697305a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4715,20 +4715,18 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(file, pathname, sizeof(pathname) - 1); + path = file_path(file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("dev %d:%d xmino 0x%lx path '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fafcc9f3dbe..8ede9d099d1f 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -644,7 +644,12 @@ xfsaild( set_freezable(); while (1) { - if (tout) + /* + * Long waits of 50ms or more occur when we've run out of items + * to push, so we only want uninterruptible state if we're + * actually blocked on something. + */ + if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ab3d22f662f2..eaf849260bd6 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -110,7 +110,24 @@ xfs_attr_change( args->whichfork = XFS_ATTR_FORK; xfs_attr_sethash(args); - return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT); + /* + * Some xattrs must be resistant to allocation failure at ENOSPC, e.g. + * creating an inode with ACLs or security attributes requires the + * allocation of the xattr holding that information to succeed. Hence + * we allow xattrs in the VFS TRUSTED, SYSTEM, POSIX_ACL and SECURITY + * (LSM xattr) namespaces to dip into the reserve block pool to allow + * manipulation of these xattrs when at ENOSPC. These VFS xattr + * namespaces translate to the XFS_ATTR_ROOT and XFS_ATTR_SECURE on-disk + * namespaces. + * + * For most of these cases, these special xattrs will fit in the inode + * itself and so consume no extra space or only require temporary extra + * space while an overwrite is being made. Hence the use of the reserved + * pool is largely to avoid the worst case reservation from preventing + * the xattr from being created at ENOSPC. + */ + return xfs_attr_set(args, op, + args->attr_filter & (XFS_ATTR_ROOT | XFS_ATTR_SECURE)); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..9f1c1d225e32 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,13 +660,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, acpi_adr_space_type diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ad6afc5c4918..1ae44793132a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -911,13 +911,12 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) -#define RUNTIME_NAME(t,x) runtime_##t##_##x +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } -#define RUNTIME_CONST(t,x) \ - . = ALIGN(8); \ - RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ - *(RUNTIME_NAME(t,x)); \ - } +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); diff --git a/include/dt-bindings/i3c/i3c.h b/include/dt-bindings/i3c/i3c.h new file mode 100644 index 000000000000..373439218bba --- /dev/null +++ b/include/dt-bindings/i3c/i3c.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright 2024 NXP + */ + +#ifndef _DT_BINDINGS_I3C_I3C_H +#define _DT_BINDINGS_I3C_I3C_H + +#define I2C_FM (1 << 4) +#define I2C_FM_PLUS (0 << 4) + +#define I2C_FILTER (0 << 5) +#define I2C_NO_FILTER_HIGH_FREQUENCY (1 << 5) +#define I2C_NO_FILTER_LOW_FREQUENCY (2 << 5) + +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f0b95c76c707..0687a442fec7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,6 +259,12 @@ static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif +#ifdef CONFIG_RISCV +void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa); +#else +static inline void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) { } +#endif + #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..d3b66d77df7a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } +static inline void bitmap_copy_and_extend(unsigned long *to, + const unsigned long *from, + unsigned int count, unsigned int size) +{ + unsigned int copy = BITS_TO_LONGS(count); + + memcpy(to, from, copy * sizeof(long)); + if (count % BITS_PER_LONG) + to[copy - 1] &= BITMAP_LAST_WORD_MASK(count); + memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long)); +} + /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 46d4bdc634c0..ba35bbf07798 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,12 +47,17 @@ extern unsigned long __sw_hweight64(__u64 w); __builtin_constant_p(*(const unsigned long *)(addr))) ? \ const##op(nr, addr) : op(nr, addr)) +/* + * The following macros are non-atomic versions of their non-underscored + * counterparts. + */ #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) #define __change_bit(nr, addr) bitop(___change_bit, nr, addr) #define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) + #define test_bit(nr, addr) bitop(_test_bit, nr, addr) #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 634a302a39e3..279b4a640644 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -874,8 +874,8 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? - prog->aux->dst_prog->type : prog->type; + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? + prog->aux->saved_dst_prog_type : prog->type; } static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2594553bb30b..2df665fa2964 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -297,6 +297,15 @@ static inline void *offset_to_ptr(const int *off) #define is_unsigned_type(type) (!is_signed_type(type)) /* + * Useful shorthand for "is this condition known at compile-time?" + * + * Note that the condition may involve non-constant values, + * but the compiler may know enough about the details of the + * values to determine that the condition is statically true. + */ +#define statically_true(x) (__builtin_constant_p(x) && (x)) + +/* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 89f5c34ce4df..9316c39260e0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,7 +100,6 @@ enum cpuhp_state { CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, - CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, @@ -122,6 +121,7 @@ enum cpuhp_state { CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, + CPUHP_TMIGR_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, + CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 099e8b32dd68..53158de44b83 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1033,53 +1033,17 @@ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); -static inline void -set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &__cpu_possible_mask); - else - cpumask_clear_cpu(cpu, &__cpu_possible_mask); -} +#define assign_cpu(cpu, mask, val) \ + assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) -static inline void -set_cpu_enabled(unsigned int cpu, bool can_be_onlined) -{ - if (can_be_onlined) - cpumask_set_cpu(cpu, &__cpu_enabled_mask); - else - cpumask_clear_cpu(cpu, &__cpu_enabled_mask); -} - -static inline void -set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &__cpu_present_mask); - else - cpumask_clear_cpu(cpu, &__cpu_present_mask); -} +#define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) +#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) +#define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) +#define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) +#define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) void set_cpu_online(unsigned int cpu, bool online); -static inline void -set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &__cpu_active_mask); - else - cpumask_clear_cpu(cpu, &__cpu_active_mask); -} - -static inline void -set_cpu_dying(unsigned int cpu, bool dying) -{ - if (dying) - cpumask_set_cpu(cpu, &__cpu_dying_mask); - else - cpumask_clear_cpu(cpu, &__cpu_dying_mask); -} - /** * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 60b25020281f..0bea1afbd747 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -21,6 +21,21 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; +struct cxl_event_media_hdr { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + /* + * The meaning of Validity Flags from bit 2 is + * different across DRAM and General Media records + */ + u8 validity_flags[2]; + u8 channel; + u8 rank; +} __packed; + #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 struct cxl_event_generic { struct cxl_event_record_hdr hdr; @@ -33,14 +48,7 @@ struct cxl_event_generic { */ #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 struct cxl_event_gen_media { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 device[3]; u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; u8 reserved[46]; @@ -52,14 +60,7 @@ struct cxl_event_gen_media { */ #define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 struct cxl_event_dram { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 nibble_mask[3]; u8 bank_group; u8 bank; @@ -95,21 +96,13 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; -/* - * General Media or DRAM Event Common Fields - * - provides common access to phys_addr - */ -struct cxl_event_common { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; -} __packed; - union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; - struct cxl_event_common common; + /* dram & gen_media event header */ + struct cxl_event_media_hdr media_hdr; } __packed; /* diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17..989c94eddb2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..59b146a14dca 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -110,7 +110,7 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), * * f = dentry_open(&path, O_RDONLY, current_cred()); * if (IS_ERR(f)) - * return PTR_ERR(fd); + * return PTR_ERR(f); * * fd_install(fd, f); * return take_fd(fd); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 00abe0e5d602..1cca14cf5652 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -462,9 +462,8 @@ struct fw_iso_packet { /* rx: Sync bit, wait for matching sy */ u32 tag:2; /* tx: Tag in packet header */ u32 sy:4; /* tx: Sy in packet header */ - u32 header_length:8; /* Length of immediate header */ - /* tx: Top of 1394 isoch. data_block */ - u32 header[] __counted_by(header_length); + u32 header_length:8; /* Size of immediate header */ + u32 header[]; /* tx: Top of 1394 isoch. data_block */ }; #define FW_ISO_CONTEXT_TRANSMIT 0 diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..fb0426f349fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,6 +2392,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2415,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cff002be83eb..e25d9ebfdf89 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -74,14 +74,20 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) /* - * Mask of all large folio orders supported for file THP. + * Mask of all large folio orders supported for file THP. Folios in a DAX + * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to + * it. */ -#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) +#define THP_ORDERS_ALL_FILE_DAX \ + (BIT(PMD_ORDER) | BIT(PUD_ORDER)) +#define THP_ORDERS_ALL_FILE_DEFAULT \ + ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) /* * Mask of all large folio orders supported for THP. */ -#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) +#define THP_ORDERS_ALL \ + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT) #define TVA_SMAPS (1 << 0) /* Will be used for procfs */ #define TVA_IN_PF (1 << 1) /* Page fault handler */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..45bf05ad5c53 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 07e33bbc9256..377def497298 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0ca27dd86956..074f632868d9 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -33,6 +33,7 @@ enum { struct i3c_master_controller; struct i3c_bus; struct i3c_device; +extern const struct bus_type i3c_bus_type; /** * struct i3c_i2c_dev_desc - Common part of the I3C/I2C device descriptor diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3bb6198d1523..3315005df117 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -404,7 +404,7 @@ struct io_ring_ctx { spinlock_t napi_lock; /* napi_list lock */ /* napi busy poll default timeout */ - unsigned int napi_busy_poll_to; + ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; bool napi_enabled; @@ -461,7 +461,6 @@ enum { REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, REQ_F_POLL_NO_LAZY_BIT, - REQ_F_CANCEL_SEQ_BIT, REQ_F_CAN_POLL_BIT, REQ_F_BL_EMPTY_BIT, REQ_F_BL_NO_RECYCLE_BIT, @@ -536,8 +535,6 @@ enum { REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), - /* cancel sequence is set and valid */ - REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), /* file is pollable */ REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), /* buffer list was empty after selection of buffer */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311..04cbdae0052e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a7..b23c6d48392f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,13 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -2414,7 +2421,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs); + unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, @@ -2445,11 +2452,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * @@ -2476,8 +2483,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7e2eb091049a..0e5bf25d324f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -109,6 +109,7 @@ struct mem_cgroup_per_node { /* Fields which get updated often at the end. */ struct lruvec lruvec; + CACHELINE_PADDING(_pad2_); unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter; }; diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 2ec559284a9f..98008dd92153 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -26,56 +26,100 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* is_signed_type() isn't a constexpr for pointer types */ -#define __is_signed(x) \ - __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ - is_signed_type(typeof(x)), 0) +/* + * __sign_use for integer expressions: + * bit #0 set if ok for unsigned comparisons + * bit #1 set if ok for signed comparisons + * + * In particular, statically non-negative signed integer + * expressions are ok for both. + * + * NOTE! Unsigned types smaller than 'int' are implicitly + * converted to 'int' in expressions, and are accepted for + * signed conversions for now. This is debatable. + * + * Note that 'x' is the original expression, and 'ux' is + * the unique variable that contains the value. + * + * We use 'ux' for pure type checking, and 'x' for when + * we need to look at the value (but without evaluating + * it for side effects! Careful to only ever evaluate it + * with sizeof() or __builtin_constant_p() etc). + * + * Pointers end up being checked by the normal C type + * rules at the actual comparison, and these expressions + * only need to be careful to not cause warnings for + * pointer use. + */ +#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) +#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) +#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ + __signed_type_use(x,ux):__unsigned_type_use(x,ux)) -/* True for a non-negative signed int constant */ -#define __is_noneg_int(x) \ - (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) +/* + * To avoid warnings about casting pointers to integers + * of different sizes, we need that special sign type. + * + * On 64-bit we can just always use 'long', since any + * integer or pointer type can just be cast to that. + * + * This does not work for 128-bit signed integers since + * the cast would truncate them, but we do not use s128 + * types in the kernel (we do use 'u128', but they will + * be handled by the !is_signed_type() case). + * + * NOTE! The cast is there only to avoid any warnings + * from when values that aren't signed integer types. + */ +#ifdef CONFIG_64BIT + #define __signed_type(ux) long +#else + #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#endif +#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) + +#define __types_ok(x,y,ux,uy) \ + (__sign_use(x,ux) & __sign_use(y,uy)) -#define __types_ok(x, y) \ - (__is_signed(x) == __is_signed(y) || \ - __is_signed((x) + 0) == __is_signed((y) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok3(x,y,z,ux,uy,uz) \ + (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) #define __cmp_op_min < #define __cmp_op_max > #define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) -#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - static_assert(__types_ok(x, y), \ - #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ - __cmp(op, unique_x, unique_y); }) +#define __cmp_once_unique(op, type, x, y, ux, uy) \ + ({ type ux = (x); type uy = (y); __cmp(op, ux, uy); }) + +#define __cmp_once(op, type, x, y) \ + __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) + +#define __careful_cmp_once(op, x, y, ux, uy) ({ \ + __auto_type ux = (x); __auto_type uy = (y); \ + BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + #op"("#x", "#y") signedness error"); \ + __cmp(op, ux, uy); }) -#define __careful_cmp(op, x, y) \ - __builtin_choose_expr(__is_constexpr((x) - (y)), \ - __cmp(op, x, y), \ - __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) +#define __careful_cmp(op, x, y) \ + __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ +#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ + __auto_type uval = (val); \ + __auto_type ulo = (lo); \ + __auto_type uhi = (hi); \ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ - __clamp(unique_val, unique_lo, unique_hi); }) + BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ + __clamp(uval, ulo, uhi); }) -#define __careful_clamp(val, lo, hi) ({ \ - __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ - __clamp(val, lo, hi), \ - __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ - __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) +#define __careful_clamp(val, lo, hi) \ + __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) /** * min - return minimum of two values of the same or compatible types @@ -108,13 +152,20 @@ #define umax(x, y) \ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) +#define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ + __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ + BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + #op"3("#x", "#y", "#z") signedness error"); \ + __cmp(op, ux, __cmp(op, uy, uz)); }) + /** * min3 - return minimum of three values * @x: first value * @y: second value * @z: third value */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define min3(x, y, z) \ + __careful_op3(min, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * max3 - return maximum of three values @@ -122,7 +173,8 @@ * @y: second value * @z: third value */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) +#define max3(x, y, z) \ + __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero @@ -158,7 +210,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) +#define min_t(type, x, y) __cmp_once(min, type, x, y) /** * max_t - return maximum of two values, using the specified type @@ -166,7 +218,7 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) +#define max_t(type, x, y) __cmp_once(max, type, x, y) /* * Do not check the array parameter using __must_be_array(). @@ -270,4 +322,13 @@ static inline bool in_range32(u32 val, u32 start, u32 len) #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +/* + * Use these carefully: no type checking, and uses the arguments + * multiple times. Use for obvious constants only. + */ +#define MIN(a,b) __cmp(min,a,b) +#define MAX(a,b) __cmp(max,a,b) +#define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) +#define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) + #endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 4563b560ced7..6549d0979b28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} @@ -3137,21 +3148,7 @@ extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); /* Free the reserved page into the buddy system, so it gets managed. */ -static inline void free_reserved_page(struct page *page) -{ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - adjust_managed_page_count(page, 1); -} +void free_reserved_page(struct page *page); #define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d0288938cc2..983816608f15 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,8 +73,6 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark - * write to cache on read */ }; /* @@ -269,7 +267,6 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c12a329dd463..7b2ae2e43544 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -485,6 +485,9 @@ enum { NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, NVME_ID_NS_NVM_GUARD_MASK = 0x3, + NVME_ID_NS_NVM_QPIF_SHIFT = 9, + NVME_ID_NS_NVM_QPIF_MASK = 0xf, + NVME_ID_NS_NVM_QPIFS = 1 << 3, }; static inline __u8 nvme_elbaf_sts(__u32 elbaf) @@ -497,6 +500,11 @@ static inline __u8 nvme_elbaf_guard_type(__u32 elbaf) return (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) & NVME_ID_NS_NVM_GUARD_MASK; } +static inline __u8 nvme_elbaf_qualified_guard_type(__u32 elbaf) +{ + return (elbaf >> NVME_ID_NS_NVM_QPIF_SHIFT) & NVME_ID_NS_NVM_QPIF_MASK; +} + struct nvme_id_ctrl_nvm { __u8 vsl; __u8 wzsl; @@ -576,6 +584,7 @@ enum { NVME_NVM_NS_16B_GUARD = 0, NVME_NVM_NS_32B_GUARD = 1, NVME_NVM_NS_64B_GUARD = 2, + NVME_NVM_NS_QTYPE_GUARD = 3, }; static inline __u8 nvme_lbaf_index(__u8 flbas) diff --git a/include/linux/of.h b/include/linux/of.h index 13cf7a43b473..85b60ac9eec5 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -430,11 +430,9 @@ extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) /* - * struct property *prop; - * const __be32 *p; * u32 u; * - * of_property_for_each_u32(np, "propname", prop, p, u) + * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, @@ -1431,11 +1429,12 @@ static inline int of_property_read_s32(const struct device_node *np, err == 0; \ err = of_phandle_iterator_next(it)) -#define of_property_for_each_u32(np, propname, prop, p, u) \ - for (prop = of_find_property(np, propname, NULL), \ - p = of_prop_next_u32(prop, NULL, &u); \ - p; \ - p = of_prop_next_u32(prop, p, &u)) +#define of_property_for_each_u32(np, propname, u) \ + for (struct {struct property *prop; const __be32 *item; } _it = \ + {of_find_property(np, propname, NULL), \ + of_prop_next_u32(_it.prop, NULL, &u)}; \ + _it.item; \ + _it.item = of_prop_next_u32(_it.prop, _it.item, &u)) #define of_property_for_each_string(np, propname, prop, s) \ for (prop = of_find_property(np, propname, NULL), \ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 547e82cdc89a..fc6b9c87cb0a 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,13 +41,13 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b..54d90b6c5f47 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d9..207f0c83c8e9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} diff --git a/include/linux/profile.h b/include/linux/profile.h index 2fb487f61d12..3f53cdb0c27c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -10,7 +10,6 @@ #define CPU_PROFILING 1 #define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d..35f039ecb272 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471e..fd35d4ec12e1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -193,7 +193,6 @@ void ring_buffer_set_clock(struct trace_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); struct buffer_data_read_page; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a03..d47d5f14ff99 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..b86ddca46b9e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9435185c10ef..42bedcddd511 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -680,7 +680,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; - atomic_t ref; /* ref count for opened files */ + refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 3064314f4832..d8e4105a2f21 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -5,6 +5,7 @@ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/minmax.h> +#include <linux/nospec.h> #include <linux/sched.h> #include <linux/thread_info.h> @@ -138,13 +139,26 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) return raw_copy_to_user(to, from, n); } -#ifdef INLINE_COPY_FROM_USER +/* + * Architectures that #define INLINE_COPY_TO_USER use this function + * directly in the normal copy_to/from_user(), the other ones go + * through an extern _copy_to/from_user(), which expands the same code + * here. + * + * Rust code always uses the extern definition. + */ static inline __must_check unsigned long -_copy_from_user(void *to, const void __user *from, unsigned long n) +_inline_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); @@ -153,14 +167,11 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) memset(to + (n - res), 0, res); return res; } -#else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); -#endif -#ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long -_copy_to_user(void __user *to, const void *from, unsigned long n) +_inline_copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) @@ -171,25 +182,32 @@ _copy_to_user(void __user *to, const void *from, unsigned long n) } return n; } -#else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); -#endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (check_copy_size(to, n, false)) - n = _copy_from_user(to, from, n); - return n; + if (!check_copy_size(to, n, false)) + return n; +#ifdef INLINE_COPY_FROM_USER + return _inline_copy_from_user(to, from, n); +#else + return _copy_from_user(to, from, n); +#endif } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (check_copy_size(from, n, true)) - n = _copy_to_user(to, from, n); - return n; + if (!check_copy_size(from, n, true)) + return n; + +#ifdef INLINE_COPY_TO_USER + return _inline_copy_to_user(to, from, n); +#else + return _copy_to_user(to, from, n); +#endif } #ifndef copy_mc_to_kernel diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ecc5cb7b8c91..4b16844c6bc2 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,6 +10,7 @@ #include <linux/mod_devicetable.h> #include <linux/gfp.h> #include <linux/dma-mapping.h> +#include <linux/completion.h> /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -109,6 +110,8 @@ struct virtio_admin_cmd { __le64 group_member_id; struct scatterlist *data_sg; struct scatterlist *result_sg; + struct completion completion; + int ret; }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ab4b9a3fef6b..169c7d367fac 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -104,8 +104,6 @@ struct virtqueue_info { * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. - * @create_avq: create admin virtqueue resource. - * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -133,8 +131,6 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); - int (*create_avq)(struct virtio_device *vdev); - void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c3..6c395a2600e8 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ retry: if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd17942036..9eb77c9007e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,13 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ @@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5..24d970f7a4fa 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -230,8 +230,12 @@ struct vsock_tap { int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); +int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 6439fd8b437b..7caa334f4888 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -275,6 +275,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a6aa112e5741..a51acefa785f 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -277,6 +277,11 @@ static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_ba return 0; } +static inline bool cs35l56_is_otp_register(unsigned int reg) +{ + return (reg >> 16) == 3; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index ceca69b46a82..bf2e381cd124 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -462,6 +462,11 @@ int snd_soc_component_force_enable_pin_unlocked( const char *pin); /* component controls */ +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl); +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 99c41bfc7827..00fd4d449ff3 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -16,11 +16,11 @@ #define __TAS2781_TLV_H__ static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); -static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); -static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); +static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); /* pow(10, db/20) * pow(2,30) */ -static const unsigned char tas2563_dvc_table[][4] = { +static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index 28c364c63245..d099ae27f849 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -13,6 +13,7 @@ struct ump_cvt_to_ump_bank { unsigned char cc_nrpn_msb, cc_nrpn_lsb; unsigned char cc_data_msb, cc_data_lsb; unsigned char cc_bank_msb, cc_bank_lsb; + bool cc_data_msb_set, cc_data_lsb_set; }; /* context for converting from MIDI1 byte stream to UMP packet */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 246c0fbd582e..0a523023bdcc 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 09e72215b9f9..085b749cdd97 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index da23484268df..606b4a0f92da 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -51,6 +51,7 @@ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ @@ -145,6 +146,7 @@ EM(netfs_folio_trace_clear_g, "clear-g") \ EM(netfs_folio_trace_clear_s, "clear-s") \ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h index 79f19e76a80b..47db5eaf2f9a 100644 --- a/include/trace/events/timer_migration.h +++ b/include/trace/events/timer_migration.h @@ -43,7 +43,7 @@ TRACE_EVENT(tmigr_connect_child_parent, __field( unsigned int, lvl ) __field( unsigned int, numa_node ) __field( unsigned int, num_children ) - __field( u32, childmask ) + __field( u32, groupmask ) ), TP_fast_assign( @@ -52,11 +52,11 @@ TRACE_EVENT(tmigr_connect_child_parent, __entry->lvl = child->parent->level; __entry->numa_node = child->parent->numa_node; __entry->num_children = child->parent->num_children; - __entry->childmask = child->childmask; + __entry->groupmask = child->groupmask; ), - TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", - __entry->child, __entry->childmask, __entry->parent, + TP_printk("group=%p groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->child, __entry->groupmask, __entry->parent, __entry->lvl, __entry->numa_node, __entry->num_children) ); @@ -72,7 +72,7 @@ TRACE_EVENT(tmigr_connect_cpu_parent, __field( unsigned int, lvl ) __field( unsigned int, numa_node ) __field( unsigned int, num_children ) - __field( u32, childmask ) + __field( u32, groupmask ) ), TP_fast_assign( @@ -81,11 +81,11 @@ TRACE_EVENT(tmigr_connect_cpu_parent, __entry->lvl = tmc->tmgroup->level; __entry->numa_node = tmc->tmgroup->numa_node; __entry->num_children = tmc->tmgroup->num_children; - __entry->childmask = tmc->childmask; + __entry->groupmask = tmc->groupmask; ), - TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", - __entry->cpu, __entry->childmask, __entry->parent, + TP_printk("cpu=%d groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->cpu, __entry->groupmask, __entry->parent, __entry->lvl, __entry->numa_node, __entry->num_children) ); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 985a262d0f9e..5bf6148cac2b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,11 +841,8 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) -#define __NR_uretprobe 463 -__SYSCALL(__NR_uretprobe, sys_uretprobe) - #undef __NR_syscalls -#define __NR_syscalls 464 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac..adc2524fd8e3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6..5fad3d0fcd70 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include <linux/ioctl.h> +#include <linux/types.h> #define NSIO 0xb7 @@ -16,7 +17,7 @@ /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) /* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 2289b7c76c59..832c15d9155b 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -51,6 +51,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a43b14276bc3..cac0cdb9a916 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1109,6 +1109,7 @@ struct ufs_hba { bool ext_iid_sup; bool scsi_host_added; bool mcq_sup; + bool lsdb_sup; bool mcq_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65..9917c7743d80 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -77,6 +77,7 @@ enum { MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, MASK_CRYPTO_SUPPORT = 0x10000000, + MASK_LSDB_SUPPORT = 0x20000000, MASK_MCQ_SUPPORT = 0x40000000, }; diff --git a/init/Kconfig b/init/Kconfig index c41260ffe99a..5783a0b87517 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1902,6 +1902,7 @@ config RUST depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT + depends on !SHADOW_CALL_STACK depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE help Enables Rust support in the kernel. @@ -1919,12 +1920,15 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default "$(shell,$(RUSTC) --version 2>/dev/null)" config BINDGEN_VERSION_TEXT string depends on RUST - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n) + # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 + # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when + # the minimum version is upgraded past that (0.69.1 already fixed the issue). + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8e6faa942a6f..3942db160f18 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1849,7 +1849,7 @@ fail: } while (1); /* avoid locking problems by failing it from a clean context */ - if (ret < 0) + if (ret) io_req_task_queue_fail(req, ret); } @@ -2416,12 +2416,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (uts) { struct timespec64 ts; + ktime_t dt; if (get_timespec64(&ts, uts)) return -EFAULT; - iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); - io_napi_adjust_timeout(ctx, &iowq, &ts); + dt = timespec64_to_ktime(ts); + iowq.timeout = ktime_add(dt, ktime_get()); + io_napi_adjust_timeout(ctx, &iowq, dt); } if (sig) { @@ -3031,8 +3033,11 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) bool loop = false; io_uring_drop_tctx_refs(current); + if (!tctx_inflight(tctx, !cancel_all)) + break; + /* read completions before cancelations */ - inflight = tctx_inflight(tctx, !cancel_all); + inflight = tctx_inflight(tctx, false); if (!inflight) break; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e1ce908f0679..c2acf6180845 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -43,7 +43,7 @@ struct io_wait_queue { ktime_t timeout; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int napi_busy_poll_to; + ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; #endif }; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 29fa9285a33d..7fd9badcfaf8 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -110,10 +110,10 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx) if (spin_trylock(&ctx->msg_lock)) { req = io_alloc_cache_get(&ctx->msg_cache); spin_unlock(&ctx->msg_lock); + if (req) + return req; } - if (req) - return req; - return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN); + return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); } static int io_msg_data_remote(struct io_kiocb *req) diff --git a/io_uring/napi.c b/io_uring/napi.c index 762254a7ff3f..1de1d4d62925 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -26,13 +26,18 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; - e->timeout = jiffies + NAPI_TIMEOUT; return e; } return NULL; } +static inline ktime_t net_to_ktime(unsigned long t) +{ + /* napi approximating usecs, reverse busy_loop_current_time */ + return ns_to_ktime(t << 10); +} + void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) { struct hlist_head *hash_list; @@ -102,14 +107,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) __io_napi_remove_stale(ctx); } -static inline bool io_napi_busy_loop_timeout(unsigned long start_time, - unsigned long bp_usec) +static inline bool io_napi_busy_loop_timeout(ktime_t start_time, + ktime_t bp) { - if (bp_usec) { - unsigned long end_time = start_time + bp_usec; - unsigned long now = busy_loop_current_time(); + if (bp) { + ktime_t end_time = ktime_add(start_time, bp); + ktime_t now = net_to_ktime(busy_loop_current_time()); - return time_after(now, end_time); + return ktime_after(now, end_time); } return true; @@ -124,7 +129,8 @@ static bool io_napi_busy_loop_should_end(void *data, return true; if (io_should_wake(iowq) || io_has_work(iowq->ctx)) return true; - if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) + if (io_napi_busy_loop_timeout(net_to_ktime(start_time), + iowq->napi_busy_poll_dt)) return true; return false; @@ -181,10 +187,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, */ void io_napi_init(struct io_ring_ctx *ctx) { + u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC; + INIT_LIST_HEAD(&ctx->napi_list); spin_lock_init(&ctx->napi_lock); ctx->napi_prefer_busy_poll = false; - ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll); + ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); } /* @@ -196,7 +204,6 @@ void io_napi_init(struct io_ring_ctx *ctx) void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - LIST_HEAD(napi_list); unsigned int i; spin_lock(&ctx->napi_lock); @@ -217,11 +224,13 @@ void io_napi_free(struct io_ring_ctx *ctx) int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { - .busy_poll_to = ctx->napi_busy_poll_to, + .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; struct io_uring_napi napi; + if (ctx->flags & IORING_SETUP_IOPOLL) + return -EINVAL; if (copy_from_user(&napi, arg, sizeof(napi))) return -EFAULT; if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) @@ -230,7 +239,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) if (copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; - WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to); + WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); WRITE_ONCE(ctx->napi_enabled, true); return 0; @@ -247,14 +256,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { - .busy_poll_to = ctx->napi_busy_poll_to, + .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; if (arg && copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; - WRITE_ONCE(ctx->napi_busy_poll_to, 0); + WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); WRITE_ONCE(ctx->napi_enabled, false); return 0; @@ -271,25 +280,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) * the NAPI timeout accordingly. */ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - struct timespec64 *ts) + ktime_t to_wait) { - unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); - - if (ts) { - struct timespec64 poll_to_ts; - - poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to); - if (timespec64_compare(ts, &poll_to_ts) < 0) { - s64 poll_to_ns = timespec64_to_ns(ts); - if (poll_to_ns > 0) { - u64 val = poll_to_ns + 999; - do_div(val, 1000); - poll_to = val; - } - } - } + ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); + + if (to_wait) + poll_dt = min(poll_dt, to_wait); - iowq->napi_busy_poll_to = poll_to; + iowq->napi_busy_poll_dt = poll_dt; } /* @@ -303,7 +301,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); - if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) + if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } @@ -315,10 +313,9 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) */ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { - LIST_HEAD(napi_list); bool is_stale = false; - if (!READ_ONCE(ctx->napi_busy_poll_to)) + if (!READ_ONCE(ctx->napi_busy_poll_dt)) return 0; if (list_empty_careful(&ctx->napi_list)) return 0; diff --git a/io_uring/napi.h b/io_uring/napi.h index 6fc0393d0dbe..27b88c3eb428 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -18,7 +18,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock); void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, - struct io_wait_queue *iowq, struct timespec64 *ts); + struct io_wait_queue *iowq, ktime_t to_wait); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); @@ -29,11 +29,11 @@ static inline bool io_napi(struct io_ring_ctx *ctx) static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - struct timespec64 *ts) + ktime_t to_wait) { if (!io_napi(ctx)) return; - __io_napi_adjust_timeout(ctx, iowq, ts); + __io_napi_adjust_timeout(ctx, iowq, to_wait); } static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, @@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_busy_poll_to)) + if (!READ_ONCE(ctx->napi_enabled)) return; sock = sock_from_file(req->file); @@ -88,7 +88,7 @@ static inline void io_napi_add(struct io_kiocb *req) } static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - struct timespec64 *ts) + ktime_t to_wait) { } static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, diff --git a/io_uring/net.c b/io_uring/net.c index 594490a1389b..d08abcca89cc 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -601,17 +601,18 @@ retry_bundle: .iovs = &kmsg->fast_iov, .max_len = INT_MAX, .nr_iovs = 1, - .mode = KBUF_MODE_EXPAND, }; if (kmsg->free_iov) { arg.nr_iovs = kmsg->free_iov_nr; arg.iovs = kmsg->free_iov; - arg.mode |= KBUF_MODE_FREE; + arg.mode = KBUF_MODE_FREE; } if (!(sr->flags & IORING_RECVSEND_BUNDLE)) arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; ret = io_buffers_select(req, &arg, issue_flags); if (unlikely(ret < 0)) @@ -623,6 +624,7 @@ retry_bundle: if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } @@ -1094,6 +1096,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } else { void __user *buf; diff --git a/io_uring/poll.c b/io_uring/poll.c index 0a8e02944689..1f63b60e85e7 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -347,6 +347,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); + io_napi_add(req); return IOU_POLL_NO_ACTION; } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index b3722e5275e7..3b50dc9586d1 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -44,7 +44,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(data_race(sqd->thread) == current); atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 1c9bf07499b1..9973876d91b0 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -639,7 +639,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) static bool io_match_task(struct io_kiocb *head, struct task_struct *task, bool cancel_all) - __must_hold(&req->ctx->timeout_lock) + __must_hold(&head->ctx->timeout_lock) { struct io_kiocb *req; diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index a54163a83968..8391c7c7c1ec 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -265,7 +265,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) req_set_fail(req); io_req_uring_cleanup(req, issue_flags); io_req_set_res(req, ret, 0); - return ret < 0 ? ret : IOU_OK; + return IOU_OK; } int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 543b8c9edac7..148536446457 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17390,8 +17390,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; if (exact != NOT_EXACT && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false; if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026..b1fd2a3db91a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/** + * Check if the core a CPU belongs to is online + */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; - if (!cpu_smt_thread_allowed(cpu)) + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index d3b4cd12bdd1..64d44a52c011 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -423,7 +423,8 @@ retry: if (high && search_end == CRASH_ADDR_HIGH_MAX) { search_end = CRASH_ADDR_LOW_MAX; search_base = 0; - goto retry; + if (search_end != CRASH_ADDR_HIGH_MAX) + goto retry; } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index a6e3792b15f8..d570535342cb 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -416,8 +416,11 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. + * + * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end + * up right back in the DMA debugging code, leading to a deadlock. */ -static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) diff --git a/kernel/events/core.c b/kernel/events/core.c index aa3450bdc227..c973e3c11e03 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9706,7 +9706,8 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); - if (event->prog && !bpf_overflow_handler(event, data, regs)) + if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT && + !bpf_overflow_handler(event, data, regs)) return ret; /* diff --git a/kernel/fork.c b/kernel/fork.c index cc760491f201..18bdc87209d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,11 +2053,24 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - bool thread = flags & PIDFD_THREAD; - - if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + if (!pid) return -EINVAL; + scoped_guard(rcu) { + struct task_struct *tsk; + + if (flags & PIDFD_THREAD) + tsk = pid_task(pid, PIDTYPE_PID); + else + tsk = pid_task(pid, PIDTYPE_TGID); + if (!tsk) + return -EINVAL; + + /* Don't create pidfds for kernel threads for now. */ + if (tsk->flags & PF_KTHREAD) + return -EINVAL; + } + return __pidfd_prepare(pid, flags, ret); } @@ -2403,6 +2416,12 @@ __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; + /* Don't create pidfds for kernel threads for now. */ + if (args->kthread) { + retval = -EINVAL; + goto bad_fork_free_pid; + } + /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 07e99c936ba5..1dee88ba0ae4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -530,6 +530,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } + flags |= IRQD_AFFINITY_SET; mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 4ad5ed8adf96..6dc76b590703 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -236,7 +236,7 @@ void static_key_disable_cpuslocked(struct static_key *key) } jump_label_lock(); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); jump_label_unlock(); } @@ -289,7 +289,7 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key) return; guard(mutex)(&jump_label_mutex); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); else WARN_ON_ONCE(!static_key_slow_try_dec(key)); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fb2c77368d18..a9a0ca605d4a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static void cleanup_symbol_name(char *s) -{ - char *res; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return; - - /* - * LLVM appends various suffixes for local functions and variables that - * must be promoted to global scope as part of LTO. This can break - * hooking of static functions with kprobes. '.' is not a valid - * character in an identifier in C. Suffixes only in LLVM LTO observed: - * - foo.llvm.[0-9a-f]+ - */ - res = strstr(s, ".llvm."); - if (res) - *res = '\0'; - - return; -} - -static int compare_symbol_name(const char *name, char *namebuf) -{ - /* The kallsyms_seqs_of_names is sorted based on names after - * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled. - * To ensure correct bisection in kallsyms_lookup_names(), do - * cleanup_symbol_name(namebuf) before comparing name and namebuf. - */ - cleanup_symbol_name(namebuf); - return strcmp(name, namebuf); -} - static unsigned int get_symbol_seq(int index) { unsigned int i, seq = 0; @@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(mid); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - ret = compare_symbol_name(name, namebuf); + ret = strcmp(name, namebuf); if (ret > 0) low = mid + 1; else if (ret < 0) @@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(low - 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; low--; } @@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(high + 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; high++; } @@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = strlen(namebuf); - goto found; + return strlen(namebuf); } /* See if it's in a module or a BPF JITed image. */ @@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = ftrace_mod_address_lookup(addr, symbolsize, offset, modname, namebuf); -found: - cleanup_symbol_name(namebuf); return ret; } @@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr, int lookup_symbol_name(unsigned long addr, char *symname) { - int res; - symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - goto found; + return 0; } /* See if it's in a module. */ - res = lookup_module_symbol_name(addr, symname); - if (res) - return res; - -found: - cleanup_symbol_name(symname); - return 0; + return lookup_module_symbol_name(addr, symname); } /* Look up a kernel symbol and return it in a text buffer. */ diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index 2f84896a7bcb..873f7c445488 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void) stat.min, stat.max, div_u64(stat.sum, stat.real_cnt)); } -static bool match_cleanup_name(const char *s, const char *name) -{ - char *p; - int len; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; - - p = strstr(s, ".llvm."); - if (!p) - return false; - - len = strlen(name); - if (p - s != len) - return false; - - return !strncmp(s, name, len); -} - static int find_symbol(void *data, const char *name, unsigned long addr) { struct test_stat *stat = (struct test_stat *)data; - if (strcmp(name, stat->name) == 0 || - (!stat->perf && match_cleanup_name(name, stat->name))) { + if (!strcmp(name, stat->name)) { stat->real_cnt++; stat->addr = addr; diff --git a/kernel/kcov.c b/kernel/kcov.c index f0a69d402066..274b6b7c718d 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area, kmsan_unpoison_memory(&area->list, sizeof(area->list)); } +/* + * Unlike in_serving_softirq(), this function returns false when called during + * a hardirq or an NMI that happened in the softirq context. + */ +static inline bool in_softirq_really(void) +{ + return in_serving_softirq() && !in_hardirq() && !in_nmi(); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * so we ignore code executed in interrupts, unless we are in a remote * coverage collection section in a softirq. */ - if (!in_task() && !(in_serving_softirq() && t->kcov_softirq)) + if (!in_task() && !(in_softirq_really() && t->kcov_softirq)) return false; mode = READ_ONCE(t->kcov_mode); /* @@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle) if (WARN_ON(!kcov_check_handle(handle, true, true, true))) return; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); @@ -991,7 +1000,7 @@ void kcov_remote_stop(void) int sequence; unsigned long flags; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e85de37d9e1e..da59c68df841 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1557,8 +1557,8 @@ static bool is_cfi_preamble_symbol(unsigned long addr) if (lookup_symbol_name(addr, symbuf)) return false; - return str_has_prefix("__cfi_", symbuf) || - str_has_prefix("__pfx_", symbuf); + return str_has_prefix(symbuf, "__cfi_") || + str_has_prefix(symbuf, "__pfx_"); } static int check_kprobe_address_safe(struct kprobe *p, diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 07fb5987b42b..1bab21b4718f 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -92,7 +92,14 @@ static ssize_t profiling_store(struct kobject *kobj, const char *buf, size_t count) { int ret; + static DEFINE_MUTEX(lock); + /* + * We need serialization, for profile_setup() initializes prof_on + * value and profile_init() must not reallocate prof_buffer after + * once allocated. + */ + guard(mutex)(&lock); if (prof_on) return -EEXIST; /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 58c88220a478..0349f957e672 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, ip); @@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, _RET_IP_); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index f5a36e67b593..ac2e22502741 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -357,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - enum vcpu_state old = vcpu_halted; + u8 old = vcpu_halted; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will diff --git a/kernel/module/main.c b/kernel/module/main.c index d9592195c5bb..71396e297499 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3104,7 +3104,7 @@ static bool idempotent(struct idempotent *u, const void *cookie) struct idempotent *existing; bool first; - u->ret = 0; + u->ret = -EINTR; u->cookie = cookie; init_completion(&u->complete); @@ -3140,7 +3140,7 @@ static int idempotent_complete(struct idempotent *u, int ret) hlist_for_each_entry_safe(pos, next, head, entry) { if (pos->cookie != cookie) continue; - hlist_del(&pos->entry); + hlist_del_init(&pos->entry); pos->ret = ret; complete(&pos->complete); } @@ -3148,6 +3148,28 @@ static int idempotent_complete(struct idempotent *u, int ret) return ret; } +/* + * Wait for the idempotent worker. + * + * If we get interrupted, we need to remove ourselves from the + * the idempotent list, and the completion may still come in. + * + * The 'idem_lock' protects against the race, and 'idem.ret' was + * initialized to -EINTR and is thus always the right return + * value even if the idempotent work then completes between + * the wait_for_completion and the cleanup. + */ +static int idempotent_wait_for_completion(struct idempotent *u) +{ + if (wait_for_completion_interruptible(&u->complete)) { + spin_lock(&idem_lock); + if (!hlist_unhashed(&u->entry)) + hlist_del(&u->entry); + spin_unlock(&idem_lock); + } + return u->ret; +} + static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; @@ -3183,15 +3205,16 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int if (!f || !(f->f_mode & FMODE_READ)) return -EBADF; - /* See if somebody else is doing the operation? */ - if (idempotent(&idem, file_inode(f))) { - wait_for_completion(&idem.complete); - return idem.ret; + /* Are we the winners of the race and get to do this? */ + if (!idempotent(&idem, file_inode(f))) { + int ret = init_module_from_file(f, uargs, flags); + return idempotent_complete(&idem, ret); } - /* Otherwise, we'll do it and complete others */ - return idempotent_complete(&idem, - init_module_from_file(f, uargs, flags)); + /* + * Somebody else won the race and is loading the module. + */ + return idempotent_wait_for_completion(&idem); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) diff --git a/kernel/padata.c b/kernel/padata.c index 53f4bc912712..0fa6c2895460 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -517,6 +517,13 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) ps.chunk_size = max(ps.chunk_size, job->min_chunk); ps.chunk_size = roundup(ps.chunk_size, job->align); + /* + * chunk_size can be 0 if the caller sets min_chunk to 0. So force it + * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().` + */ + if (!ps.chunk_size) + ps.chunk_size = 1U; + list_for_each_entry(pw, &works, pw_list) if (job->numa_aware) { int old_node = atomic_read(&last_used_nid); diff --git a/kernel/panic.c b/kernel/panic.c index f861bedc1925..2a0449144f82 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,8 @@ unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +bool panic_triggering_all_cpu_backtrace; + int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) + if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + /* Temporary allow non-panic CPUs to write their backtraces. */ + panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); + panic_triggering_all_cpu_backtrace = false; + } /* * Note that smp_send_stop() is the usual SMP shutdown function, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 054c0e7784fd..c22b07049c38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; if (level == LOGLEVEL_SCHED) { diff --git a/kernel/profile.c b/kernel/profile.c index 2b775cc5c28f..1fcf1adcf4eb 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -47,30 +47,14 @@ static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); -static cpumask_var_t prof_cpu_mask; -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); -static DEFINE_PER_CPU(int, cpu_profile_flip); -static DEFINE_MUTEX(profile_flip_mutex); -#endif /* CONFIG_SMP */ - int profile_setup(char *str) { static const char schedstr[] = "schedule"; - static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; const char *select = NULL; int par; - if (!strncmp(str, sleepstr, strlen(sleepstr))) { -#ifdef CONFIG_SCHEDSTATS - force_schedstat_enabled(); - prof_on = SLEEP_PROFILING; - select = sleepstr; -#else - pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); -#endif /* CONFIG_SCHEDSTATS */ - } else if (!strncmp(str, schedstr, strlen(schedstr))) { + if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { @@ -114,11 +98,6 @@ int __ref profile_init(void) buffer_bytes = prof_len*sizeof(atomic_t); - if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(prof_cpu_mask, cpu_possible_mask); - prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN); if (prof_buffer) return 0; @@ -132,195 +111,16 @@ int __ref profile_init(void) if (prof_buffer) return 0; - free_cpumask_var(prof_cpu_mask); return -ENOMEM; } -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -/* - * Each cpu has a pair of open-addressed hashtables for pending - * profile hits. read_profile() IPI's all cpus to request them - * to flip buffers and flushes their contents to prof_buffer itself. - * Flip requests are serialized by the profile_flip_mutex. The sole - * use of having a second hashtable is for avoiding cacheline - * contention that would otherwise happen during flushes of pending - * profile hits required for the accuracy of reported profile hits - * and so resurrect the interrupt livelock issue. - * - * The open-addressed hashtables are indexed by profile buffer slot - * and hold the number of pending hits to that profile buffer slot on - * a cpu in an entry. When the hashtable overflows, all pending hits - * are accounted to their corresponding profile buffer slots with - * atomic_add() and the hashtable emptied. As numerous pending hits - * may be accounted to a profile buffer slot in a hashtable entry, - * this amortizes a number of atomic profile buffer increments likely - * to be far larger than the number of entries in the hashtable, - * particularly given that the number of distinct profile buffer - * positions to which hits are accounted during short intervals (e.g. - * several seconds) is usually very small. Exclusion from buffer - * flipping is provided by interrupt disablement (note that for - * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from - * process context). - * The hash function is meant to be lightweight as opposed to strong, - * and was vaguely inspired by ppc64 firmware-supported inverted - * pagetable hash functions, but uses a full hashtable full of finite - * collision chains, not just pairs of them. - * - * -- nyc - */ -static void __profile_flip_buffers(void *unused) -{ - int cpu = smp_processor_id(); - - per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu); -} - -static void profile_flip_buffers(void) -{ - int i, j, cpu; - - mutex_lock(&profile_flip_mutex); - j = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; - for (i = 0; i < NR_PROFILE_HIT; ++i) { - if (!hits[i].hits) { - if (hits[i].pc) - hits[i].pc = 0; - continue; - } - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].hits = hits[i].pc = 0; - } - } - mutex_unlock(&profile_flip_mutex); -} - -static void profile_discard_flip_buffers(void) -{ - int i, cpu; - - mutex_lock(&profile_flip_mutex); - i = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; - memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); - } - mutex_unlock(&profile_flip_mutex); -} - -static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) -{ - unsigned long primary, secondary, flags, pc = (unsigned long)__pc; - int i, j, cpu; - struct profile_hit *hits; - - pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); - i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - cpu = get_cpu(); - hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; - if (!hits) { - put_cpu(); - return; - } - /* - * We buffer the global profiler buffer into a per-CPU - * queue and thus reduce the number of global (and possibly - * NUMA-alien) accesses. The write-queue is self-coalescing: - */ - local_irq_save(flags); - do { - for (j = 0; j < PROFILE_GRPSZ; ++j) { - if (hits[i + j].pc == pc) { - hits[i + j].hits += nr_hits; - goto out; - } else if (!hits[i + j].hits) { - hits[i + j].pc = pc; - hits[i + j].hits = nr_hits; - goto out; - } - } - i = (i + secondary) & (NR_PROFILE_HIT - 1); - } while (i != primary); - - /* - * Add the current hit(s) and flush the write-queue out - * to the global buffer: - */ - atomic_add(nr_hits, &prof_buffer[pc]); - for (i = 0; i < NR_PROFILE_HIT; ++i) { - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].pc = hits[i].hits = 0; - } -out: - local_irq_restore(flags); - put_cpu(); -} - -static int profile_dead_cpu(unsigned int cpu) -{ - struct page *page; - int i; - - if (cpumask_available(prof_cpu_mask)) - cpumask_clear_cpu(cpu, prof_cpu_mask); - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); - per_cpu(cpu_profile_hits, cpu)[i] = NULL; - __free_page(page); - } - } - return 0; -} - -static int profile_prepare_cpu(unsigned int cpu) -{ - int i, node = cpu_to_mem(cpu); - struct page *page; - - per_cpu(cpu_profile_flip, cpu) = 0; - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) - continue; - - page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); - if (!page) { - profile_dead_cpu(cpu); - return -ENOMEM; - } - per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); - - } - return 0; -} - -static int profile_online_cpu(unsigned int cpu) -{ - if (cpumask_available(prof_cpu_mask)) - cpumask_set_cpu(cpu, prof_cpu_mask); - - return 0; -} - -#else /* !CONFIG_SMP */ -#define profile_flip_buffers() do { } while (0) -#define profile_discard_flip_buffers() do { } while (0) - static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; - atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); + if (pc < prof_len) + atomic_add(nr_hits, &prof_buffer[pc]); } -#endif /* !CONFIG_SMP */ void profile_hits(int type, void *__pc, unsigned int nr_hits) { @@ -334,8 +134,8 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && - cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) + /* This is the old kernel-only legacy profiling */ + if (!user_mode(regs)) profile_hit(type, (void *)profile_pc(regs)); } @@ -358,7 +158,6 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) char *pnt; unsigned long sample_step = 1UL << prof_shift; - profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) return 0; if (count > (prof_len+1)*sizeof(unsigned int) - p) @@ -404,7 +203,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return -EINVAL; } #endif - profile_discard_flip_buffers(); memset(prof_buffer, 0, prof_len * sizeof(atomic_t)); return count; } @@ -418,40 +216,14 @@ static const struct proc_ops profile_proc_ops = { int __ref create_proc_profile(void) { struct proc_dir_entry *entry; -#ifdef CONFIG_SMP - enum cpuhp_state online_state; -#endif - int err = 0; if (!prof_on) return 0; -#ifdef CONFIG_SMP - err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", - profile_prepare_cpu, profile_dead_cpu); - if (err) - return err; - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", - profile_online_cpu, NULL); - if (err < 0) - goto err_state_prep; - online_state = err; - err = 0; -#endif entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &profile_proc_ops); - if (!entry) - goto err_state_onl; - proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); - - return err; -err_state_onl: -#ifdef CONFIG_SMP - cpuhp_remove_state(online_state); -err_state_prep: - cpuhp_remove_state(CPUHP_PROFILE_PREPARE); -#endif + if (entry) + proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); return err; } subsys_initcall(create_proc_profile); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a9f655025607..f3951e4a55e5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7845,6 +7845,30 @@ void set_rq_offline(struct rq *rq) } } +static inline void sched_set_rq_online(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_online(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + +static inline void sched_set_rq_offline(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + /* * used to mark begin/end of suspend/resume: */ @@ -7895,10 +7919,25 @@ static int cpuset_cpu_inactive(unsigned int cpu) return 0; } +static inline void sched_smt_present_inc(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_inc_cpuslocked(&sched_smt_present); +#endif +} + +static inline void sched_smt_present_dec(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_dec_cpuslocked(&sched_smt_present); +#endif +} + int sched_cpu_activate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; /* * Clear the balance_push callback and prepare to schedule @@ -7906,13 +7945,10 @@ int sched_cpu_activate(unsigned int cpu) */ balance_push_set(cpu, false); -#ifdef CONFIG_SCHED_SMT /* * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_inc_cpuslocked(&sched_smt_present); -#endif + sched_smt_present_inc(cpu); set_cpu_active(cpu, true); if (sched_smp_initialized) { @@ -7930,12 +7966,7 @@ int sched_cpu_activate(unsigned int cpu) * 2) At runtime, if cpuset_cpu_active() fails to rebuild the * domains. */ - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_online(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_online(rq, cpu); return 0; } @@ -7943,7 +7974,6 @@ int sched_cpu_activate(unsigned int cpu) int sched_cpu_deactivate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; int ret; /* @@ -7974,20 +8004,14 @@ int sched_cpu_deactivate(unsigned int cpu) */ synchronize_rcu(); - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_offline(rq, cpu); -#ifdef CONFIG_SCHED_SMT /* * When going down, decrement the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_dec_cpuslocked(&sched_smt_present); + sched_smt_present_dec(cpu); +#ifdef CONFIG_SCHED_SMT sched_core_cpu_deactivate(cpu); #endif @@ -7997,6 +8021,8 @@ int sched_cpu_deactivate(unsigned int cpu) sched_update_numa(cpu, false); ret = cpuset_cpu_inactive(cpu); if (ret) { + sched_smt_present_inc(cpu); + sched_set_rq_online(rq, cpu); balance_push_set(cpu, false); set_cpu_active(cpu, true); sched_update_numa(cpu, true); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a5e00293ae43..0bed0fa1acd9 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, } stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); + /* + * Because mul_u64_u64_div_u64() can approximate on some + * achitectures; enforce the constraint that: a*b/(b+c) <= a. + */ + if (unlikely(stime > rtime)) + stime = rtime; update: /* diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 78e48f5426ee..eb0cdcd4d921 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -92,16 +92,6 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, trace_sched_stat_blocked(p, delta); - /* - * Blocking time is in units of nanosecs, so shift by - * 20 to get a milliseconds-range estimation of the - * amount of time that the task spent sleeping: - */ - if (unlikely(prof_on == SLEEP_PROFILING)) { - profile_hits(SLEEP_PROFILING, - (void *)get_wchan(p), - delta >> 20); - } account_scheduler_latency(p, delta >> 10, 0); } } diff --git a/kernel/task_work.c b/kernel/task_work.c index 5c2daa7ad3f9..5d14d639ac71 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -6,12 +6,14 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +#ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); +#endif /** * task_work_add - ask the @task to execute @work->func() @@ -57,6 +59,8 @@ int task_work_add(struct task_struct *task, struct callback_head *work, if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; + if (!IS_ENABLED(CONFIG_IRQ_WORK)) + return -EINVAL; } else { /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); @@ -81,9 +85,11 @@ int task_work_add(struct task_struct *task, struct callback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; +#ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; +#endif default: WARN_ON_ONCE(1); break; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d25ba49e313c..d0538a75f4c6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -246,7 +246,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end); if (wd_delay <= WATCHDOG_MAX_SKEW) { - if (nretries > 1 || nretries >= max_retries) { + if (nretries > 1 && nretries >= max_retries) { pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 406dccb79c2b..8d2dd214ec68 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -727,17 +727,16 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, } if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; + time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; + time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; + time_constant = clamp(txc->constant, 0, MAXTC); if (!(time_status & STA_NANO)) time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); + time_constant = clamp(time_constant, 0, MAXTC); } if (txc->modes & ADJ_TAI && diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b4843099a8da..ed58eebb4e8f 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1141,7 +1141,6 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1167,6 +1166,8 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) * device to avoid the starvation. */ if (tick_check_broadcast_expired()) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); tick_program_event(td->evtdev->next_event, 1); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2fa87dcfeda9..5391e4167d60 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2606,7 +2606,7 @@ int do_adjtimex(struct __kernel_timex *txc) clock_set |= timekeeping_advance(TK_ADV_FREQ); if (clock_set) - clock_was_set(CLOCK_REALTIME); + clock_was_set(CLOCK_SET_WALL); ntp_notify_cmos_timer(); diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 84413114db5c..8d57f7686bb0 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -475,9 +475,54 @@ static bool tmigr_check_lonely(struct tmigr_group *group) return bitmap_weight(&active, BIT_CNT) <= 1; } -typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, void *); +/** + * struct tmigr_walk - data required for walking the hierarchy + * @nextexp: Next CPU event expiry information which is handed into + * the timer migration code by the timer code + * (get_next_timer_interrupt()) + * @firstexp: Contains the first event expiry information when + * hierarchy is completely idle. When CPU itself was the + * last going idle, information makes sure, that CPU will + * be back in time. When using this value in the remote + * expiry case, firstexp is stored in the per CPU tmigr_cpu + * struct of CPU which expires remote timers. It is updated + * in top level group only. Be aware, there could occur a + * new top level of the hierarchy between the 'top level + * call' in tmigr_update_events() and the check for the + * parent group in walk_groups(). Then @firstexp might + * contain a value != KTIME_MAX even if it was not the + * final top level. This is not a problem, as the worst + * outcome is a CPU which might wake up a little early. + * @evt: Pointer to tmigr_event which needs to be queued (of idle + * child group) + * @childmask: groupmask of child group + * @remote: Is set, when the new timer path is executed in + * tmigr_handle_remote_cpu() + * @basej: timer base in jiffies + * @now: timer base monotonic + * @check: is set if there is the need to handle remote timers; + * required in tmigr_requires_handle_remote() only + * @tmc_active: this flag indicates, whether the CPU which triggers + * the hierarchy walk is !idle in the timer migration + * hierarchy. When the CPU is idle and the whole hierarchy is + * idle, only the first event of the top level has to be + * considered. + */ +struct tmigr_walk { + u64 nextexp; + u64 firstexp; + struct tmigr_event *evt; + u8 childmask; + bool remote; + unsigned long basej; + u64 now; + bool check; + bool tmc_active; +}; + +typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, struct tmigr_walk *); -static void __walk_groups(up_f up, void *data, +static void __walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) { struct tmigr_group *child = NULL, *group = tmc->tmgroup; @@ -490,64 +535,17 @@ static void __walk_groups(up_f up, void *data, child = group; group = group->parent; + data->childmask = child->groupmask; } while (group); } -static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc) +static void walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) { lockdep_assert_held(&tmc->lock); __walk_groups(up, data, tmc); } -/** - * struct tmigr_walk - data required for walking the hierarchy - * @nextexp: Next CPU event expiry information which is handed into - * the timer migration code by the timer code - * (get_next_timer_interrupt()) - * @firstexp: Contains the first event expiry information when last - * active CPU of hierarchy is on the way to idle to make - * sure CPU will be back in time. - * @evt: Pointer to tmigr_event which needs to be queued (of idle - * child group) - * @childmask: childmask of child group - * @remote: Is set, when the new timer path is executed in - * tmigr_handle_remote_cpu() - */ -struct tmigr_walk { - u64 nextexp; - u64 firstexp; - struct tmigr_event *evt; - u8 childmask; - bool remote; -}; - -/** - * struct tmigr_remote_data - data required for remote expiry hierarchy walk - * @basej: timer base in jiffies - * @now: timer base monotonic - * @firstexp: returns expiry of the first timer in the idle timer - * migration hierarchy to make sure the timer is handled in - * time; it is stored in the per CPU tmigr_cpu struct of - * CPU which expires remote timers - * @childmask: childmask of child group - * @check: is set if there is the need to handle remote timers; - * required in tmigr_requires_handle_remote() only - * @tmc_active: this flag indicates, whether the CPU which triggers - * the hierarchy walk is !idle in the timer migration - * hierarchy. When the CPU is idle and the whole hierarchy is - * idle, only the first event of the top level has to be - * considered. - */ -struct tmigr_remote_data { - unsigned long basej; - u64 now; - u64 firstexp; - u8 childmask; - bool check; - bool tmc_active; -}; - /* * Returns the next event of the timerqueue @group->events * @@ -618,10 +616,9 @@ static u64 tmigr_next_groupevt_expires(struct tmigr_group *group) static bool tmigr_active_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { union tmigr_state curstate, newstate; - struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; @@ -649,8 +646,7 @@ static bool tmigr_active_up(struct tmigr_group *group, } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); - if ((walk_done == false) && group->parent) - data->childmask = group->childmask; + trace_tmigr_group_set_cpu_active(group, newstate, childmask); /* * The group is active (again). The group event might be still queued @@ -666,8 +662,6 @@ static bool tmigr_active_up(struct tmigr_group *group, */ group->groupevt.ignore = true; - trace_tmigr_group_set_cpu_active(group, newstate, childmask); - return walk_done; } @@ -675,7 +669,7 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc) { struct tmigr_walk data; - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; trace_tmigr_cpu_active(tmc); @@ -860,10 +854,8 @@ unlock: static bool tmigr_new_timer_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_walk *data = ptr; - return tmigr_update_events(group, child, data); } @@ -995,9 +987,8 @@ unlock: static bool tmigr_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_remote_data *data = ptr; struct tmigr_event *evt; unsigned long jif; u8 childmask; @@ -1034,12 +1025,10 @@ again: } /* - * Update of childmask for the next level and keep track of the expiry - * of the first event that needs to be handled (group->next_expiry was - * updated by tmigr_next_expired_groupevt(), next was set by - * tmigr_handle_remote_cpu()). + * Keep track of the expiry of the first event that needs to be handled + * (group->next_expiry was updated by tmigr_next_expired_groupevt(), + * next was set by tmigr_handle_remote_cpu()). */ - data->childmask = group->childmask; data->firstexp = group->next_expiry; raw_spin_unlock_irq(&group->lock); @@ -1055,12 +1044,12 @@ again: void tmigr_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - struct tmigr_remote_data data; + struct tmigr_walk data; if (tmigr_is_not_available(tmc)) return; - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; data.firstexp = KTIME_MAX; /* @@ -1068,7 +1057,7 @@ void tmigr_handle_remote(void) * in tmigr_handle_remote_up() anyway. Keep this check to speed up the * return when nothing has to be done. */ - if (!tmigr_check_migrator(tmc->tmgroup, tmc->childmask)) { + if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { /* * If this CPU was an idle migrator, make sure to clear its wakeup * value so it won't chase timers that have already expired elsewhere. @@ -1097,9 +1086,8 @@ void tmigr_handle_remote(void) static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_remote_data *data = ptr; u8 childmask; childmask = data->childmask; @@ -1118,7 +1106,7 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, * group before reading the next_expiry value. */ if (group->parent && !data->tmc_active) - goto out; + return false; /* * The lock is required on 32bit architectures to read the variable @@ -1143,9 +1131,6 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, raw_spin_unlock(&group->lock); } -out: - /* Update of childmask for the next level */ - data->childmask = group->childmask; return false; } @@ -1157,7 +1142,7 @@ out: bool tmigr_requires_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - struct tmigr_remote_data data; + struct tmigr_walk data; unsigned long jif; bool ret = false; @@ -1165,7 +1150,7 @@ bool tmigr_requires_handle_remote(void) return ret; data.now = get_jiffies_update(&jif); - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; data.firstexp = KTIME_MAX; data.tmc_active = !tmc->idle; data.check = false; @@ -1230,14 +1215,13 @@ u64 tmigr_cpu_new_timer(u64 nextexp) if (nextexp != tmc->cpuevt.nextevt.expires || tmc->cpuevt.ignore) { ret = tmigr_new_timer(tmc, nextexp); + /* + * Make sure the reevaluation of timers in idle path + * will not miss an event. + */ + WRITE_ONCE(tmc->wakeup, ret); } } - /* - * Make sure the reevaluation of timers in idle path will not miss an - * event. - */ - WRITE_ONCE(tmc->wakeup, ret); - trace_tmigr_cpu_new_timer_idle(tmc, nextexp); raw_spin_unlock(&tmc->lock); return ret; @@ -1245,10 +1229,9 @@ u64 tmigr_cpu_new_timer(u64 nextexp) static bool tmigr_inactive_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { union tmigr_state curstate, newstate, childstate; - struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; @@ -1299,9 +1282,10 @@ static bool tmigr_inactive_up(struct tmigr_group *group, WARN_ON_ONCE((newstate.migrator != TMIGR_NONE) && !(newstate.active)); - if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, - newstate.state)) + if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { + trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); break; + } /* * The memory barrier is paired with the cmpxchg() in @@ -1317,22 +1301,6 @@ static bool tmigr_inactive_up(struct tmigr_group *group, /* Event Handling */ tmigr_update_events(group, child, data); - if (group->parent && (walk_done == false)) - data->childmask = group->childmask; - - /* - * data->firstexp was set by tmigr_update_events() and contains the - * expiry of the first global event which needs to be handled. It - * differs from KTIME_MAX if: - * - group is the top level group and - * - group is idle (which means CPU was the last active CPU in the - * hierarchy) and - * - there is a pending event in the hierarchy - */ - WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent); - - trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); - return walk_done; } @@ -1341,7 +1309,7 @@ static u64 __tmigr_cpu_deactivate(struct tmigr_cpu *tmc, u64 nextexp) struct tmigr_walk data = { .nextexp = nextexp, .firstexp = KTIME_MAX, .evt = &tmc->cpuevt, - .childmask = tmc->childmask }; + .childmask = tmc->groupmask }; /* * If nextexp is KTIME_MAX, the CPU event will be ignored because the @@ -1400,7 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp) * the only one in the level 0 group; and if it is the * only one in level 0 group, but there are more than a * single group active on the way to top level) - * * nextevt - when CPU is offline and has to handle timer on his own + * * nextevt - when CPU is offline and has to handle timer on its own * or when on the way to top in every group only a single * child is active but @nextevt is before the lowest * next_expiry encountered while walking up to top level. @@ -1419,7 +1387,7 @@ u64 tmigr_quick_check(u64 nextevt) if (WARN_ON_ONCE(tmc->idle)) return nextevt; - if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->childmask)) + if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) return KTIME_MAX; do { @@ -1442,6 +1410,66 @@ u64 tmigr_quick_check(u64 nextevt) return KTIME_MAX; } +/* + * tmigr_trigger_active() - trigger a CPU to become active again + * + * This function is executed on a CPU which is part of cpu_online_mask, when the + * last active CPU in the hierarchy is offlining. With this, it is ensured that + * the other CPU is active and takes over the migrator duty. + */ +static long tmigr_trigger_active(void *unused) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + + WARN_ON_ONCE(!tmc->online || tmc->idle); + + return 0; +} + +static int tmigr_cpu_offline(unsigned int cpu) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + int migrator; + u64 firstexp; + + raw_spin_lock_irq(&tmc->lock); + tmc->online = false; + WRITE_ONCE(tmc->wakeup, KTIME_MAX); + + /* + * CPU has to handle the local events on his own, when on the way to + * offline; Therefore nextevt value is set to KTIME_MAX + */ + firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); + trace_tmigr_cpu_offline(tmc); + raw_spin_unlock_irq(&tmc->lock); + + if (firstexp != KTIME_MAX) { + migrator = cpumask_any_but(cpu_online_mask, cpu); + work_on_cpu(migrator, tmigr_trigger_active, NULL); + } + + return 0; +} + +static int tmigr_cpu_online(unsigned int cpu) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + + /* Check whether CPU data was successfully initialized */ + if (WARN_ON_ONCE(!tmc->tmgroup)) + return -EINVAL; + + raw_spin_lock_irq(&tmc->lock); + trace_tmigr_cpu_online(tmc); + tmc->idle = timer_base_is_idle(); + if (!tmc->idle) + __tmigr_cpu_activate(tmc); + tmc->online = true; + raw_spin_unlock_irq(&tmc->lock); + return 0; +} + static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, int node) { @@ -1514,21 +1542,25 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node, } static void tmigr_connect_child_parent(struct tmigr_group *child, - struct tmigr_group *parent) + struct tmigr_group *parent, + bool activate) { - union tmigr_state childstate; + struct tmigr_walk data; raw_spin_lock_irq(&child->lock); raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); child->parent = parent; - child->childmask = BIT(parent->num_children++); + child->groupmask = BIT(parent->num_children++); raw_spin_unlock(&parent->lock); raw_spin_unlock_irq(&child->lock); trace_tmigr_connect_child_parent(child); + if (!activate) + return; + /* * To prevent inconsistent states, active children need to be active in * the new parent as well. Inactive children are already marked inactive @@ -1544,21 +1576,24 @@ static void tmigr_connect_child_parent(struct tmigr_group *child, * child to the new parent. So tmigr_connect_child_parent() is * executed with the formerly top level group (child) and the newly * created group (parent). + * + * * It is ensured that the child is active, as this setup path is + * executed in hotplug prepare callback. This is exectued by an + * already connected and !idle CPU. Even if all other CPUs go idle, + * the CPU executing the setup will be responsible up to current top + * level group. And the next time it goes inactive, it will release + * the new childmask and parent to subsequent walkers through this + * @child. Therefore propagate active state unconditionally. */ - childstate.state = atomic_read(&child->migr_state); - if (childstate.migrator != TMIGR_NONE) { - struct tmigr_walk data; - - data.childmask = child->childmask; + data.childmask = child->groupmask; - /* - * There is only one new level per time. When connecting the - * child and the parent and set the child active when the parent - * is inactive, the parent needs to be the uppermost - * level. Otherwise there went something wrong! - */ - WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); - } + /* + * There is only one new level per time (which is protected by + * tmigr_mutex). When connecting the child and the parent and set the + * child active when the parent is inactive, the parent needs to be the + * uppermost level. Otherwise there went something wrong! + */ + WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); } static int tmigr_setup_groups(unsigned int cpu, unsigned int node) @@ -1611,12 +1646,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) * Update tmc -> group / child -> group connection */ if (i == 0) { - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); raw_spin_lock_irq(&group->lock); tmc->tmgroup = group; - tmc->childmask = BIT(group->num_children++); + tmc->groupmask = BIT(group->num_children++); raw_spin_unlock_irq(&group->lock); @@ -1626,7 +1661,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) continue; } else { child = stack[i - 1]; - tmigr_connect_child_parent(child, group); + /* Will be activated at online time */ + tmigr_connect_child_parent(child, group, false); } /* check if uppermost level was newly created */ @@ -1637,12 +1673,21 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) lvllist = &tmigr_level_list[top]; if (group->num_children == 1 && list_is_singular(lvllist)) { + /* + * The target CPU must never do the prepare work, except + * on early boot when the boot CPU is the target. Otherwise + * it may spuriously activate the old top level group inside + * the new one (nevertheless whether old top level group is + * active or not) and/or release an uninitialized childmask. + */ + WARN_ON_ONCE(cpu == raw_smp_processor_id()); + lvllist = &tmigr_level_list[top - 1]; list_for_each_entry(child, lvllist, list) { if (child->parent) continue; - tmigr_connect_child_parent(child, group); + tmigr_connect_child_parent(child, group, true); } } } @@ -1664,80 +1709,31 @@ static int tmigr_add_cpu(unsigned int cpu) return ret; } -static int tmigr_cpu_online(unsigned int cpu) -{ - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - int ret; - - /* First online attempt? Initialize CPU data */ - if (!tmc->tmgroup) { - raw_spin_lock_init(&tmc->lock); - - ret = tmigr_add_cpu(cpu); - if (ret < 0) - return ret; - - if (tmc->childmask == 0) - return -EINVAL; - - timerqueue_init(&tmc->cpuevt.nextevt); - tmc->cpuevt.nextevt.expires = KTIME_MAX; - tmc->cpuevt.ignore = true; - tmc->cpuevt.cpu = cpu; - - tmc->remote = false; - WRITE_ONCE(tmc->wakeup, KTIME_MAX); - } - raw_spin_lock_irq(&tmc->lock); - trace_tmigr_cpu_online(tmc); - tmc->idle = timer_base_is_idle(); - if (!tmc->idle) - __tmigr_cpu_activate(tmc); - tmc->online = true; - raw_spin_unlock_irq(&tmc->lock); - return 0; -} - -/* - * tmigr_trigger_active() - trigger a CPU to become active again - * - * This function is executed on a CPU which is part of cpu_online_mask, when the - * last active CPU in the hierarchy is offlining. With this, it is ensured that - * the other CPU is active and takes over the migrator duty. - */ -static long tmigr_trigger_active(void *unused) +static int tmigr_cpu_prepare(unsigned int cpu) { - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); + int ret = 0; - WARN_ON_ONCE(!tmc->online || tmc->idle); - - return 0; -} - -static int tmigr_cpu_offline(unsigned int cpu) -{ - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - int migrator; - u64 firstexp; + /* Not first online attempt? */ + if (tmc->tmgroup) + return ret; - raw_spin_lock_irq(&tmc->lock); - tmc->online = false; + raw_spin_lock_init(&tmc->lock); + timerqueue_init(&tmc->cpuevt.nextevt); + tmc->cpuevt.nextevt.expires = KTIME_MAX; + tmc->cpuevt.ignore = true; + tmc->cpuevt.cpu = cpu; + tmc->remote = false; WRITE_ONCE(tmc->wakeup, KTIME_MAX); - /* - * CPU has to handle the local events on his own, when on the way to - * offline; Therefore nextevt value is set to KTIME_MAX - */ - firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); - trace_tmigr_cpu_offline(tmc); - raw_spin_unlock_irq(&tmc->lock); + ret = tmigr_add_cpu(cpu); + if (ret < 0) + return ret; - if (firstexp != KTIME_MAX) { - migrator = cpumask_any_but(cpu_online_mask, cpu); - work_on_cpu(migrator, tmigr_trigger_active, NULL); - } + if (tmc->groupmask == 0) + return -EINVAL; - return 0; + return ret; } static int __init tmigr_init(void) @@ -1796,6 +1792,11 @@ static int __init tmigr_init(void) tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP, tmigr_crossnode_level); + ret = cpuhp_setup_state(CPUHP_TMIGR_PREPARE, "tmigr:prepare", + tmigr_cpu_prepare, NULL); + if (ret) + goto err; + ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online", tmigr_cpu_online, tmigr_cpu_offline); if (ret) @@ -1807,4 +1808,4 @@ err: pr_err("Timer migration setup failed\n"); return ret; } -late_initcall(tmigr_init); +early_initcall(tmigr_init); diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h index 6c37d94a37d9..154accc7a543 100644 --- a/kernel/time/timer_migration.h +++ b/kernel/time/timer_migration.h @@ -22,7 +22,17 @@ struct tmigr_event { * struct tmigr_group - timer migration hierarchy group * @lock: Lock protecting the event information and group hierarchy * information during setup - * @parent: Pointer to the parent group + * @parent: Pointer to the parent group. Pointer is updated when a + * new hierarchy level is added because of a CPU coming + * online the first time. Once it is set, the pointer will + * not be removed or updated. When accessing parent pointer + * lock less to decide whether to abort a propagation or + * not, it is not a problem. The worst outcome is an + * unnecessary/early CPU wake up. But do not access parent + * pointer several times in the same 'action' (like + * activation, deactivation, check for remote expiry,...) + * without holding the lock as it is not ensured that value + * will not change. * @groupevt: Next event of the group which is only used when the * group is !active. The group event is then queued into * the parent timer queue. @@ -41,9 +51,8 @@ struct tmigr_event { * @num_children: Counter of group children to make sure the group is only * filled with TMIGR_CHILDREN_PER_GROUP; Required for setup * only - * @childmask: childmask of the group in the parent group; is set - * during setup and will never change; can be read - * lockless + * @groupmask: mask of the group in the parent group; is set during + * setup and will never change; can be read lockless * @list: List head that is added to the per level * tmigr_level_list; is required during setup when a * new group needs to be connected to the existing @@ -59,7 +68,7 @@ struct tmigr_group { unsigned int level; int numa_node; unsigned int num_children; - u8 childmask; + u8 groupmask; struct list_head list; }; @@ -79,7 +88,7 @@ struct tmigr_group { * hierarchy * @remote: Is set when timers of the CPU are expired remotely * @tmgroup: Pointer to the parent group - * @childmask: childmask of tmigr_cpu in the parent group + * @groupmask: mask of tmigr_cpu in the parent group * @wakeup: Stores the first timer when the timer migration * hierarchy is completely idle and remote expiry was done; * is returned to timer code in the idle path and is only @@ -92,7 +101,7 @@ struct tmigr_cpu { bool idle; bool remote; struct tmigr_group *tmgroup; - u8 childmask; + u8 groupmask; u64 wakeup; struct tmigr_event cpuevt; }; @@ -108,8 +117,8 @@ union tmigr_state { u32 state; /** * struct - split state of tmigr_group - * @active: Contains each childmask bit of the active children - * @migrator: Contains childmask of the child which is migrator + * @active: Contains each mask bit of the active children + * @migrator: Contains mask of the child which is migrator * @seq: Sequence counter needs to be increased when an update * to the tmigr_state is done. It prevents a race when * updates in the child groups are propagated in changed diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index fc205ad167a9..d1d5ea2d0a1b 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -902,7 +902,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, i = *idx ? : task->curr_ret_stack; while (i > 0) { - ret_stack = get_ret_stack(current, i, &i); + ret_stack = get_ret_stack(task, i, &i); if (!ret_stack) break; /* diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index cb0871fbdb07..314ffc143039 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -34,8 +34,6 @@ MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; -#define MIN(x, y) ((x) < (y) ? (x) : (y)) - static void busy_wait(ulong time) { u64 start, end; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 28853966aa9a..cebd879a30cb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -693,18 +693,6 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, } /** - * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer - * @buffer: The ring_buffer to get the number of pages from - * @cpu: The cpu of the ring_buffer to get the number of pages from - * - * Returns the number of pages used by a per_cpu buffer of the ring buffer. - */ -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) -{ - return buffer->buffers[cpu]->nr_pages; -} - -/** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from * @cpu: The cpu of the ring_buffer to get the number of pages from diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10cd38bce2f1..ebe7ce2f5f4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, trace_access_unlock(iter->cpu_file); if (ret < 0) { - if (trace_empty(iter)) { + if (trace_empty(iter) && !iter->closed) { if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8783bebd0562..bd3e3069300e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1634,6 +1634,29 @@ static inline void *event_file_data(struct file *filp) extern struct mutex event_mutex; extern struct list_head ftrace_events; +/* + * When the trace_event_file is the filp->i_private pointer, + * it must be taken under the event_mutex lock, and then checked + * if the EVENT_FILE_FL_FREED flag is set. If it is, then the + * data pointed to by the trace_event_file can not be trusted. + * + * Use the event_file_file() to access the trace_event_file from + * the filp the first time under the event_mutex and check for + * NULL. If it is needed to be retrieved again and the event_mutex + * is still held, then the event_file_data() can be used and it + * is guaranteed to be valid. + */ +static inline struct trace_event_file *event_file_file(struct file *filp) +{ + struct trace_event_file *file; + + lockdep_assert_held(&event_mutex); + file = READ_ONCE(file_inode(filp)->i_private); + if (!file || file->flags & EVENT_FILE_FL_FREED) + return NULL; + return file; +} + extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6ef29eba90ce..7266ec2a4eea 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -992,18 +992,18 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) void event_file_get(struct trace_event_file *file) { - atomic_inc(&file->ref); + refcount_inc(&file->ref); } void event_file_put(struct trace_event_file *file) { - if (WARN_ON_ONCE(!atomic_read(&file->ref))) { + if (WARN_ON_ONCE(!refcount_read(&file->ref))) { if (file->flags & EVENT_FILE_FL_FREED) kmem_cache_free(file_cachep, file); return; } - if (atomic_dec_and_test(&file->ref)) { + if (refcount_dec_and_test(&file->ref)) { /* Count should only go to zero when it is freed */ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) return; @@ -1386,12 +1386,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, char buf[4] = "0"; mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (likely(file)) flags = file->flags; mutex_unlock(&event_mutex); - if (!file || flags & EVENT_FILE_FL_FREED) + if (!file) return -ENODEV; if (flags & EVENT_FILE_FL_ENABLED && @@ -1424,8 +1424,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 1: ret = -ENODEV; mutex_lock(&event_mutex); - file = event_file_data(filp); - if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) { + file = event_file_file(filp); + if (likely(file)) { ret = tracing_update_buffers(file->tr); if (ret < 0) { mutex_unlock(&event_mutex); @@ -1540,7 +1540,8 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; @@ -1572,7 +1573,8 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct ftrace_event_field *field; const char *array_descriptor; @@ -1627,12 +1629,14 @@ static int f_show(struct seq_file *m, void *v) static void *f_start(struct seq_file *m, loff_t *pos) { + struct trace_event_file *file; void *p = (void *)FORMAT_HEADER; loff_t l = 0; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - if (!event_file_data(m->private)) + file = event_file_file(m->private); + if (!file) return ERR_PTR(-ENODEV); while (l < *pos && p) @@ -1706,8 +1710,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file && !(file->flags & EVENT_FILE_FL_FREED)) + file = event_file_file(filp); + if (file) print_event_filter(file, s); mutex_unlock(&event_mutex); @@ -1736,9 +1740,13 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return PTR_ERR(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file) - err = apply_event_filter(file, buf); + file = event_file_file(filp); + if (file) { + if (file->flags & EVENT_FILE_FL_FREED) + err = -ENODEV; + else + err = apply_event_filter(file, buf); + } mutex_unlock(&event_mutex); kfree(buf); @@ -2485,7 +2493,6 @@ static int event_callback(const char *name, umode_t *mode, void **data, if (strcmp(name, "format") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_event_format_fops; - *data = call; return 1; } @@ -2996,7 +3003,7 @@ trace_create_new_event(struct trace_event_call *call, atomic_set(&file->tm_ref, 0); INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); - event_file_get(file); + refcount_set(&file->ref, 1); return file; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6ece1308d36a..5f9119eb7c67 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5601,7 +5601,7 @@ static int hist_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; @@ -5880,7 +5880,7 @@ static int hist_debug_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 8650562bdaa9..a8f076809db4 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -299,7 +299,7 @@ event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, strim(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (file) { call = file->event_call; size = parse_entry(buf, call, &entry); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4bec043c8690..a5e3d6acf1e1 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -159,7 +159,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos) /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) return ERR_PTR(-ENODEV); @@ -213,7 +213,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) mutex_lock(&event_mutex); - if (unlikely(!event_file_data(file))) { + if (unlikely(!event_file_file(file))) { mutex_unlock(&event_mutex); return -ENODEV; } @@ -293,7 +293,7 @@ static ssize_t event_trigger_regex_write(struct file *file, strim(buf); mutex_lock(&event_mutex); - event_file = event_file_data(file); + event_file = event_file_file(file); if (unlikely(!event_file)) { mutex_unlock(&event_mutex); kfree(buf); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index a4dcf0f24352..3a56e7c8aa4f 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -454,7 +454,7 @@ static struct tracing_map_elt *get_free_elt(struct tracing_map *map) struct tracing_map_elt *elt = NULL; int idx; - idx = atomic_inc_return(&map->next_elt); + idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts); if (idx < map->max_elts) { elt = *(TRACING_MAP_ELT(map->elts, idx)); if (map->ops && map->ops->elt_init) @@ -699,7 +699,7 @@ void tracing_map_clear(struct tracing_map *map) { unsigned int i; - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); atomic64_set(&map->hits, 0); atomic64_set(&map->drops, 0); @@ -783,7 +783,7 @@ struct tracing_map *tracing_map_create(unsigned int map_bits, map->map_bits = map_bits; map->max_elts = (1 << map_bits); - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); map->map_size = (1 << (map_bits + 1)); map->ops = ops; diff --git a/lib/btree.c b/lib/btree.c index 49420cae3a83..bb81d3393ac5 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -43,7 +43,6 @@ #include <linux/slab.h> #include <linux/module.h> -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NODESIZE MAX(L1_CACHE_BYTES, 128) struct btree_geo { diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c index a105e6369efc..6b62a6bdd50e 100644 --- a/lib/cpumask_kunit.c +++ b/lib/cpumask_kunit.c @@ -152,4 +152,5 @@ static struct kunit_suite test_cpumask_suite = { }; kunit_test_suite(test_cpumask_suite); +MODULE_DESCRIPTION("KUnit tests for cpumask"); MODULE_LICENSE("GPL"); diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 3518e7394eca..ca736166f100 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -232,7 +232,8 @@ static int INIT get_next_block(struct bunzip_data *bd) RUNB) */ symCount = symTotal+2; for (j = 0; j < groupCount; j++) { - unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + unsigned char length[MAX_SYMBOLS]; + unsigned short temp[MAX_HUFCODE_BITS+1]; int minLen, maxLen, pp; /* Read Huffman code lengths for each symbol. They're stored in a way similar to mtf; record a starting diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 20a858031f12..9d34d35908da 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -37,7 +37,9 @@ #include <linux/decompress/mm.h> +#ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif static long long INIT read_int(unsigned char *ptr, int size) { diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index d3fb09e6eff1..402e160e7186 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -194,4 +194,5 @@ static int __init find_bit_test(void) } module_init(find_bit_test); +MODULE_DESCRIPTION("Test for find_*_bit functions"); MODULE_LICENSE("GPL"); diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8..fa692c86f069 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -121,6 +121,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index f314a0c15a6d..2abc78367dd1 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -668,7 +668,6 @@ DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); static void overflow_allocation_test(struct kunit *test) { - const char device_name[] = "overflow-test"; struct device *dev; int count = 0; @@ -678,7 +677,7 @@ static void overflow_allocation_test(struct kunit *test) } while (0) /* Create dummy device for devm_kmalloc()-family tests. */ - dev = kunit_device_register(test, device_name); + dev = kunit_device_register(test, "overflow-test"); KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), "Cannot register test device\n"); diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6dfb8d46a4ff..65a75d58ed9e 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -1486,4 +1486,5 @@ static void __init selftest(void) KSTM_MODULE_LOADERS(test_bitmap); MODULE_AUTHOR("david decotigny <david.decotigny@googlers.com>"); +MODULE_DESCRIPTION("Test cases for bitmap API"); MODULE_LICENSE("GPL"); diff --git a/lib/usercopy.c b/lib/usercopy.c index 499a7a7d54db..7b17b83c8042 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -12,40 +12,18 @@ /* out-of-line parts */ -#ifndef INLINE_COPY_FROM_USER +#if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST) unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long res = n; - might_fault(); - if (!should_fail_usercopy() && likely(access_ok(from, n))) { - /* - * Ensure that bad access_ok() speculation will not - * lead to nasty side effects *after* the copy is - * finished: - */ - barrier_nospec(); - instrument_copy_from_user_before(to, from, n); - res = raw_copy_from_user(to, from, n); - instrument_copy_from_user_after(to, from, n, res); - } - if (unlikely(res)) - memset(to + (n - res), 0, res); - return res; + return _inline_copy_from_user(to, from, n); } EXPORT_SYMBOL(_copy_from_user); #endif -#ifndef INLINE_COPY_TO_USER +#if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST) unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { - might_fault(); - if (should_fail_usercopy()) - return n; - if (likely(access_ok(to, n))) { - instrument_copy_to_user(to, from, n); - n = raw_copy_to_user(to, from, n); - } - return n; + return _inline_copy_to_user(to, from, n); } EXPORT_SYMBOL(_copy_to_user); #endif diff --git a/lib/vsprintf.c b/lib/vsprintf.c index cdd4e2314bfc..2d71b1115916 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1080,7 +1080,7 @@ char *resource_string(char *buf, char *end, struct resource *res, #define FLAG_BUF_SIZE (2 * sizeof(res->flags)) #define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") #define RAW_BUF_SIZE sizeof("[mem - flags 0x]") - char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + char sym[MAX(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; char *p = sym, *pend = sym + sizeof(sym); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f9696c94e211..67c86a5d64a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -89,9 +89,17 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, bool smaps = tva_flags & TVA_SMAPS; bool in_pf = tva_flags & TVA_IN_PF; bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS; + unsigned long supported_orders; + /* Check the intersection of requested and supported orders. */ - orders &= vma_is_anonymous(vma) ? - THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE; + if (vma_is_anonymous(vma)) + supported_orders = THP_ORDERS_ALL_ANON; + else if (vma_is_dax(vma)) + supported_orders = THP_ORDERS_ALL_FILE_DAX; + else + supported_orders = THP_ORDERS_ALL_FILE_DEFAULT; + + orders &= supported_orders; if (!orders) return 0; @@ -877,7 +885,7 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret, off_sub; - if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) return 0; if (off_end <= off_align || (off_end - off_align) < size) @@ -1677,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1720,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; - } - -out: - if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; + } - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1745,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914..0c3f56b3578e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { + memmap_boot_pages_add(-1); free_bootmem_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1); } else { + memmap_pages_add(-1); __free_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1); } } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); + memmap_pages_add(1); } /* @@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); - if (!page) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); + if (!page) goto out; - } list_add(&page->lru, list); } - - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); + memmap_pages_add(nr_pages); return 0; out: diff --git a/mm/list_lru.c b/mm/list_lru.c index a29d96929d7c..9b7ff06e9d32 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -85,6 +85,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) } #endif /* CONFIG_MEMCG */ +/* The caller must ensure the memcg lifetime. */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -109,14 +110,22 @@ EXPORT_SYMBOL_GPL(list_lru_add); bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_add(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_add(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_add_obj); +/* The caller must ensure the memcg lifetime. */ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -139,11 +148,18 @@ EXPORT_SYMBOL_GPL(list_lru_del); bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_del(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_del(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_del_obj); diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e..417c96f2da28 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, buf = endp + 1; cfd = simple_strtoul(buf, &endp, 10); - if ((*endp != ' ') && (*endp != '\0')) + if (*endp == '\0') + buf = endp; + else if (*endp == ' ') + buf = endp + 1; + else return -EINVAL; - buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 960371788687..f29157288b7d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3386,11 +3386,28 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) #define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1) static DEFINE_IDR(mem_cgroup_idr); +static DEFINE_SPINLOCK(memcg_idr_lock); + +static int mem_cgroup_alloc_id(void) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&memcg_idr_lock); + ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1, + GFP_NOWAIT); + spin_unlock(&memcg_idr_lock); + idr_preload_end(); + return ret; +} static void mem_cgroup_id_remove(struct mem_cgroup *memcg) { if (memcg->id.id > 0) { + spin_lock(&memcg_idr_lock); idr_remove(&mem_cgroup_idr, memcg->id.id); + spin_unlock(&memcg_idr_lock); + memcg->id.id = 0; } } @@ -3524,8 +3541,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) if (!memcg) return ERR_PTR(error); - memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, - 1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL); + memcg->id.id = mem_cgroup_alloc_id(); if (memcg->id.id < 0) { error = memcg->id.id; goto fail; @@ -3667,7 +3683,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) * publish it here at the end of onlining. This matches the * regular ID destruction during offlining. */ + spin_lock(&memcg_idr_lock); idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + spin_unlock(&memcg_idr_lock); return 0; offline_kmem: diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c9117..7066fc84f351 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work) mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void) for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } diff --git a/mm/memory.c b/mm/memory.c index 1ff7b6f51ec1..3c01d68065be 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4780,7 +4780,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio, { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; - bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE); + bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); pte_t entry; flush_icache_pages(vma, page, nr); @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d5..923ea80ba744 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out: return rc; } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, + enum migrate_mode mode) { int rc; - folio_lock(folio); + if (mode == MIGRATE_ASYNC) { + if (!folio_trylock(folio)) + return -EAGAIN; + } else { + folio_lock(folio); + } rc = split_folio_to_list(folio, split_folios); folio_unlock(folio); if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from, */ if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios) == 0) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from, if (!thp_migration_supported() && is_thp) { nr_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, split_folios)) { + if (!try_split_folio(folio, split_folios, mode)) { stats->nr_thp_split++; stats->nr_split++; continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from, stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (is_large && !nosplit) { - int ret = try_split_folio(folio, split_folios); + int ret = try_split_folio(folio, split_folios, mode); if (!ret) { stats->nr_thp_split += is_thp; diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b..51960079875b 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; - mod_node_early_perpage_metadata(pgdat->node_id, - DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); @@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); @@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0..15bba28acc00 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma) static bool is_madv_discard(int behavior) { - return behavior & - (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | - MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + return true; + } + + return false; } static bool is_ro_anon(struct vm_area_struct *vma) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8337926b89d4..c565de8f48e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -2343,16 +2343,20 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) static void drain_pages_zone(unsigned int cpu, struct zone *zone) { struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - int count = READ_ONCE(pcp->count); - - while (count) { - int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); - count -= to_drain; + int count; + do { spin_lock(&pcp->lock); - free_pcppages_bulk(zone, to_drain, pcp, 0); + count = pcp->count; + if (count) { + int to_drain = min(count, + pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + + free_pcppages_bulk(zone, to_drain, pcp, 0); + count -= to_drain; + } spin_unlock(&pcp->lock); - } + } while (count); } /* @@ -3068,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3364,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3389,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3446,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -5751,7 +5750,6 @@ void __init setup_per_cpu_pageset(void) for_each_online_pgdat(pgdat) pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - store_early_perpage_metadata(); } __meminit void zone_pcp_init(struct zone *zone) @@ -5815,6 +5813,16 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char return pages; } +void free_reserved_page(struct page *page) +{ + clear_page_tag_ref(page); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + adjust_managed_page_count(page, 1); +} +EXPORT_SYMBOL(free_reserved_page); + static int page_alloc_cpu_dead(unsigned int cpu) { struct zone *zone; @@ -6930,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6958,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7017,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401..641d93f6af4c 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - DIV_ROUND_UP(table_size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid) else addr = vzalloc_node(size, nid); - if (addr) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(size, PAGE_SIZE)); - } + if (addr) + memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr) { size_t table_size; struct page *page; - struct pglist_data *pgdat; table_size = page_ext_size * PAGES_PER_SECTION; + memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { - page = vmalloc_to_page(addr); - pgdat = page_pgdat(page); vfree(addr); } else { page = virt_to_page(addr); - pgdat = page_pgdat(page); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } - - mod_node_page_state(pgdat, NR_MEMMAP, - -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); - } static void __free_page_ext(unsigned long pfn) diff --git a/mm/shmem.c b/mm/shmem.c index 2faa9daaf54b..5a77acf6ac6a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1629,11 +1629,6 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long mask = READ_ONCE(huge_shmem_orders_always); unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size); unsigned long vm_flags = vma->vm_flags; - /* - * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that - * are enabled for this vma. - */ - unsigned long orders = BIT(PMD_ORDER + 1) - 1; loff_t i_size; int order; @@ -1678,7 +1673,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, if (global_huge) mask |= READ_ONCE(huge_shmem_orders_inherit); - return orders & mask; + return THP_ORDERS_ALL_FILE_DEFAULT & mask; } static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf, @@ -1686,6 +1681,7 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault unsigned long orders) { struct vm_area_struct *vma = vmf->vma; + pgoff_t aligned_index; unsigned long pages; int order; @@ -1697,9 +1693,9 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault order = highest_order(orders); while (orders) { pages = 1UL << order; - index = round_down(index, pages); - if (!xa_find(&mapping->i_pages, &index, - index + pages - 1, XA_PRESENT)) + aligned_index = round_down(index, pages); + if (!xa_find(&mapping->i_pages, &aligned_index, + aligned_index + pages - 1, XA_PRESENT)) break; order = next_order(&orders, order); } diff --git a/mm/slub.c b/mm/slub.c index 3520acaf9afa..c9d8a2497fd6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4690,6 +4690,9 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) if (!df.slab) continue; + if (kfence_free(df.freelist)) + continue; + do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b..edcc7a6b0f6f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) { - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, - PAGE_SIZE)); - } else { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(end - start, PAGE_SIZE)); - } + if (system_state == SYSTEM_BOOTING) + memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); + else + memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d13..0f018c6f9ec5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; #ifndef CONFIG_SPARSEMEM_VMEMMAP - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); #endif } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, - -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); + memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a1..af2de36549d6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc636..e875f2a4915f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat, } #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_pages); +} + #ifdef CONFIG_COMPACTION struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", - "nr_memmap", - "nr_memmap_boot", - /* enum writeback_stat_item counters */ + /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", + "nr_memmap_pages", + "nr_memmap_boot_pages", #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ @@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = { #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \ - NR_VM_WRITEBACK_STAT_ITEMS + \ + NR_VM_STAT_ITEMS + \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); - v += NR_VM_WRITEBACK_STAT_ITEMS; + v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); + v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); + v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS all_vm_events(v); @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void) module_init(extfrag_debug_init); #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ - early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ - int nid; - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) { - nid = pgdat->node_id; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - early_perpage_metadata[nid]); - } -} diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5d6581ab7c07..2d3163e4da96 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -120,8 +120,6 @@ #define CLASS_BITS 8 #define MAGIC_VAL_BITS 8 -#define MAX(a, b) ((a) >= (b) ? (a) : (b)) - #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL)) /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8a4ebd93adfc..06da8ac13dca 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -119,13 +119,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: - /* If discovery was not started then it was initiated by the - * MGMT interface so no MGMT event shall be generated either - */ - if (old_state != DISCOVERY_STARTING) { - hdev->discovery.state = old_state; - return; - } mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dce8035ca799..d0c118c47f6c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1721,9 +1721,10 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) switch (enable) { case LE_SCAN_ENABLE: hci_dev_set_flag(hdev, HCI_LE_SCAN); - if (hdev->le_scan_type == LE_SCAN_ACTIVE) + if (hdev->le_scan_type == LE_SCAN_ACTIVE) { clear_pending_adv_report(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + } break; case LE_SCAN_DISABLE: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index cd2ed16da8a4..e79cd40bd079 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2976,6 +2976,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) */ filter_policy = hci_update_accept_list_sync(hdev); + /* If suspended and filter_policy set to 0x00 (no acceptlist) then + * passive scanning cannot be started since that would require the host + * to be woken up to process the reports. + */ + if (hdev->suspended && !filter_policy) { + /* Check if accept list is empty then there is no need to scan + * while suspended. + */ + if (list_empty(&hdev->le_accept_list)) + return 0; + + /* If there are devices is the accept_list that means some + * devices could not be programmed which in non-suspended case + * means filter_policy needs to be set to 0x00 so the host needs + * to filter, but since this is treating suspended case we + * can ignore device needing host to filter to allow devices in + * the acceptlist to be able to wakeup the system. + */ + filter_policy = 0x01; + } + /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support @@ -2998,6 +3019,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; + + /* Disable duplicates filter when scanning for advertisement + * monitor for the following reasons. + * + * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm + * controllers ignore RSSI_Sampling_Period when the duplicates + * filter is enabled. + * + * For SW pattern filtering, when we're not doing interleaved + * scanning, it is necessary to disable duplicates filter, + * otherwise hosts can only receive one advertisement and it's + * impossible to know if a peer is still in range. + */ + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c3c26bbb5dda..9988ba382b68 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6774,6 +6774,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { + l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9a1cb5079a7a..b2ae0d2434d2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - HLIST_HEAD(deleted_head); struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_find_del_pg(br, pg); - hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); - br_multicast_gc(&deleted_head); + flush_work(&br->mcast_gc_work); br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 09f6a773a708..8f9c19d992ac 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv, if (likely(nf_ct_is_confirmed(ct))) return NF_ACCEPT; + if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + WARN_ON_ONCE(skb_shared(skb)); - WARN_ON_ONCE(refcount_read(&nfct->use) != 1); /* We can't call nf_confirm here, it would create a dependency * on nf_conntrack module. diff --git a/net/core/dev.c b/net/core/dev.c index 6ea1d20676fb..f66e61407883 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5150,6 +5150,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) bpf_net_ctx_clear(bpf_net_ctx); return XDP_DROP; } + bpf_net_ctx_clear(bpf_net_ctx); } return XDP_PASS; out_redir: @@ -9911,6 +9912,15 @@ static void netdev_sync_lower_features(struct net_device *upper, } } +static bool netdev_has_ip_or_hw_csum(netdev_features_t features) +{ + netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + bool ip_csum = (features & ip_csum_mask) == ip_csum_mask; + bool hw_csum = features & NETIF_F_HW_CSUM; + + return ip_csum || hw_csum; +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -9992,15 +10002,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_LRO; } - if (features & NETIF_F_HW_TLS_TX) { - bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - bool hw_csum = features & NETIF_F_HW_CSUM; - - if (!ip_csum && !hw_csum) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; - } + if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; } if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { @@ -10008,6 +10012,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_HW_TLS_RX; } + if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n"); + features &= ~NETIF_F_GSO_UDP_L4; + } + return features; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8ec35194bfcb..ab150641142a 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -148,9 +148,9 @@ static void linkwatch_schedule_work(int urgent) * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) - mod_delayed_work(system_wq, &linkwatch_work, 0); + mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else - schedule_delayed_work(&linkwatch_work, delay); + queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 87e67194f240..73fd7f543fd0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3288,7 +3288,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(tgt_net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index ae4b4b28a601..655ff5224ffa 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -35,7 +35,10 @@ struct cmis_cdb_fw_mng_features_rpl { __be16 resv7; }; -#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01 +enum cmis_cdb_fw_write_mechanism { + CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01, + CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, +}; static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, @@ -64,7 +67,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, } rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; - if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) { + if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL || + rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) { ethnl_module_fw_flash_ntf_err(dev, ntf_params, "Write LPL is not supported", NULL); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 983fee76f5cf..e18823bf2330 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1331,13 +1331,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; + u32 user_indir_len = 0, indir_bytes = 0; struct ethtool_rxfh_param rxfh_dev = {}; struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; bool locked = false; /* dev->ethtool->rss_lock taken */ - u32 indir_bytes = 0; bool create = false; u8 *rss_config; int ret; @@ -1369,23 +1369,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key, function - * or input transformation. - */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.key_size && rxfh.key_size != dev_key_size)) + return -EINVAL; + + /* Must request at least one change: indir size, hash key, function + * or input transformation. + * There's no need for any of it in case of context creation. + */ + if (!create && (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; - if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); + indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); - rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); + rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER); if (!rss_config) return -ENOMEM; @@ -1400,6 +1402,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, */ if (rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { + user_indir_len = indir_bytes; rxfh_dev.indir = (u32 *)rss_config; rxfh_dev.indir_size = dev_indir_size; ret = ethtool_copy_validate_indir(rxfh_dev.indir, @@ -1426,7 +1429,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.key_size = dev_key_size; rxfh_dev.key = rss_config + indir_bytes; if (copy_from_user(rxfh_dev.key, - useraddr + rss_cfg_offset + indir_bytes, + useraddr + rss_cfg_offset + user_indir_len, rxfh.key_size)) { ret = -EFAULT; goto out; @@ -1449,12 +1452,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; u32 ctx_id; /* driver uses new API, core allocates ID */ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + XA_LIMIT(1, limit - 1), + GFP_KERNEL_ACCOUNT); if (ret < 0) { kfree(ctx); goto out; @@ -1474,16 +1478,21 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.input_xfrm = rxfh.input_xfrm; if (rxfh.rss_context && ops->create_rxfh_context) { - if (create) + if (create) { ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); - else if (rxfh_dev.rss_delete) + /* Make sure driver populates defaults */ + WARN_ON_ONCE(!ret && !rxfh_dev.key && + !memchr_inv(ethtool_rxfh_context_key(ctx), + 0, ctx->key_size)); + } else if (rxfh_dev.rss_delete) { ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context, extack); - else + } else { ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack); + } } else { ret = ops->set_rxfh(dev, &rxfh_dev, extack); } @@ -1522,6 +1531,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, kfree(ctx); goto out; } + + /* Fetch the defaults for the old API, in the new API drivers + * should write defaults into ctx themselves. + */ + rxfh_dev.indir = (u32 *)rss_config; + rxfh_dev.indir_size = dev_indir_size; + + rxfh_dev.key = rss_config + indir_bytes; + rxfh_dev.key_size = dev_key_size; + + ret = ops->get_rxfh(dev, &rxfh_dev); + if (WARN_ON(ret)) { + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + kfree(ctx); + goto out; + } } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); @@ -1530,12 +1555,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh_dev.indir) { for (i = 0; i < dev_indir_size; i++) ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; - ctx->indir_configured = 1; + ctx->indir_configured = + rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; } if (rxfh_dev.key) { memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, dev_key_size); - ctx->key_configured = 1; + ctx->key_configured = !!rxfh.key_size; } if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) ctx->hfunc = rxfh_dev.hfunc; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 71679137eff2..5c4c4505ab9a 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -111,7 +111,8 @@ rss_reply_size(const struct ethnl_req_info *req_base, const struct rss_reply_data *data = RSS_REPDATA(reply_base); int len; - len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */ + nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ @@ -124,6 +125,11 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); + struct rss_req_info *request = RSS_REQINFO(req_base); + + if (request->rss_context && + nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context)) + return -EMSGSIZE; if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 4d42d0756fd7..a5db7c67d61b 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = { static int __init iptable_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv4_table, - iptable_nat_table_init); + int ret; + /* net->gen->ptr[iptable_nat_net_id] must be allocated + * before calling iptable_nat_table_init(). + */ + ret = register_pernet_subsys(&iptable_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&iptable_nat_net_ops); - if (ret < 0) { - xt_unregister_template(&nf_nat_ipv4_table); - return ret; - } + ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + if (ret < 0) + unregister_pernet_subsys(&iptable_nat_net_ops); return ret; } static void __exit iptable_nat_exit(void) { - unregister_pernet_subsys(&iptable_nat_net_ops); xt_unregister_template(&nf_nat_ipv4_table); + unregister_pernet_subsys(&iptable_nat_net_ops); } module_init(iptable_nat_init); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6c4664c681ca..40053a02bae1 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -44,7 +44,7 @@ #include <net/sock.h> #include <net/raw.h> -#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) +#define TCPUDP_MIB_MAX MAX_T(u32, UDP_MIB_MAX, TCP_MIB_MAX) /* * Report socket allocation statistics [mea@utu.fi] diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 85531437890c..db6516092daf 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -267,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head) kfree_sensitive(key); } -void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +static void tcp_ao_info_free_rcu(struct rcu_head *head) { - struct tcp_ao_info *ao; + struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu); struct tcp_ao_key *key; struct hlist_node *n; + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); + } + kfree(ao); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao) +{ + size_t total_ao_sk_mem = 0; + struct tcp_ao_key *key; + + hlist_for_each_entry(key, &ao->head, node) + total_ao_sk_mem += tcp_ao_sizeof_key(key); + atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + if (twsk) { ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); - tcp_twsk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL); } else { ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); - tcp_sk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL); } if (!ao || !refcount_dec_and_test(&ao->refcnt)) return; - hlist_for_each_entry_safe(key, n, &ao->head, node) { - hlist_del_rcu(&key->node); - if (!twsk) - atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); - call_rcu(&key->rcu, tcp_ao_key_free_rcu); - } - - kfree_rcu(ao, rcu); - static_branch_slow_dec_deferred(&tcp_ao_needed); + if (!twsk) + tcp_ao_sk_omem_free(sk, ao); + call_rcu(&ao->rcu, tcp_ao_info_free_rcu); } void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 454362e359da..e37488d3453f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; + + if (old_ratio != tcp_sk(sk)->scaling_ratio) + WRITE_ONCE(tcp_sk(sk)->window_clamp, + tcp_win_from_space(sk, sk->sk_rcvbuf)); } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4b791e74529e..e4ad3311e148 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (thlen < sizeof(*th)) goto out; + if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) + goto out; + if (!pskb_may_pull(skb, thlen)) goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index aa2e0a28ca61..b254a5dadfcf 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -278,6 +278,16 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); + if (unlikely(skb_checksum_start(gso_skb) != + skb_transport_header(gso_skb))) + return ERR_PTR(-EINVAL); + + /* We don't know if egress device can segment and checksum the packet + * when IPv6 extension headers are present. Fall back to software GSO. + */ + if (gso_skb->ip_summed != CHECKSUM_PARTIAL) + features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 70a0b2ad6bd7..b8eec1b6cc2c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { + bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; @@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, break; #endif default: - if (ndisc_is_useropt(dev, nd_opt)) { - ndopts->nd_useropts_end = nd_opt; - if (!ndopts->nd_useropts) - ndopts->nd_useropts = nd_opt; - } else { - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the - * protocol. - */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); - } + unknown = true; + } + if (ndisc_is_useropt(dev, nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else if (unknown) { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK(2, notice, + "%s: ignored unsupported option; type=%d, len=%d\n", + __func__, + nd_opt->nd_opt_type, + nd_opt->nd_opt_len); } next_opt: opt_len -= l; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 52cf104e3478..e119d4f090cc 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv6_table, - ip6table_nat_table_init); + int ret; + /* net->gen->ptr[ip6table_nat_net_id] must be allocated + * before calling ip6t_nat_register_lookups(). + */ + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&ip6table_nat_net_ops); + ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); if (ret) - xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); return ret; } static void __exit ip6table_nat_exit(void) { - unregister_pernet_subsys(&ip6table_nat_net_ops); xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); } module_init(ip6table_nat_init); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f0844c9315d..4120e67a8ce6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, }; struct inet_frag_queue *q; + if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))) + key.iif = 0; + q = inet_frag_find(nf_frag->fqdir, &key); if (!q) return NULL; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 6d1d9221649d..752327b10dde 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -27,7 +27,7 @@ #include <net/ipv6.h> #define MAX4(a, b, c, d) \ - max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) + MAX_T(u32, MAX_T(u32, a, b), MAX_T(u32, c, d)) #define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ IPSTATS_MIB_MAX, ICMP_MIB_MAX) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c3b0b610b0aa..c00323fa9eb6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_path *path = iucv->path; - if (iucv->path) { - iucv->path = NULL; + /* Whoever resets the path pointer, must sever and free it. */ + if (xchg(&iucv->path, NULL)) { if (with_user_data) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c80ab3f26084..2e86f520f799 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -86,6 +86,11 @@ /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 +#define L2TP_DEPTH_NESTING 2 +#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "L2TP requires its own lockdep subclass" +#endif + /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { @@ -1124,7 +1129,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); nf_reset_ct(skb); - bh_lock_sock_nested(sk); + /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by + * nested socket calls on the same lockdep socket class. This can + * happen when data from a user socket is routed over l2tp, which uses + * another userspace socket. + */ + spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); + if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; @@ -1176,7 +1187,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); out_unlock: - bh_unlock_sock(sk); + spin_unlock(&sk->sk_lock.slock); return ret; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 85cb71de370f..b02b84ce2130 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -114,7 +114,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, /* apply all changes now - no failures allowed */ - if (monitor_sdata) + if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { @@ -3053,6 +3053,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return -EOPNOTSUPP; + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) @@ -3115,7 +3118,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 72a9ba8bc5fd..edba4a31844f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1768,7 +1768,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } sdata = rcu_dereference(local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; @@ -3957,7 +3957,8 @@ begin: break; } tx.sdata = rcu_dereference(local->monitor_sdata); - if (tx.sdata) { + if (tx.sdata && + ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &tx.sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ced19ce7c51a..c7ad9bc5973a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -776,7 +776,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, sdata = rcu_dereference_check(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx) || lockdep_is_held(&local->hw.wiphy->mtx)); - if (sdata && + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only || sdata->flags & IEEE80211_SDATA_IN_DRIVER)) iterator(data, sdata->vif.addr, &sdata->vif); diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 3ae46b545d2c..2d3efb405437 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -94,7 +94,7 @@ static size_t subflow_get_info_size(const struct sock *sk) nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ - nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index c30405e76833..7884217f33eb 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), + SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), + SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2704afd0dfe4..66aa67f49d03 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -14,7 +14,9 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */ + MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8e8dcfbc2993..ac2f1a54cc43 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; - opts->backup = subflow_req->backup; + opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; @@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && + (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 55406720c607..23bb89c94e90 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -426,6 +426,18 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, &skc_local); } +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) +{ + struct mptcp_addr_info skc_local; + + mptcp_local_address((struct sock_common *)skc, &skc_local); + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_is_backup(msk, &skc_local); + + return mptcp_pm_nl_is_backup(msk, &skc_local); +} + int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ea9e5817b9e9..4cae2aa7be5c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -348,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { - if (mptcp_pm_is_kernel(msk)) + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; sk_reset_timer(sk, &add_entry->add_timer, @@ -471,7 +471,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con slow = lock_sock_fast(ssk); if (prio) { subflow->send_mp_prio = 1; - subflow->backup = backup; subflow->request_bkup = backup; } @@ -513,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *local; unsigned int add_addr_signal_max; unsigned int local_addr_max; struct pm_nl_pernet *pernet; @@ -556,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { - local = select_signal_address(pernet, msk); - /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -568,16 +565,26 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - if (local) { - if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); - mptcp_pm_nl_addr_send_ack(msk); - } - } + local = select_signal_address(pernet, msk); + if (!local) + goto subflow; + + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. + */ + if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + return; + + __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); + + if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = local; } +subflow: /* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max && msk->pm.subflows < subflows_max) { @@ -585,9 +592,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) bool fullmesh; int i, nr; - local = select_local_address(pernet, msk); - if (!local) - break; + if (signal_and_subflow) { + local = signal_and_subflow; + signal_and_subflow = NULL; + } else { + local = select_local_address(pernet, msk); + if (!local) + break; + } fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); @@ -1102,6 +1114,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc return ret; } +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + rcu_read_unlock(); + + return backup; +} + #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1311,8 +1341,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + if (addr.addr.port && !address_use_port(&addr)) { + GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); return -EINVAL; } @@ -1401,6 +1431,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= ret; mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } @@ -1534,16 +1565,25 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; + int anno_nr = 0; list_for_each_entry(entry, rm_list, list) { - if ((remove_anno_list_by_saddr(msk, &entry->addr) || - lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && - alist.nr < MPTCP_RM_IDS_MAX) - alist.ids[alist.nr++] = entry->addr.id; + if (alist.nr >= MPTCP_RM_IDS_MAX) + break; + + /* only delete if either announced or matching a subflow */ + if (remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!lookup_subflow_by_saddr(&msk->conn_list, + &entry->addr)) + continue; + + alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } @@ -1556,17 +1596,18 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && - slist.nr < MPTCP_RM_IDS_MAX) + if (slist.nr < MPTCP_RM_IDS_MAX && + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = entry->addr.id; - if (remove_anno_list_by_saddr(msk, &entry->addr) && - alist.nr < MPTCP_RM_IDS_MAX) + if (alist.nr < MPTCP_RM_IDS_MAX && + remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index f0a4590506c6..8eaa9fbe3e34 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -165,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, false)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + spin_unlock_bh(&msk->pm.lock); + + return backup; +} + int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a26c2c840fd9..0d536b183a6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -350,8 +350,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) + if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; + } has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -844,10 +846,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) sk_rbuf = ssk_rbuf; /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); + if (__mptcp_rmem(sk) > sk_rbuf) return; - } /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); @@ -1422,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } mptcp_for_each_subflow(msk, subflow) { + bool backup = subflow->backup || subflow->request_bkup; + trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; tout = max(tout, mptcp_timeout_from_subflow(subflow)); - nr_active += !subflow->backup; + nr_active += !backup; pace = subflow->avg_pacing_rate; if (unlikely(!pace)) { /* init pacing rate from socket */ @@ -1439,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); - if (linger_time < send_info[subflow->backup].linger_time) { - send_info[subflow->backup].ssk = ssk; - send_info[subflow->backup].linger_time = linger_time; + if (linger_time < send_info[backup].linger_time) { + send_info[backup].ssk = ssk; + send_info[backup].linger_time = linger_time; } } __mptcp_set_timeout(sk, tout); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b11a4e50d52b..60c6b073d65f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -448,6 +448,7 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, + request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; @@ -1108,6 +1109,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 39e2cbdf3801..a21c712350c3 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -100,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) return NULL; } subflow_req->local_id = local_id; + subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req); return msk; } @@ -168,6 +169,9 @@ static int subflow_check_req(struct request_sock *req, return 0; } else if (opt_mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); + + if (mp_opt.backup) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); } if (opt_mp_capable && listener->request_mptcp) { @@ -577,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); + if (subflow->backup) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); + if (subflow_use_different_dport(msk, sk)) { pr_debug("synack inet_dport=%d %d", ntohs(inet_sk(sk)->inet_dport), @@ -614,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk) return err; subflow_set_local_id(subflow, err); + subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk); + return 0; } @@ -1221,14 +1230,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - u32 incr; + struct tcp_sock *tp = tcp_sk(ssk); + u32 offset, incr, avail_len; - incr = limit >= skb->len ? skb->len + fin : limit; + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; + if (WARN_ON_ONCE(offset > skb->len)) + goto out; - pr_debug("discarding=%d len=%d seq=%d", incr, skb->len, - subflow->map_subflow_seq); + avail_len = skb->len - offset; + incr = limit >= avail_len ? avail_len + fin : limit; + + pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; + +out: if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq)) sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) @@ -2005,6 +2022,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; + new_ctx->request_bkup = subflow_req->request_bkup; WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index ff1a4e36c2b5..e06bc36f49fe 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; + struct netlink_ext_ack extack = {}; struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; int err, i = 0; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 481ee78e77bc..0a2f79346958 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8020,6 +8020,19 @@ cont: return skb->len; } +static int nf_tables_dumpreset_obj(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_obj(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8036,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - ctx->reset = true; - return 0; } +static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) +{ + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_obj_start(cb); +} + static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8052,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) } /* called with rcu_read_lock held */ -static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); @@ -8062,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_object *obj; struct sk_buff *skb2; - bool reset = false; u32 objtype; int err; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start = nf_tables_dump_obj_start, - .dump = nf_tables_dump_obj, - .done = nf_tables_dump_obj_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - if (!nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_TYPE]) - return -EINVAL; + return ERR_PTR(-EINVAL); table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); - return PTR_ERR(table); + return ERR_CAST(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); - return PTR_ERR(obj); + return ERR_CAST(obj); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; + err = nf_tables_fill_obj_info(skb2, net, portid, + info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } - if (reset) { - const struct nftables_pernet *nft_net; - char *buf; + return skb2; +} - nft_net = nft_pernet(net); - buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); +static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + u32 portid = NETLINK_CB(skb).portid; + struct sk_buff *skb2; - audit_log_nfcfg(buf, - family, - 1, - AUDIT_NFT_OP_OBJ_RESET, - GFP_ATOMIC); - kfree(buf); + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_obj_start, + .dump = nf_tables_dump_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, - info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, - family, table, obj, reset); - if (err < 0) - goto err_fill_obj_info; + skb2 = nf_tables_getobj_single(portid, info, nla, false); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, info->net, portid); +} -err_fill_obj_info: - kfree_skb(skb2); - return err; +static int nf_tables_getobj_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dumpreset_obj_start, + .dump = nf_tables_dumpreset_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getobj_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_OBJ_TABLE]), + (char *)nla_data(nla[NFTA_OBJ_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); + kfree(buf); + + return nfnetlink_unicast(skb2, net, portid); } static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) @@ -9410,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj, + .call = nf_tables_getobj_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4abf660c7baf..932b3ddb34f1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -427,8 +427,10 @@ replay_abort: nfnl_unlock(subsys_id); - if (nlh->nlmsg_flags & NLM_F_ACK) + if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); + } while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -577,6 +579,7 @@ done: ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } else if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } } else { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 55e28e1da66e..e0716da256bf 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; - const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + struct nf_conn *ct = (void *)skb_nfct(entry->skb); + unsigned long status; + unsigned int use; - if (ct && ((ct->status & flags) == IPS_DYING)) + if (!ct) + return false; + + status = READ_ONCE(ct->status); + if ((status & flags) == IPS_DYING) return true; + + if (status & IPS_CONFIRMED) + return false; + + /* in some cases skb_clone() can occur after initial conntrack + * pickup, but conntrack assumes exclusive skb->_nfct ownership for + * unconfirmed entries. + * + * This happens for br_netfilter and with ip multicast routing. + * We can't be solved with serialization here because one clone could + * have been queued for local delivery. + */ + use = refcount_read(&ct->ct_general.use); + if (likely(use == 1)) + return false; + + /* Can't decrement further? Exclusive ownership. */ + if (!refcount_dec_not_one(&ct->ct_general.use)) + return false; + + skb_set_nfct(entry->skb, 0); + /* No nf_ct_put(): we already decremented .use and it cannot + * drop down to 0. + */ + return true; #endif return false; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 113b907da0f7..3ba8e7e739b5 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -44,6 +44,8 @@ static DEFINE_MUTEX(zones_mutex); struct zones_ht_key { struct net *net; u16 zone; + /* Note : pad[] must be the last field. */ + u8 pad[]; }; struct tcf_ct_flow_table { @@ -60,7 +62,7 @@ struct tcf_ct_flow_table { static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), .key_offset = offsetof(struct tcf_ct_flow_table, key), - .key_len = sizeof_field(struct tcf_ct_flow_table, key), + .key_len = offsetof(struct zones_ht_key, pad), .automatic_shrinking = true, }; diff --git a/net/sctp/input.c b/net/sctp/input.c index 17fcaa9b0df9..a8a254a5008e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; + int err = 0; ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); struct sctp_endpoint *ep2; struct list_head *list; - int cnt = 0, err = 1; + int cnt = 0; + + err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; @@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) if (!err) { err = reuseport_add_sock(sk, sk2, any); if (err) - return err; + goto out; break; } else if (err < 0) { - return err; + goto out; } } if (err) { err = reuseport_alloc(sk, any); if (err) - return err; + goto out; } } - write_lock(&head->lock); hlist_add_head(&ep->node, &head->chain); +out: write_unlock(&head->lock); - return 0; + return err; } /* Add an endpoint to the hash. Local BH-safe. */ @@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - - write_lock(&head->lock); hlist_del_init(&ep->node); write_unlock(&head->lock); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 73a875573e7a..8e3093938cd2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3319,10 +3319,8 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &smc->clcsock); - if (rc) { - sk_common_release(sk); + if (rc) return rc; - } /* smc_clcsock_release() does not wait smc->clcsock->sk's * destruction; its sk_state might not be TCP_CLOSE after @@ -3368,6 +3366,9 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc->clcsock = clcsock; else rc = smc_create_clcsk(net, sk, family); + + if (rc) + sk_common_release(sk); out: return rc; } diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index 9d32058db2b5..e19177ce4092 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -19,7 +19,7 @@ #include "smc_clc.h" -#define SMC_MAX_FBACK_RSN_CNT 30 +#define SMC_MAX_FBACK_RSN_CNT 36 enum { SMC_BUF_8K, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e03f14024e47..88a59cfa5583 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -161,7 +161,7 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) str[len] = '\n'; str[len + 1] = '\0'; - return sysfs_emit(buf, str); + return sysfs_emit(buf, "%s", str); } module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4b040285aa78..0ff9b2dd86ba 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1270,25 +1270,28 @@ out: return err; } +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + return vsk->transport->dgram_dequeue(vsk, msg, len, flags); +} + int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { #ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; const struct proto *prot; -#endif - struct vsock_sock *vsk; - struct sock *sk; - sk = sock->sk; - vsk = vsock_sk(sk); - -#ifdef CONFIG_BPF_SYSCALL prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) return prot->recvmsg(sk, msg, len, flags, NULL); #endif - return vsk->transport->dgram_dequeue(vsk, msg, len, flags); + return __vsock_dgram_recvmsg(sock, msg, len, flags); } EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); @@ -2174,15 +2177,12 @@ out: } int -vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; -#ifdef CONFIG_BPF_SYSCALL - const struct proto *prot; -#endif int err; sk = sock->sk; @@ -2233,14 +2233,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto out; } -#ifdef CONFIG_BPF_SYSCALL - prot = READ_ONCE(sk->sk_prot); - if (prot != &vsock_proto) { - release_sock(sk); - return prot->recvmsg(sk, msg, len, flags, NULL); - } -#endif - if (sk->sk_type == SOCK_STREAM) err = __vsock_stream_recvmsg(sk, msg, len, flags); else @@ -2250,6 +2242,22 @@ out: release_sock(sk); return err; } + +int +vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ +#ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; + const struct proto *prot; + + prot = READ_ONCE(sk->sk_prot); + if (prot != &vsock_proto) + return prot->recvmsg(sk, msg, len, flags, NULL); +#endif + + return __vsock_connectible_recvmsg(sock, msg, len, flags); +} EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); static int vsock_set_rcvlowat(struct sock *sk, int val) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index a3c97546ab84..c42c5cc18f32 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -64,9 +64,9 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int int err; if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) - err = vsock_connectible_recvmsg(sock, msg, len, flags); + err = __vsock_connectible_recvmsg(sock, msg, len, flags); else if (sk->sk_type == SOCK_DGRAM) - err = vsock_dgram_recvmsg(sock, msg, len, flags); + err = __vsock_dgram_recvmsg(sock, msg, len, flags); else err = -EPROTOTYPE; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d99319d82205..64eeed82d43d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3178,8 +3178,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { - size_t min_hdr_len = offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); + size_t min_hdr_len; struct ieee80211_ext *ext = NULL; enum cfg80211_bss_frame_type ftype; u16 beacon_interval; @@ -3202,10 +3201,16 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; - min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon); if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_short_beacon.variable); + else + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_beacon.variable); + } else { + /* same for beacons */ + min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); } if (WARN_ON(len < min_hdr_len)) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index e419aa8c4a5a..d9d7bf8bb5c1 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1045,6 +1045,7 @@ void cfg80211_connect_done(struct net_device *dev, cfg80211_hold_bss( bss_from_pub(params->links[link].bss)); ev->cr.links[link].bss = params->links[link].bss; + ev->cr.links[link].status = params->links[link].status; if (params->links[link].addr) { ev->cr.links[link].addr = next; diff --git a/rust/Makefile b/rust/Makefile index 83f675adbfab..8de3ebba9551 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -44,17 +44,10 @@ rustc_sysroot := $(shell MAKEFLAGS= $(RUSTC) $(rust_flags) --print sysroot) rustc_host_target := $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2) RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library -ifeq ($(quiet),silent_) -cargo_quiet=-q +ifneq ($(quiet),) rust_test_quiet=-q rustdoc_test_quiet=--test-args -q rustdoc_test_kernel_quiet=>/dev/null -else ifeq ($(quiet),quiet_) -rust_test_quiet=-q -rustdoc_test_quiet=--test-args -q -rustdoc_test_kernel_quiet=>/dev/null -else -cargo_quiet=--verbose endif core-cfgs = \ @@ -135,22 +128,21 @@ quiet_cmd_rustc_test_library = RUSTC TL $< @$(objtree)/include/generated/rustc_cfg $(rustc_target_flags) \ --crate-type $(if $(rustc_test_library_proc),proc-macro,rlib) \ --out-dir $(objtree)/$(obj)/test --cfg testlib \ - --sysroot $(objtree)/$(obj)/test/sysroot \ -L$(objtree)/$(obj)/test \ --crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $< -rusttestlib-build_error: $(src)/build_error.rs rusttest-prepare FORCE +rusttestlib-build_error: $(src)/build_error.rs FORCE +$(call if_changed,rustc_test_library) rusttestlib-macros: private rustc_target_flags = --extern proc_macro rusttestlib-macros: private rustc_test_library_proc = yes -rusttestlib-macros: $(src)/macros/lib.rs rusttest-prepare FORCE +rusttestlib-macros: $(src)/macros/lib.rs FORCE +$(call if_changed,rustc_test_library) -rusttestlib-bindings: $(src)/bindings/lib.rs rusttest-prepare FORCE +rusttestlib-bindings: $(src)/bindings/lib.rs FORCE +$(call if_changed,rustc_test_library) -rusttestlib-uapi: $(src)/uapi/lib.rs rusttest-prepare FORCE +rusttestlib-uapi: $(src)/uapi/lib.rs FORCE +$(call if_changed,rustc_test_library) quiet_cmd_rustdoc_test = RUSTDOC T $< @@ -159,7 +151,7 @@ quiet_cmd_rustdoc_test = RUSTDOC T $< $(RUSTDOC) --test $(rust_common_flags) \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) $(rustdoc_test_target_flags) \ - --sysroot $(objtree)/$(obj)/test/sysroot $(rustdoc_test_quiet) \ + $(rustdoc_test_quiet) \ -L$(objtree)/$(obj)/test --output $(rustdoc_output) \ --crate-name $(subst rusttest-,,$@) $< @@ -192,7 +184,6 @@ quiet_cmd_rustc_test = RUSTC T $< $(RUSTC) --test $(rust_common_flags) \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) --out-dir $(objtree)/$(obj)/test \ - --sysroot $(objtree)/$(obj)/test/sysroot \ -L$(objtree)/$(obj)/test \ --crate-name $(subst rusttest-,,$@) $<; \ $(objtree)/$(obj)/test/$(subst rusttest-,,$@) $(rust_test_quiet) \ @@ -200,60 +191,15 @@ quiet_cmd_rustc_test = RUSTC T $< rusttest: rusttest-macros rusttest-kernel -# This prepares a custom sysroot with our custom `alloc` instead of -# the standard one. -# -# This requires several hacks: -# - Unlike `core` and `alloc`, `std` depends on more than a dozen crates, -# including third-party crates that need to be downloaded, plus custom -# `build.rs` steps. Thus hardcoding things here is not maintainable. -# - `cargo` knows how to build the standard library, but it is an unstable -# feature so far (`-Zbuild-std`). -# - `cargo` only considers the use case of building the standard library -# to use it in a given package. Thus we need to create a dummy package -# and pick the generated libraries from there. -# - The usual ways of modifying the dependency graph in `cargo` do not seem -# to apply for the `-Zbuild-std` steps, thus we have to mislead it -# by modifying the sources in the sysroot. -# - To avoid messing with the user's Rust installation, we create a clone -# of the sysroot. However, `cargo` ignores `RUSTFLAGS` in the `-Zbuild-std` -# steps, thus we use a wrapper binary passed via `RUSTC` to pass the flag. -# -# In the future, we hope to avoid the whole ordeal by either: -# - Making the `test` crate not depend on `std` (either improving upstream -# or having our own custom crate). -# - Making the tests run in kernel space (requires the previous point). -# - Making `std` and friends be more like a "normal" crate, so that -# `-Zbuild-std` and related hacks are not needed. -quiet_cmd_rustsysroot = RUSTSYSROOT - cmd_rustsysroot = \ - rm -rf $(objtree)/$(obj)/test; \ - mkdir -p $(objtree)/$(obj)/test; \ - cp -a $(rustc_sysroot) $(objtree)/$(obj)/test/sysroot; \ - echo '\#!/bin/sh' > $(objtree)/$(obj)/test/rustc_sysroot; \ - echo "$(RUSTC) --sysroot=$(abspath $(objtree)/$(obj)/test/sysroot) \"\$$@\"" \ - >> $(objtree)/$(obj)/test/rustc_sysroot; \ - chmod u+x $(objtree)/$(obj)/test/rustc_sysroot; \ - $(CARGO) -q new $(objtree)/$(obj)/test/dummy; \ - RUSTC=$(objtree)/$(obj)/test/rustc_sysroot $(CARGO) $(cargo_quiet) \ - test -Zbuild-std --target $(rustc_host_target) \ - --manifest-path $(objtree)/$(obj)/test/dummy/Cargo.toml; \ - rm $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib/*; \ - cp $(objtree)/$(obj)/test/dummy/target/$(rustc_host_target)/debug/deps/* \ - $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib - -rusttest-prepare: FORCE - +$(call if_changed,rustsysroot) - rusttest-macros: private rustc_target_flags = --extern proc_macro rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro -rusttest-macros: $(src)/macros/lib.rs rusttest-prepare FORCE +rusttest-macros: $(src)/macros/lib.rs FORCE +$(call if_changed,rustc_test) +$(call if_changed,rustdoc_test) rusttest-kernel: private rustc_target_flags = --extern alloc \ --extern build_error --extern macros --extern bindings --extern uapi -rusttest-kernel: $(src)/kernel/lib.rs rusttest-prepare \ +rusttest-kernel: $(src)/kernel/lib.rs \ rusttestlib-build_error rusttestlib-macros rusttestlib-bindings \ rusttestlib-uapi FORCE +$(call if_changed,rustc_test) @@ -281,7 +227,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ - -fstrict-flex-arrays=% \ + -fstrict-flex-arrays=% -fmin-function-alignment=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. @@ -404,12 +350,12 @@ rust-analyzer: $(Q)$(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' --cfgs='alloc=$(alloc-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ - $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ + $(rustc_sysroot) $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ $(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json redirect-intrinsics = \ - __addsf3 __eqsf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __unordsf2 \ - __adddf3 __ledf2 __ltdf2 __muldf3 __unorddf2 \ + __addsf3 __eqsf2 __extendsfdf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __truncdfsf2 __unordsf2 \ + __adddf3 __eqdf2 __ledf2 __ltdf2 __muldf3 __unorddf2 \ __muloti4 __multi3 \ __udivmodti4 __udivti3 __umodti3 @@ -421,7 +367,7 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) endif $(obj)/core.o: private skip_clippy = 1 -$(obj)/core.o: private skip_flags = -Dunreachable_pub +$(obj)/core.o: private skip_flags = -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) $(obj)/core.o: private rustc_target_flags = $(core-cfgs) $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE @@ -435,7 +381,7 @@ $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE +$(call if_changed_dep,rustc_library) $(obj)/alloc.o: private skip_clippy = 1 -$(obj)/alloc.o: private skip_flags = -Dunreachable_pub +$(obj)/alloc.o: private skip_flags = -Wunreachable_pub $(obj)/alloc.o: private rustc_target_flags = $(alloc-cfgs) $(obj)/alloc.o: $(RUST_LIB_SRC)/alloc/src/lib.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_dep,rustc_library) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 53c996e4bedf..b940a5777330 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -30,4 +30,5 @@ const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL; const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT; const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT; const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO; +const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM; const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL; diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 40ddaee50d8b..93a1a3fc97bc 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -24,6 +24,7 @@ unsafe_op_in_unsafe_fn )] +#[allow(dead_code)] mod bindings_raw { // Use glob import here to expose all helpers. // Symbols defined within the module will take precedence to the glob import. diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index bba2922c6ef7..f14b8d7caf89 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -40,16 +40,19 @@ macro_rules! define_panicking_intrinsics( define_panicking_intrinsics!("`f32` should not be used", { __addsf3, __eqsf2, + __extendsfdf2, __gesf2, __lesf2, __ltsf2, __mulsf3, __nesf2, + __truncdfsf2, __unordsf2, }); define_panicking_intrinsics!("`f64` should not be used", { __adddf3, + __eqdf2, __ledf2, __ltdf2, __muldf3, diff --git a/rust/helpers.c b/rust/helpers.c index 87ed0a5b6099..92d3c03ae1bd 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -26,6 +26,8 @@ #include <linux/device.h> #include <linux/err.h> #include <linux/errname.h> +#include <linux/gfp.h> +#include <linux/highmem.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/sched/signal.h> @@ -40,6 +42,20 @@ __noreturn void rust_helper_BUG(void) } EXPORT_SYMBOL_GPL(rust_helper_BUG); +unsigned long rust_helper_copy_from_user(void *to, const void __user *from, + unsigned long n) +{ + return copy_from_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_from_user); + +unsigned long rust_helper_copy_to_user(void __user *to, const void *from, + unsigned long n) +{ + return copy_to_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_to_user); + void rust_helper_mutex_lock(struct mutex *lock) { mutex_lock(lock); @@ -81,6 +97,24 @@ int rust_helper_signal_pending(struct task_struct *t) } EXPORT_SYMBOL_GPL(rust_helper_signal_pending); +struct page *rust_helper_alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages(gfp_mask, order); +} +EXPORT_SYMBOL_GPL(rust_helper_alloc_pages); + +void *rust_helper_kmap_local_page(struct page *page) +{ + return kmap_local_page(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kmap_local_page); + +void rust_helper_kunmap_local(const void *addr) +{ + kunmap_local(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_kunmap_local); + refcount_t rust_helper_REFCOUNT_INIT(int n) { return (refcount_t)REFCOUNT_INIT(n); diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs index 531b5e471cb1..1966bd407017 100644 --- a/rust/kernel/alloc.rs +++ b/rust/kernel/alloc.rs @@ -20,6 +20,13 @@ pub struct AllocError; #[derive(Clone, Copy)] pub struct Flags(u32); +impl Flags { + /// Get the raw representation of this flag. + pub(crate) fn as_raw(self) -> u32 { + self.0 + } +} + impl core::ops::BitOr for Flags { type Output = Self; fn bitor(self, rhs: Self) -> Self::Output { @@ -52,6 +59,14 @@ pub mod flags { /// This is normally or'd with other flags. pub const __GFP_ZERO: Flags = Flags(bindings::__GFP_ZERO); + /// Allow the allocation to be in high memory. + /// + /// Allocations in high memory may not be mapped into the kernel's address space, so this can't + /// be used with `kmalloc` and other similar methods. + /// + /// This is normally or'd with other flags. + pub const __GFP_HIGHMEM: Flags = Flags(bindings::__GFP_HIGHMEM); + /// Users can not sleep and need the allocation to succeed. /// /// A lower watermark is applied to allow access to "atomic reserves". The current @@ -66,7 +81,7 @@ pub mod flags { /// The same as [`GFP_KERNEL`], except the allocation is accounted to kmemcg. pub const GFP_KERNEL_ACCOUNT: Flags = Flags(bindings::GFP_KERNEL_ACCOUNT); - /// Ror kernel allocations that should not stall for direct reclaim, start physical IO or + /// For kernel allocations that should not stall for direct reclaim, start physical IO or /// use any filesystem callback. It is very likely to fail to allocate memory, even for very /// small allocations. pub const GFP_NOWAIT: Flags = Flags(bindings::GFP_NOWAIT); diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2ba03af9f036..dee5b4b18aec 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -2,7 +2,7 @@ //! Firmware abstraction //! -//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h") +//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; use core::ptr::NonNull; diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 68605b633e73..495c09ebe3a3 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -843,11 +843,8 @@ where let val = unsafe { &mut *slot }; // SAFETY: `slot` is considered pinned. let val = unsafe { Pin::new_unchecked(val) }; - (self.1)(val).map_err(|e| { - // SAFETY: `slot` was initialized above. - unsafe { core::ptr::drop_in_place(slot) }; - e - }) + // SAFETY: `slot` was initialized above. + (self.1)(val).inspect_err(|_| unsafe { core::ptr::drop_in_place(slot) }) } } @@ -941,11 +938,9 @@ where // SAFETY: All requirements fulfilled since this function is `__init`. unsafe { self.0.__pinned_init(slot)? }; // SAFETY: The above call initialized `slot` and we still have unique access. - (self.1)(unsafe { &mut *slot }).map_err(|e| { + (self.1)(unsafe { &mut *slot }).inspect_err(|_| // SAFETY: `slot` was initialized above. - unsafe { core::ptr::drop_in_place(slot) }; - e - }) + unsafe { core::ptr::drop_in_place(slot) }) } } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index e6b7d3a80bbc..274bdc1b0a82 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -40,6 +40,7 @@ pub mod ioctl; pub mod kunit; #[cfg(CONFIG_NET)] pub mod net; +pub mod page; pub mod prelude; pub mod print; mod static_assert; @@ -50,6 +51,7 @@ pub mod sync; pub mod task; pub mod time; pub mod types; +pub mod uaccess; pub mod workqueue; #[doc(hidden)] diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs new file mode 100644 index 000000000000..208a006d587c --- /dev/null +++ b/rust/kernel/page.rs @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel page allocation and management. + +use crate::{ + alloc::{AllocError, Flags}, + bindings, + error::code::*, + error::Result, + uaccess::UserSliceReader, +}; +use core::ptr::{self, NonNull}; + +/// A bitwise shift for the page size. +pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize; + +/// The number of bytes in a page. +pub const PAGE_SIZE: usize = bindings::PAGE_SIZE; + +/// A bitmask that gives the page containing a given address. +pub const PAGE_MASK: usize = !(PAGE_SIZE - 1); + +/// A pointer to a page that owns the page allocation. +/// +/// # Invariants +/// +/// The pointer is valid, and has ownership over the page. +pub struct Page { + page: NonNull<bindings::page>, +} + +// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across +// threads is safe. +unsafe impl Send for Page {} + +// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing +// them concurrently is safe. +unsafe impl Sync for Page {} + +impl Page { + /// Allocates a new page. + /// + /// # Examples + /// + /// Allocate memory for a page. + /// + /// ``` + /// use kernel::page::Page; + /// + /// # fn dox() -> Result<(), kernel::alloc::AllocError> { + /// let page = Page::alloc_page(GFP_KERNEL)?; + /// # Ok(()) } + /// ``` + /// + /// Allocate memory for a page and zero its contents. + /// + /// ``` + /// use kernel::page::Page; + /// + /// # fn dox() -> Result<(), kernel::alloc::AllocError> { + /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?; + /// # Ok(()) } + /// ``` + pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> { + // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it + // is always safe to call this method. + let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) }; + let page = NonNull::new(page).ok_or(AllocError)?; + // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly + // allocated page. We transfer that ownership to the new `Page` object. + Ok(Self { page }) + } + + /// Returns a raw pointer to the page. + pub fn as_ptr(&self) -> *mut bindings::page { + self.page.as_ptr() + } + + /// Runs a piece of code with this page mapped to an address. + /// + /// The page is unmapped when this call returns. + /// + /// # Using the raw pointer + /// + /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for + /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might + /// only be mapped on the current thread, and when that is the case, dereferencing it on other + /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't + /// cause data races, the memory may be uninitialized, and so on. + /// + /// If multiple threads map the same page at the same time, then they may reference with + /// different addresses. However, even if the addresses are different, the underlying memory is + /// still the same for these purposes (e.g., it's still a data race if they both write to the + /// same underlying byte at the same time). + fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T { + // SAFETY: `page` is valid due to the type invariants on `Page`. + let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) }; + + let res = f(mapped_addr.cast()); + + // This unmaps the page mapped above. + // + // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner + // where the pages are unmapped in reverse order. This is as required by `kunmap_local`. + // + // In other words, if this call to `kunmap_local` happens when a different page should be + // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the + // call just above in `with_page_mapped` that made that possible. In this case, it is the + // unsafe block that wraps that other call that is incorrect. + unsafe { bindings::kunmap_local(mapped_addr) }; + + res + } + + /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking. + /// + /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the + /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on + /// this task, as this method uses a local mapping. + /// + /// If `off` and `len` refers to a region outside of this page, then this method returns + /// [`EINVAL`] and does not call `f`. + /// + /// # Using the raw pointer + /// + /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for + /// `len` bytes and for the duration in which the closure is called. The pointer might only be + /// mapped on the current thread, and when that is the case, dereferencing it on other threads + /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause + /// data races, the memory may be uninitialized, and so on. + /// + /// If multiple threads map the same page at the same time, then they may reference with + /// different addresses. However, even if the addresses are different, the underlying memory is + /// still the same for these purposes (e.g., it's still a data race if they both write to the + /// same underlying byte at the same time). + fn with_pointer_into_page<T>( + &self, + off: usize, + len: usize, + f: impl FnOnce(*mut u8) -> Result<T>, + ) -> Result<T> { + let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE; + + if bounds_ok { + self.with_page_mapped(move |page_addr| { + // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will + // result in a pointer that is in bounds or one off the end of the page. + f(unsafe { page_addr.add(off) }) + }) + } else { + Err(EINVAL) + } + } + + /// Maps the page and reads from it into the given buffer. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// * Callers must ensure that `dst` is valid for writing `len` bytes. + /// * Callers must ensure that this call does not race with a write to the same page that + /// overlaps with this read. + pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |src| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then + // it has performed a bounds check and guarantees that `src` is + // valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::copy_nonoverlapping(src, dst, len) }; + Ok(()) + }) + } + + /// Maps the page and writes into it from the given buffer. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// * Callers must ensure that `src` is valid for reading `len` bytes. + /// * Callers must ensure that this call does not race with a read or write to the same page + /// that overlaps with this write. + pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::copy_nonoverlapping(src, dst, len) }; + Ok(()) + }) + } + + /// Maps the page and zeroes the given slice. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// Callers must ensure that this call does not race with a read or write to the same page that + /// overlaps with this write. + pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::write_bytes(dst, 0u8, len) }; + Ok(()) + }) + } + + /// Copies data from userspace into this page. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address. + /// However, they are not allowed on the page you are copying into. + /// + /// # Safety + /// + /// Callers must ensure that this call does not race with a read or write to the same page that + /// overlaps with this write. + pub unsafe fn copy_from_user_slice_raw( + &self, + reader: &mut UserSliceReader, + offset: usize, + len: usize, + ) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have + // exclusive access to the slice since the caller guarantees that there are no races. + reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) }) + }) + } +} + +impl Drop for Page { + fn drop(&mut self) { + // SAFETY: By the type invariants, we have ownership of the page and can free it. + unsafe { bindings::__free_pages(self.page.as_ptr(), 0) }; + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 2e7c9008621f..bd189d646adb 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -409,3 +409,67 @@ pub enum Either<L, R> { /// Constructs an instance of [`Either`] containing a value of type `R`. Right(R), } + +/// Types for which any bit pattern is valid. +/// +/// Not all types are valid for all values. For example, a `bool` must be either zero or one, so +/// reading arbitrary bytes into something that contains a `bool` is not okay. +/// +/// It's okay for the type to have padding, as initializing those bytes has no effect. +/// +/// # Safety +/// +/// All bit-patterns must be valid for this type. This type must not have interior mutability. +pub unsafe trait FromBytes {} + +// SAFETY: All bit patterns are acceptable values of the types below. +unsafe impl FromBytes for u8 {} +unsafe impl FromBytes for u16 {} +unsafe impl FromBytes for u32 {} +unsafe impl FromBytes for u64 {} +unsafe impl FromBytes for usize {} +unsafe impl FromBytes for i8 {} +unsafe impl FromBytes for i16 {} +unsafe impl FromBytes for i32 {} +unsafe impl FromBytes for i64 {} +unsafe impl FromBytes for isize {} +// SAFETY: If all bit patterns are acceptable for individual values in an array, then all bit +// patterns are also acceptable for arrays of that type. +unsafe impl<T: FromBytes> FromBytes for [T] {} +unsafe impl<T: FromBytes, const N: usize> FromBytes for [T; N] {} + +/// Types that can be viewed as an immutable slice of initialized bytes. +/// +/// If a struct implements this trait, then it is okay to copy it byte-for-byte to userspace. This +/// means that it should not have any padding, as padding bytes are uninitialized. Reading +/// uninitialized memory is not just undefined behavior, it may even lead to leaking sensitive +/// information on the stack to userspace. +/// +/// The struct should also not hold kernel pointers, as kernel pointer addresses are also considered +/// sensitive. However, leaking kernel pointers is not considered undefined behavior by Rust, so +/// this is a correctness requirement, but not a safety requirement. +/// +/// # Safety +/// +/// Values of this type may not contain any uninitialized bytes. This type must not have interior +/// mutability. +pub unsafe trait AsBytes {} + +// SAFETY: Instances of the following types have no uninitialized portions. +unsafe impl AsBytes for u8 {} +unsafe impl AsBytes for u16 {} +unsafe impl AsBytes for u32 {} +unsafe impl AsBytes for u64 {} +unsafe impl AsBytes for usize {} +unsafe impl AsBytes for i8 {} +unsafe impl AsBytes for i16 {} +unsafe impl AsBytes for i32 {} +unsafe impl AsBytes for i64 {} +unsafe impl AsBytes for isize {} +unsafe impl AsBytes for bool {} +unsafe impl AsBytes for char {} +unsafe impl AsBytes for str {} +// SAFETY: If individual values in an array have no uninitialized portions, then the array itself +// does not have any uninitialized portions either. +unsafe impl<T: AsBytes> AsBytes for [T] {} +unsafe impl<T: AsBytes, const N: usize> AsBytes for [T; N] {} diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs new file mode 100644 index 000000000000..e9347cff99ab --- /dev/null +++ b/rust/kernel/uaccess.rs @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Slices to user space memory regions. +//! +//! C header: [`include/linux/uaccess.h`](srctree/include/linux/uaccess.h) + +use crate::{ + alloc::Flags, + bindings, + error::Result, + prelude::*, + types::{AsBytes, FromBytes}, +}; +use alloc::vec::Vec; +use core::ffi::{c_ulong, c_void}; +use core::mem::{size_of, MaybeUninit}; + +/// The type used for userspace addresses. +pub type UserPtr = usize; + +/// A pointer to an area in userspace memory, which can be either read-only or read-write. +/// +/// All methods on this struct are safe: attempting to read or write on bad addresses (either out of +/// the bound of the slice or unmapped addresses) will return [`EFAULT`]. Concurrent access, +/// *including data races to/from userspace memory*, is permitted, because fundamentally another +/// userspace thread/process could always be modifying memory at the same time (in the same way that +/// userspace Rust's [`std::io`] permits data races with the contents of files on disk). In the +/// presence of a race, the exact byte values read/written are unspecified but the operation is +/// well-defined. Kernelspace code should validate its copy of data after completing a read, and not +/// expect that multiple reads of the same address will return the same value. +/// +/// These APIs are designed to make it difficult to accidentally write TOCTOU (time-of-check to +/// time-of-use) bugs. Every time a memory location is read, the reader's position is advanced by +/// the read length and the next read will start from there. This helps prevent accidentally reading +/// the same location twice and causing a TOCTOU bug. +/// +/// Creating a [`UserSliceReader`] and/or [`UserSliceWriter`] consumes the `UserSlice`, helping +/// ensure that there aren't multiple readers or writers to the same location. +/// +/// If double-fetching a memory location is necessary for some reason, then that is done by creating +/// multiple readers to the same memory location, e.g. using [`clone_reader`]. +/// +/// # Examples +/// +/// Takes a region of userspace memory from the current process, and modify it by adding one to +/// every byte in the region. +/// +/// ```no_run +/// use alloc::vec::Vec; +/// use core::ffi::c_void; +/// use kernel::error::Result; +/// use kernel::uaccess::{UserPtr, UserSlice}; +/// +/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result<()> { +/// let (read, mut write) = UserSlice::new(uptr, len).reader_writer(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// for b in &mut buf { +/// *b = b.wrapping_add(1); +/// } +/// +/// write.write_slice(&buf)?; +/// Ok(()) +/// } +/// ``` +/// +/// Example illustrating a TOCTOU (time-of-check to time-of-use) bug. +/// +/// ```no_run +/// use alloc::vec::Vec; +/// use core::ffi::c_void; +/// use kernel::error::{code::EINVAL, Result}; +/// use kernel::uaccess::{UserPtr, UserSlice}; +/// +/// /// Returns whether the data in this region is valid. +/// fn is_valid(uptr: UserPtr, len: usize) -> Result<bool> { +/// let read = UserSlice::new(uptr, len).reader(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// todo!() +/// } +/// +/// /// Returns the bytes behind this user pointer if they are valid. +/// fn get_bytes_if_valid(uptr: UserPtr, len: usize) -> Result<Vec<u8>> { +/// if !is_valid(uptr, len)? { +/// return Err(EINVAL); +/// } +/// +/// let read = UserSlice::new(uptr, len).reader(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// // THIS IS A BUG! The bytes could have changed since we checked them. +/// // +/// // To avoid this kind of bug, don't call `UserSlice::new` multiple +/// // times with the same address. +/// Ok(buf) +/// } +/// ``` +/// +/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html +/// [`clone_reader`]: UserSliceReader::clone_reader +pub struct UserSlice { + ptr: UserPtr, + length: usize, +} + +impl UserSlice { + /// Constructs a user slice from a raw pointer and a length in bytes. + /// + /// Constructing a [`UserSlice`] performs no checks on the provided address and length, it can + /// safely be constructed inside a kernel thread with no current userspace process. Reads and + /// writes wrap the kernel APIs `copy_from_user` and `copy_to_user`, which check the memory map + /// of the current process and enforce that the address range is within the user range (no + /// additional calls to `access_ok` are needed). Validity of the pointer is checked when you + /// attempt to read or write, not in the call to `UserSlice::new`. + /// + /// Callers must be careful to avoid time-of-check-time-of-use (TOCTOU) issues. The simplest way + /// is to create a single instance of [`UserSlice`] per user memory block as it reads each byte + /// at most once. + pub fn new(ptr: UserPtr, length: usize) -> Self { + UserSlice { ptr, length } + } + + /// Reads the entirety of the user slice, appending it to the end of the provided buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address. + pub fn read_all(self, buf: &mut Vec<u8>, flags: Flags) -> Result { + self.reader().read_all(buf, flags) + } + + /// Constructs a [`UserSliceReader`]. + pub fn reader(self) -> UserSliceReader { + UserSliceReader { + ptr: self.ptr, + length: self.length, + } + } + + /// Constructs a [`UserSliceWriter`]. + pub fn writer(self) -> UserSliceWriter { + UserSliceWriter { + ptr: self.ptr, + length: self.length, + } + } + + /// Constructs both a [`UserSliceReader`] and a [`UserSliceWriter`]. + /// + /// Usually when this is used, you will first read the data, and then overwrite it afterwards. + pub fn reader_writer(self) -> (UserSliceReader, UserSliceWriter) { + ( + UserSliceReader { + ptr: self.ptr, + length: self.length, + }, + UserSliceWriter { + ptr: self.ptr, + length: self.length, + }, + ) + } +} + +/// A reader for [`UserSlice`]. +/// +/// Used to incrementally read from the user slice. +pub struct UserSliceReader { + ptr: UserPtr, + length: usize, +} + +impl UserSliceReader { + /// Skip the provided number of bytes. + /// + /// Returns an error if skipping more than the length of the buffer. + pub fn skip(&mut self, num_skip: usize) -> Result { + // Update `self.length` first since that's the fallible part of this operation. + self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?; + self.ptr = self.ptr.wrapping_add(num_skip); + Ok(()) + } + + /// Create a reader that can access the same range of data. + /// + /// Reading from the clone does not advance the current reader. + /// + /// The caller should take care to not introduce TOCTOU issues, as described in the + /// documentation for [`UserSlice`]. + pub fn clone_reader(&self) -> UserSliceReader { + UserSliceReader { + ptr: self.ptr, + length: self.length, + } + } + + /// Returns the number of bytes left to be read from this reader. + /// + /// Note that even reading less than this number of bytes may fail. + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if no data is available in the io buffer. + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Reads raw data from the user slice into a kernel buffer. + /// + /// For a version that uses `&mut [u8]`, please see [`UserSliceReader::read_slice`]. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error. + /// + /// # Guarantees + /// + /// After a successful call to this method, all bytes in `out` are initialized. + pub fn read_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> Result { + let len = out.len(); + let out_ptr = out.as_mut_ptr().cast::<c_void>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: `out_ptr` points into a mutable slice of length `len_ulong`, so we may write + // that many bytes to it. + let res = + unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len_ulong) }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } + + /// Reads raw data from the user slice into a kernel buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error. + pub fn read_slice(&mut self, out: &mut [u8]) -> Result { + // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to + // `out`. + let out = unsafe { &mut *(out as *mut [u8] as *mut [MaybeUninit<u8>]) }; + self.read_raw(out) + } + + /// Reads a value of the specified type. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. + pub fn read<T: FromBytes>(&mut self) -> Result<T> { + let len = size_of::<T>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); + // SAFETY: The local variable `out` is valid for writing `size_of::<T>()` bytes. + // + // By using the _copy_from_user variant, we skip the check_object_size check that verifies + // the kernel pointer. This mirrors the logic on the C side that skips the check when the + // length is a compile-time constant. + let res = unsafe { + bindings::_copy_from_user( + out.as_mut_ptr().cast::<c_void>(), + self.ptr as *const c_void, + len_ulong, + ) + }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements + // `FromBytes`, any bit-pattern is a valid value for this type. + Ok(unsafe { out.assume_init() }) + } + + /// Reads the entirety of the user slice, appending it to the end of the provided buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address. + pub fn read_all(mut self, buf: &mut Vec<u8>, flags: Flags) -> Result { + let len = self.length; + VecExt::<u8>::reserve(buf, len, flags)?; + + // The call to `try_reserve` was successful, so the spare capacity is at least `len` bytes + // long. + self.read_raw(&mut buf.spare_capacity_mut()[..len])?; + + // SAFETY: Since the call to `read_raw` was successful, so the next `len` bytes of the + // vector have been initialized. + unsafe { buf.set_len(buf.len() + len) }; + Ok(()) + } +} + +/// A writer for [`UserSlice`]. +/// +/// Used to incrementally write into the user slice. +pub struct UserSliceWriter { + ptr: UserPtr, + length: usize, +} + +impl UserSliceWriter { + /// Returns the amount of space remaining in this buffer. + /// + /// Note that even writing less than this number of bytes may fail. + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if no more data can be written to this buffer. + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Writes raw data to this user pointer from a kernel buffer. + /// + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of + /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even + /// if it returns an error. + pub fn write_slice(&mut self, data: &[u8]) -> Result { + let len = data.len(); + let data_ptr = data.as_ptr().cast::<c_void>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: `data_ptr` points into an immutable slice of length `len_ulong`, so we may read + // that many bytes from it. + let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len_ulong) }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } + + /// Writes the provided Rust value to this userspace pointer. + /// + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of + /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even + /// if it returns an error. + pub fn write<T: AsBytes>(&mut self, value: &T) -> Result { + let len = size_of::<T>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: The reference points to a value of type `T`, so it is valid for reading + // `size_of::<T>()` bytes. + // + // By using the _copy_to_user variant, we skip the check_object_size check that verifies the + // kernel pointer. This mirrors the logic on the C side that skips the check when the length + // is a compile-time constant. + let res = unsafe { + bindings::_copy_to_user( + self.ptr as *mut c_void, + (value as *const T).cast::<c_void>(), + len_ulong, + ) + }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } +} diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 1cec63a2aea8..553a5cba2adc 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -482,24 +482,26 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> { /// use kernel::sync::Arc; /// use kernel::workqueue::{self, impl_has_work, Work}; /// -/// struct MyStruct { -/// work_field: Work<MyStruct, 17>, +/// struct MyStruct<'a, T, const N: usize> { +/// work_field: Work<MyStruct<'a, T, N>, 17>, +/// f: fn(&'a [T; N]), /// } /// /// impl_has_work! { -/// impl HasWork<MyStruct, 17> for MyStruct { self.work_field } +/// impl{'a, T, const N: usize} HasWork<MyStruct<'a, T, N>, 17> +/// for MyStruct<'a, T, N> { self.work_field } /// } /// ``` #[macro_export] macro_rules! impl_has_work { - ($(impl$(<$($implarg:ident),*>)? + ($(impl$({$($generics:tt)*})? HasWork<$work_type:ty $(, $id:tt)?> - for $self:ident $(<$($selfarg:ident),*>)? + for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right // type. - unsafe impl$(<$($implarg),*>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self $(<$($selfarg),*>)? { + unsafe impl$(<$($generics)+>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] @@ -515,7 +517,7 @@ macro_rules! impl_has_work { pub use impl_has_work; impl_has_work! { - impl<T> HasWork<Self> for ClosureWork<T> { self.work } + impl{T} HasWork<Self> for ClosureWork<T> { self.work } } unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Arc<T> diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 520eae5fd792..5be0cb9db3ee 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -35,6 +35,7 @@ use proc_macro::TokenStream; /// author: "Rust for Linux Contributors", /// description: "My very own kernel module!", /// license: "GPL", +/// alias: ["alternate_module_name"], /// } /// /// struct MyModule; @@ -55,13 +56,45 @@ use proc_macro::TokenStream; /// } /// ``` /// +/// ## Firmware +/// +/// The following example shows how to declare a kernel module that needs +/// to load binary firmware files. You need to specify the file names of +/// the firmware in the `firmware` field. The information is embedded +/// in the `modinfo` section of the kernel module. For example, a tool to +/// build an initramfs uses this information to put the firmware files into +/// the initramfs image. +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module!{ +/// type: MyDeviceDriverModule, +/// name: "my_device_driver_module", +/// author: "Rust for Linux Contributors", +/// description: "My device driver requires firmware", +/// license: "GPL", +/// firmware: ["my_device_firmware1.bin", "my_device_firmware2.bin"], +/// } +/// +/// struct MyDeviceDriverModule; +/// +/// impl kernel::Module for MyDeviceDriverModule { +/// fn init() -> Result<Self> { +/// Ok(Self) +/// } +/// } +/// ``` +/// /// # Supported argument types /// - `type`: type which implements the [`Module`] trait (required). -/// - `name`: byte array of the name of the kernel module (required). -/// - `author`: byte array of the author of the kernel module. -/// - `description`: byte array of the description of the kernel module. -/// - `license`: byte array of the license of the kernel module (required). -/// - `alias`: byte array of alias name of the kernel module. +/// - `name`: ASCII string literal of the name of the kernel module (required). +/// - `author`: string literal of the author of the kernel module. +/// - `description`: string literal of the description of the kernel module. +/// - `license`: ASCII string literal of the license of the kernel module (required). +/// - `alias`: array of ASCII string literals of the alias names of the kernel module. +/// - `firmware`: array of ASCII string literals of the firmware files of +/// the kernel module. #[proc_macro] pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) @@ -312,7 +345,7 @@ pub fn pinned_drop(args: TokenStream, input: TokenStream) -> TokenStream { /// /// Currently supported modifiers are: /// * `span`: change the span of concatenated identifier to the span of the specified token. By -/// default the span of the `[< >]` group is used. +/// default the span of the `[< >]` group is used. /// * `lower`: change the identifier to lower case. /// * `upper`: change the identifier to upper case. /// diff --git a/rust/macros/module.rs b/rust/macros/module.rs index acd0393b5095..411dc103d82e 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -97,14 +97,22 @@ struct ModuleInfo { author: Option<String>, description: Option<String>, alias: Option<Vec<String>>, + firmware: Option<Vec<String>>, } impl ModuleInfo { fn parse(it: &mut token_stream::IntoIter) -> Self { let mut info = ModuleInfo::default(); - const EXPECTED_KEYS: &[&str] = - &["type", "name", "author", "description", "license", "alias"]; + const EXPECTED_KEYS: &[&str] = &[ + "type", + "name", + "author", + "description", + "license", + "alias", + "firmware", + ]; const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; let mut seen_keys = Vec::new(); @@ -131,6 +139,7 @@ impl ModuleInfo { "description" => info.description = Some(expect_string(it)), "license" => info.license = expect_string_ascii(it), "alias" => info.alias = Some(expect_string_array(it)), + "firmware" => info.firmware = Some(expect_string_array(it)), _ => panic!( "Unknown key \"{}\". Valid keys are: {:?}.", key, EXPECTED_KEYS @@ -186,6 +195,11 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { modinfo.emit("alias", &alias); } } + if let Some(firmware) = info.firmware { + for fw in firmware { + modinfo.emit("firmware", &fw); + } + } // Built-in modules also export the `file` modinfo string. let file = diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 0caad902ba40..80a00260e3e7 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + dead_code, missing_docs, non_camel_case_types, non_upper_case_globals, diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 746ff2d272f2..5694df3da2e9 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -19,10 +19,6 @@ #include "gcc-common.h" #include "randomize_layout_seed.h" -#if BUILDING_GCC_MAJOR < 4 || (BUILDING_GCC_MAJOR == 4 && BUILDING_GCC_MINOR < 7) -#error "The RANDSTRUCT plugin requires GCC 4.7 or newer." -#endif - #define ORIG_TYPE_NAME(node) \ (TYPE_NAME(TYPE_MAIN_VARIANT(node)) != NULL_TREE ? ((const unsigned char *)IDENTIFIER_POINTER(TYPE_NAME(TYPE_MAIN_VARIANT(node)))) : (const unsigned char *)"anonymous") diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh index 825c75c5b715..9459ca4f0f11 100755 --- a/scripts/gcc-x86_32-has-stack-protector.sh +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -5,4 +5,4 @@ # -mstack-protector-guard-reg, added by # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708 -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs" diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 75e4e22b986a..f680bb01aeeb 100755 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,4 +1,4 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f270c7b0cf34..d2bc63cde8c6 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -145,6 +145,7 @@ def main(): parser.add_argument('--cfgs', action='append', default=[]) parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot", type=pathlib.Path) parser.add_argument("sysroot_src", type=pathlib.Path) parser.add_argument("exttree", type=pathlib.Path, nargs="?") args = parser.parse_args() @@ -154,9 +155,12 @@ def main(): level=logging.INFO if args.verbose else logging.WARNING ) + # Making sure that the `sysroot` and `sysroot_src` belong to the same toolchain. + assert args.sysroot in args.sysroot_src.parents + rust_project = { "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), - "sysroot_src": str(args.sysroot_src), + "sysroot": str(args.sysroot), } json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 87f34925eb7b..404edf7587e0 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -162,7 +162,7 @@ fn main() { "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } @@ -179,7 +179,7 @@ fn main() { "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0ed873491bf5..123dab0572f8 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,8 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] - * [--lto-clang] in.map > out.S + * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -62,7 +61,6 @@ static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; static int absolute_percpu; -static int lto_clang; static int token_profit[0x10000]; @@ -73,8 +71,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " - "[--lto-clang] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); exit(1); } @@ -344,25 +341,6 @@ static bool symbol_absolute(const struct sym_entry *s) return s->percpu_absolute; } -static void cleanup_symbol_name(char *s) -{ - char *p; - - /* - * ASCII[.] = 2e - * ASCII[0-9] = 30,39 - * ASCII[A-Z] = 41,5a - * ASCII[_] = 5f - * ASCII[a-z] = 61,7a - * - * As above, replacing the first '.' in ".llvm." with '\0' does not - * affect the main sorting, but it helps us with subsorting. - */ - p = strstr(s, ".llvm."); - if (p) - *p = '\0'; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -526,10 +504,6 @@ static void write_src(void) output_address(relative_base); printf("\n"); - if (lto_clang) - for (i = 0; i < table_cnt; i++) - cleanup_symbol_name((char *)table[i]->sym); - sort_symbols_by_name(); output_label("kallsyms_seqs_of_names"); for (i = 0; i < table_cnt; i++) @@ -807,7 +781,6 @@ int main(int argc, char **argv) static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, {"absolute-percpu", no_argument, &absolute_percpu, 1}, - {"lto-clang", no_argument, <o_clang, 1}, {}, }; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba9..22d0bc843986 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -156,10 +156,6 @@ kallsyms() kallsymopt="${kallsymopt} --absolute-percpu" fi - if is_enabled CONFIG_LTO_CLANG; then - kallsymopt="${kallsymopt} --lto-clang" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 74355ff0e106..ac3e5ac01d8a 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -74,7 +74,7 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA echo "/lib/modules/%{KERNELRELEASE}" for x in alias alias.bin builtin.alias.bin builtin.bin dep dep.bin \ - devname softdep symbols symbols.bin; do + devname softdep symbols symbols.bin weakdep; do echo "%ghost /lib/modules/%{KERNELRELEASE}/modules.${x}" done diff --git a/scripts/package/mkspec b/scripts/package/mkspec index ead54d67a024..4dc1466dfc81 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -50,6 +50,6 @@ fi cat << EOF %changelog -* $(LC_ALL=C; date +'%a %b %d %Y') ${name} <${email}> +* $(LC_ALL=C date +'%a %b %d %Y') ${name} <${email}> - Custom built Linux kernel. EOF diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh index 117018946b57..5262c56dd674 100755 --- a/scripts/rust_is_available.sh +++ b/scripts/rust_is_available.sh @@ -117,20 +117,16 @@ if [ "$rust_compiler_cversion" -lt "$rust_compiler_min_cversion" ]; then echo >&2 "***" exit 1 fi -if [ "$rust_compiler_cversion" -gt "$rust_compiler_min_cversion" ]; then - echo >&2 "***" - echo >&2 "*** Rust compiler '$RUSTC' is too new. This may or may not work." - echo >&2 "*** Your version: $rust_compiler_version" - echo >&2 "*** Expected version: $rust_compiler_min_version" - echo >&2 "***" - warning=1 -fi # Check that the Rust bindings generator is suitable. # # Non-stable and distributions' versions may have a version suffix, e.g. `-dev`. +# +# The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 +# (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when +# the minimum version is upgraded past that (0.69.1 already fixed the issue). rust_bindings_generator_output=$( \ - LC_ALL=C "$BINDGEN" --version 2>/dev/null + LC_ALL=C "$BINDGEN" --version workaround-for-0.69.0 2>/dev/null ) || rust_bindings_generator_code=$? if [ -n "$rust_bindings_generator_code" ]; then echo >&2 "***" @@ -165,13 +161,18 @@ if [ "$rust_bindings_generator_cversion" -lt "$rust_bindings_generator_min_cvers echo >&2 "***" exit 1 fi -if [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cversion" ]; then - echo >&2 "***" - echo >&2 "*** Rust bindings generator '$BINDGEN' is too new. This may or may not work." - echo >&2 "*** Your version: $rust_bindings_generator_version" - echo >&2 "*** Expected version: $rust_bindings_generator_min_version" - echo >&2 "***" - warning=1 +if [ "$rust_bindings_generator_cversion" -eq 6600 ] || + [ "$rust_bindings_generator_cversion" -eq 6601 ]; then + # Distributions may have patched the issue (e.g. Debian did). + if ! "$BINDGEN" $(dirname $0)/rust_is_available_bindgen_0_66.h >/dev/null; then + echo >&2 "***" + echo >&2 "*** Rust bindings generator '$BINDGEN' versions 0.66.0 and 0.66.1 may not" + echo >&2 "*** work due to a bug (https://github.com/rust-lang/rust-bindgen/pull/2567)," + echo >&2 "*** unless patched (like Debian's)." + echo >&2 "*** Your version: $rust_bindings_generator_version" + echo >&2 "***" + warning=1 + fi fi # Check that the `libclang` used by the Rust bindings generator is suitable. diff --git a/scripts/rust_is_available_bindgen_0_66.h b/scripts/rust_is_available_bindgen_0_66.h new file mode 100644 index 000000000000..c0431293421c --- /dev/null +++ b/scripts/rust_is_available_bindgen_0_66.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define A "\0" diff --git a/scripts/rust_is_available_test.py b/scripts/rust_is_available_test.py index 57613fe5ed75..413741037fb3 100755 --- a/scripts/rust_is_available_test.py +++ b/scripts/rust_is_available_test.py @@ -54,18 +54,34 @@ else: """) @classmethod - def generate_bindgen(cls, version_stdout, libclang_stderr): + def generate_bindgen(cls, version_stdout, libclang_stderr, version_0_66_patched=False): + if libclang_stderr is None: + libclang_case = f"raise SystemExit({cls.bindgen_default_bindgen_libclang_failure_exit_code})" + else: + libclang_case = f"print({repr(libclang_stderr)}, file=sys.stderr)" + + if version_0_66_patched: + version_0_66_case = "pass" + else: + version_0_66_case = "raise SystemExit(1)" + return cls.generate_executable(f"""#!/usr/bin/env python3 import sys if "rust_is_available_bindgen_libclang.h" in " ".join(sys.argv): - print({repr(libclang_stderr)}, file=sys.stderr) + {libclang_case} +elif "rust_is_available_bindgen_0_66.h" in " ".join(sys.argv): + {version_0_66_case} else: print({repr(version_stdout)}) """) @classmethod - def generate_bindgen_version(cls, stdout): - return cls.generate_bindgen(stdout, cls.bindgen_default_bindgen_libclang_stderr) + def generate_bindgen_version(cls, stdout, version_0_66_patched=False): + return cls.generate_bindgen(stdout, cls.bindgen_default_bindgen_libclang_stderr, version_0_66_patched) + + @classmethod + def generate_bindgen_libclang_failure(cls): + return cls.generate_bindgen(cls.bindgen_default_bindgen_version_stdout, None) @classmethod def generate_bindgen_libclang(cls, stderr): @@ -89,6 +105,7 @@ else: cls.rust_default_sysroot = subprocess.check_output(("rustc", "--print", "sysroot")).decode().strip() cls.bindgen_default_bindgen_version_stdout = f"bindgen {cls.bindgen_default_version}" + cls.bindgen_default_bindgen_libclang_failure_exit_code = 42 cls.bindgen_default_bindgen_libclang_stderr = f"scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {cls.llvm_default_version} [-W#pragma-messages], err: false" cls.default_rustc = cls.generate_rustc(f"rustc {cls.rustc_default_version}") @@ -193,11 +210,6 @@ else: result = self.run_script(self.Expected.FAILURE, { "RUSTC": rustc }) self.assertIn(f"Rust compiler '{rustc}' is too old.", result.stderr) - def test_rustc_new_version(self): - rustc = self.generate_rustc("rustc 1.999.0 (a8314ef7d 2099-06-27)") - result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "RUSTC": rustc }) - self.assertIn(f"Rust compiler '{rustc}' is too new. This may or may not work.", result.stderr) - def test_bindgen_nonexecutable(self): result = self.run_script(self.Expected.FAILURE, { "BINDGEN": self.nonexecutable }) self.assertIn(f"Running '{self.nonexecutable}' to check the Rust bindings generator version failed with", result.stderr) @@ -226,21 +238,24 @@ else: result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen }) self.assertIn(f"Rust bindings generator '{bindgen}' is too old.", result.stderr) - def test_bindgen_new_version(self): - bindgen = self.generate_bindgen_version("bindgen 0.999.0") - result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "BINDGEN": bindgen }) - self.assertIn(f"Rust bindings generator '{bindgen}' is too new. This may or may not work.", result.stderr) + def test_bindgen_bad_version_0_66_0_and_0_66_1(self): + for version in ("0.66.0", "0.66.1"): + with self.subTest(version=version): + bindgen = self.generate_bindgen_version(f"bindgen {version}") + result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "BINDGEN": bindgen }) + self.assertIn(f"Rust bindings generator '{bindgen}' versions 0.66.0 and 0.66.1 may not", result.stderr) + + def test_bindgen_bad_version_0_66_0_and_0_66_1_patched(self): + for version in ("0.66.0", "0.66.1"): + with self.subTest(version=version): + bindgen = self.generate_bindgen_version(f"bindgen {version}", True) + result = self.run_script(self.Expected.SUCCESS, { "BINDGEN": bindgen }) def test_bindgen_libclang_failure(self): - for env in ( - { "LLVM_CONFIG_PATH": self.missing }, - { "LIBCLANG_PATH": self.missing }, - { "CLANG_PATH": self.missing }, - ): - with self.subTest(env=env): - result = self.run_script(self.Expected.FAILURE, env | { "PATH": os.environ["PATH"], "BINDGEN": "bindgen" }) - self.assertIn("Running 'bindgen' to check the libclang version (used by the Rust", result.stderr) - self.assertIn("bindings generator) failed with code ", result.stderr) + bindgen = self.generate_bindgen_libclang_failure() + result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen }) + self.assertIn(f"Running '{bindgen}' to check the libclang version (used by the Rust", result.stderr) + self.assertIn(f"bindings generator) failed with code {self.bindgen_default_bindgen_libclang_failure_exit_code}. This may be caused by", result.stderr) def test_bindgen_libclang_unexpected_version(self): bindgen = self.generate_bindgen_libclang("scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version unexpected [-W#pragma-messages], err: false") diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index 591d85e8ca7e..845e24eb372e 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -53,6 +53,7 @@ 39 common umount2 sys_umount 40 common mount sys_mount 41 common pivot_root sys_pivot_root +42 common nfsservctl sys_ni_syscall 43 32 statfs64 sys_statfs64 compat_sys_statfs64 43 64 statfs sys_statfs 44 32 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 @@ -98,9 +99,9 @@ 77 common tee sys_tee 78 common readlinkat sys_readlinkat 79 stat64 fstatat64 sys_fstatat64 -79 newstat fstatat sys_newfstatat +79 64 newfstatat sys_newfstatat 80 stat64 fstat64 sys_fstat64 -80 newstat fstat sys_newfstat +80 64 fstat sys_newfstat 81 common sync sys_sync 82 common fsync sys_fsync 83 common fdatasync sys_fdatasync @@ -402,4 +403,3 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index bcfea073e3f2..01b923d97a44 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1692,6 +1692,10 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) struct aa_profile *p; p = aa_deref_parent(profile); dent = prof_dir(p); + if (!dent) { + error = -ENOENT; + goto fail2; + } /* adding to parent that previously didn't have children */ dent = aafs_create_dir("profiles", dent); if (IS_ERR(dent)) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index c03eb7c19f16..d52a5b14dad4 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -144,19 +144,6 @@ int aa_audit_file(const struct cred *subj_cred, return aa_audit(type, profile, &ad, file_audit_cb); } -/** - * is_deleted - test if a file has been completely unlinked - * @dentry: dentry of file to test for deletion (NOT NULL) - * - * Returns: true if deleted else false - */ -static inline bool is_deleted(struct dentry *dentry) -{ - if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0) - return true; - return false; -} - static int path_name(const char *op, const struct cred *subj_cred, struct aa_label *label, const struct path *path, int flags, char *buffer, diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 58fdc72af664..7265d2f81dd5 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -63,6 +63,26 @@ static inline struct aa_label *aa_get_newest_cred_label(const struct cred *cred) return aa_get_newest_label(aa_cred_raw_label(cred)); } +static inline struct aa_label *aa_get_newest_cred_label_condref(const struct cred *cred, + bool *needput) +{ + struct aa_label *l = aa_cred_raw_label(cred); + + if (unlikely(label_is_stale(l))) { + *needput = true; + return aa_get_newest_label(l); + } + + *needput = false; + return l; +} + +static inline void aa_put_label_condref(struct aa_label *l, bool needput) +{ + if (unlikely(needput)) + aa_put_label(l); +} + /** * aa_current_raw_label - find the current tasks confining label * diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 16568b6d589d..808060f9effb 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -461,6 +461,7 @@ static int apparmor_file_open(struct file *file) struct aa_file_ctx *fctx = file_ctx(file); struct aa_label *label; int error = 0; + bool needput; if (!path_mediated_fs(file->f_path.dentry)) return 0; @@ -477,7 +478,7 @@ static int apparmor_file_open(struct file *file) return 0; } - label = aa_get_newest_cred_label(file->f_cred); + label = aa_get_newest_cred_label_condref(file->f_cred, &needput); if (!unconfined(label)) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); @@ -494,7 +495,7 @@ static int apparmor_file_open(struct file *file) /* todo cache full allowed permissions set and state */ fctx->allow = aa_map_file_to_perms(file); } - aa_put_label(label); + aa_put_label_condref(label, needput); return error; } @@ -1124,7 +1125,7 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) * @sock: socket that is being setup * @family: family of socket being created * @type: type of the socket - * @ptotocol: protocol of the socket + * @protocol: protocol of the socket * @kern: socket is a special kernel socket * * Note: @@ -1304,6 +1305,13 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!skb->secmark) return 0; + /* + * If reach here before socket_post_create hook is called, in which + * case label is null, drop the packet. + */ + if (!ctx->label) + return -EACCES; + return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE, skb->secmark, sk); } diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 49fe8da6fea4..bf8863253e07 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -44,6 +44,8 @@ static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags) audit_log_format(ab, ", mand"); if (flags & MS_DIRSYNC) audit_log_format(ab, ", dirsync"); + if (flags & MS_NOSYMFOLLOW) + audit_log_format(ab, ", nosymfollow"); if (flags & MS_NOATIME) audit_log_format(ab, ", noatime"); if (flags & MS_NODIRATIME) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 957654d253dd..14df15e35695 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -225,7 +225,7 @@ static void aa_free_data(void *ptr, void *arg) { struct aa_data *data = ptr; - kfree_sensitive(data->data); + kvfree_sensitive(data->data, data->size); kfree_sensitive(data->key); kfree_sensitive(data); } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5e578ef0ddff..5a570235427d 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -747,34 +747,42 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, *info = "missing required dfa"; goto fail; } - goto out; + } else { + /* + * only unpack the following if a dfa is present + * + * sadly start was given different names for file and policydb + * but since it is optional we can try both + */ + if (!aa_unpack_u32(e, &pdb->start[0], "start")) + /* default start state */ + pdb->start[0] = DFA_START; + if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) { + /* default start state for xmatch and file dfa */ + pdb->start[AA_CLASS_FILE] = DFA_START; + } /* setup class index */ + for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { + pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0], + i); + } } /* - * only unpack the following if a dfa is present - * - * sadly start was given different names for file and policydb - * but since it is optional we can try both + * Unfortunately due to a bug in earlier userspaces, a + * transition table may be present even when the dfa is + * not. For compatibility reasons unpack and discard. */ - if (!aa_unpack_u32(e, &pdb->start[0], "start")) - /* default start state */ - pdb->start[0] = DFA_START; - if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) { - /* default start state for xmatch and file dfa */ - pdb->start[AA_CLASS_FILE] = DFA_START; - } /* setup class index */ - for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { - pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0], - i); - } if (!unpack_trans_table(e, &pdb->trans) && required_trans) { *info = "failed to unpack profile transition table"; goto fail; } + if (!pdb->dfa && pdb->trans.table) + aa_free_str_table(&pdb->trans); + /* TODO: move compat mapping here, requires dfa merging first */ /* TODO: move verify here, it has to be done after compat mappings */ -out: + *policy = pdb; return 0; @@ -1071,6 +1079,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (rhashtable_insert_fast(profile->data, &data->head, profile->data->p)) { + kvfree_sensitive(data->data, data->size); kfree_sensitive(data->key); kfree_sensitive(data); info = "failed to insert data to table"; diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index 5c9bde25e56d..874fcf97794e 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -604,4 +604,5 @@ static struct kunit_suite apparmor_policy_unpack_test_module = { kunit_test_suite(apparmor_policy_unpack_test_module); +MODULE_DESCRIPTION("KUnit tests for AppArmor's policy unpack"); MODULE_LICENSE("GPL"); diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index b5f81a05be36..4edc5bbbcda3 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -186,20 +186,21 @@ out: return ret; } -static int decrypt_blob_key(u8 *key) +static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key) { - return do_dcp_crypto(key, key, false); + return do_dcp_crypto(encrypted_key, plain_key, false); } -static int encrypt_blob_key(u8 *key) +static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key) { - return do_dcp_crypto(key, key, true); + return do_dcp_crypto(plain_key, encrypted_key, true); } static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; blen = calc_blob_len(p->key_len); if (blen > MAX_BLOB_SIZE) @@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) b->fmt_version = DCP_BLOB_VERSION; get_random_bytes(b->nonce, AES_KEYSIZE_128); - get_random_bytes(b->blob_key, AES_KEYSIZE_128); + get_random_bytes(plain_blob_key, AES_KEYSIZE_128); - ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key, + ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key, b->nonce, true); if (ret) { pr_err("Unable to encrypt blob payload: %i\n", ret); - return ret; + goto out; } - ret = encrypt_blob_key(b->blob_key); + ret = encrypt_blob_key(plain_blob_key, b->blob_key); if (ret) { pr_err("Unable to encrypt blob key: %i\n", ret); - return ret; + goto out; } - b->payload_len = get_unaligned_le32(&p->key_len); + put_unaligned_le32(p->key_len, &b->payload_len); p->blob_len = blen; - return 0; + ret = 0; + +out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + + return ret; } static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; if (b->fmt_version != DCP_BLOB_VERSION) { pr_err("DCP blob has bad version: %i, expected %i\n", @@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) goto out; } - ret = decrypt_blob_key(b->blob_key); + ret = decrypt_blob_key(b->blob_key, plain_blob_key); if (ret) { pr_err("Unable to decrypt blob key: %i\n", ret); goto out; } ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN, - b->blob_key, b->nonce, false); + plain_blob_key, b->nonce, false); if (ret) { pr_err("Unwrap of DCP payload failed: %i\n", ret); goto out; @@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) ret = 0; out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + return ret; } diff --git a/security/landlock/cred.c b/security/landlock/cred.c index 786af18c4a1c..db9fe7d906ba 100644 --- a/security/landlock/cred.c +++ b/security/landlock/cred.c @@ -14,8 +14,8 @@ #include "ruleset.h" #include "setup.h" -static int hook_cred_prepare(struct cred *const new, - const struct cred *const old, const gfp_t gfp) +static void hook_cred_transfer(struct cred *const new, + const struct cred *const old) { struct landlock_ruleset *const old_dom = landlock_cred(old)->domain; @@ -23,6 +23,12 @@ static int hook_cred_prepare(struct cred *const new, landlock_get_ruleset(old_dom); landlock_cred(new)->domain = old_dom; } +} + +static int hook_cred_prepare(struct cred *const new, + const struct cred *const old, const gfp_t gfp) +{ + hook_cred_transfer(new, old); return 0; } @@ -36,6 +42,7 @@ static void hook_cred_free(struct cred *const cred) static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, hook_cred_prepare), + LSM_HOOK_INIT(cred_transfer, hook_cred_transfer), LSM_HOOK_INIT(cred_free, hook_cred_free), }; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 32eb67fb3e42..b49c44869dc4 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -330,12 +330,12 @@ static int avc_add_xperms_decision(struct avc_node *node, { struct avc_xperms_decision_node *dest_xpd; - node->ae.xp_node->xp.len++; dest_xpd = avc_xperms_decision_alloc(src->used); if (!dest_xpd) return -ENOMEM; avc_copy_xperms_decision(&dest_xpd->xpd, src); list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head); + node->ae.xp_node->xp.len++; return 0; } @@ -907,7 +907,11 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, node->ae.avd.auditdeny &= ~perms; break; case AVC_CALLBACK_ADD_XPERMS: - avc_add_xperms_decision(node, xpd); + rc = avc_add_xperms_decision(node, xpd); + if (rc) { + avc_node_kill(node); + goto out_unlock; + } break; } avc_node_replace(node, orig); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 55c78c318ccd..bfa61e005aac 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3852,7 +3852,17 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { int rc = 0; - if (vma_is_initial_heap(vma)) { + /* + * We don't use the vma_is_initial_heap() helper as it has + * a history of problems and is currently broken on systems + * where there is no heap, e.g. brk == start_brk. Before + * replacing the conditional below with vma_is_initial_heap(), + * or something similar, please ensure that the logic is the + * same as what we have below or you have tested every possible + * corner case you can think to test. + */ + if (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && (vma_is_initial_stack(vma) || diff --git a/sound/core/seq/seq_ports.h b/sound/core/seq/seq_ports.h index b111382f697a..9e36738c0dd0 100644 --- a/sound/core/seq/seq_ports.h +++ b/sound/core/seq/seq_ports.h @@ -7,6 +7,7 @@ #define __SND_SEQ_PORTS_H #include <sound/seq_kernel.h> +#include <sound/ump_convert.h> #include "seq_lock.h" /* list of 'exported' ports */ @@ -42,17 +43,6 @@ struct snd_seq_port_subs_info { int (*close)(void *private_data, struct snd_seq_port_subscribe *info); }; -/* context for converting from legacy control event to UMP packet */ -struct snd_seq_ump_midi2_bank { - bool rpn_set; - bool nrpn_set; - bool bank_set; - unsigned char cc_rpn_msb, cc_rpn_lsb; - unsigned char cc_nrpn_msb, cc_nrpn_lsb; - unsigned char cc_data_msb, cc_data_lsb; - unsigned char cc_bank_msb, cc_bank_lsb; -}; - struct snd_seq_client_port { struct snd_seq_addr addr; /* client/port number */ @@ -88,7 +78,7 @@ struct snd_seq_client_port { unsigned char ump_group; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) - struct snd_seq_ump_midi2_bank midi2_bank[16]; /* per channel */ + struct ump_cvt_to_ump_bank midi2_bank[16]; /* per channel */ #endif }; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index e90b27a135e6..4dd540cbb1cb 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -368,7 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest, struct snd_seq_ump_event ev_cvt; const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump; - struct snd_seq_ump_midi2_bank *cc; + struct ump_cvt_to_ump_bank *cc; ev_cvt = *event; memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump)); @@ -789,28 +789,45 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event, return 1; } +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) +{ + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + /* set up the MIDI2 RPN/NRPN packet data from the parsed info */ -static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, - union snd_ump_midi2_msg *data, - unsigned char channel) +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *data, + unsigned char channel, + bool flush) { + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { data->rpn.status = UMP_MSG_STATUS_RPN; data->rpn.bank = cc->cc_rpn_msb; data->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { data->rpn.status = UMP_MSG_STATUS_NRPN; data->rpn.bank = cc->cc_nrpn_msb; data->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); data->rpn.channel = channel; - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert CC event to MIDI 2.0 UMP */ @@ -822,29 +839,39 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; unsigned char val = event->data.control.value & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; + int ret; /* process special CC's (bank/rpn/nrpn) */ switch (index) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_msb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_lsb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = val; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = val; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = val; - return 0; // skip + return fill_rpn(cc, data, channel, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = val; @@ -854,11 +881,9 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = val; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = val; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = status; @@ -887,7 +912,7 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char status) { unsigned char channel = event->data.control.channel & 0x0f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; data->pg.status = status; data->pg.channel = channel; @@ -924,8 +949,9 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, { unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; unsigned char msb, lsb; + int ret; msb = (event->data.control.value >> 7) & 0x7f; lsb = event->data.control.value & 0x7f; @@ -939,28 +965,27 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = lsb; return 0; // skip case UMP_CC_RPN_MSB: - cc->cc_rpn_msb = msb; - fallthrough; case UMP_CC_RPN_LSB: - cc->rpn_set = 1; + ret = fill_rpn(cc, data, channel, true); + cc->cc_rpn_msb = msb; cc->cc_rpn_lsb = lsb; - return 0; // skip + cc->rpn_set = 1; + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: - cc->cc_nrpn_msb = msb; - fallthrough; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); + cc->cc_nrpn_msb = msb; cc->nrpn_set = 1; cc->cc_nrpn_lsb = lsb; - return 0; // skip + return ret; case UMP_CC_DATA: - cc->cc_data_msb = msb; - fallthrough; case UMP_CC_DATA_LSB: + cc->cc_data_msb_set = cc->cc_data_lsb_set = 1; + cc->cc_data_msb = msb; cc->cc_data_lsb = lsb; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = UMP_MSG_STATUS_CC; @@ -1192,44 +1217,53 @@ static int cvt_sysex_to_ump(struct snd_seq_client *dest, { struct snd_seq_ump_event ev_cvt; unsigned char status; - u8 buf[6], *xbuf; + u8 buf[8], *xbuf; int offset = 0; int len, err; + bool finished = false; if (!snd_seq_ev_is_variable(event)) return 0; setup_ump_event(&ev_cvt, event); - for (;;) { + while (!finished) { len = snd_seq_expand_var_event_at(event, sizeof(buf), buf, offset); if (len <= 0) break; - if (WARN_ON(len > 6)) + if (WARN_ON(len > sizeof(buf))) break; - offset += len; + xbuf = buf; + status = UMP_SYSEX_STATUS_CONTINUE; + /* truncate the sysex start-marker */ if (*xbuf == UMP_MIDI1_MSG_SYSEX_START) { status = UMP_SYSEX_STATUS_START; - xbuf++; len--; - if (len > 0 && xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + offset++; + xbuf++; + } + + /* if the last of this packet or the 1st byte of the next packet + * is the end-marker, finish the transfer with this packet + */ + if (len > 0 && len < 8 && + xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + if (status == UMP_SYSEX_STATUS_START) status = UMP_SYSEX_STATUS_SINGLE; - len--; - } - } else { - if (xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + else status = UMP_SYSEX_STATUS_END; - len--; - } else { - status = UMP_SYSEX_STATUS_CONTINUE; - } + len--; + finished = true; } + + len = min(len, 6); fill_sysex7_ump(dest_port, ev_cvt.ump, status, xbuf, len); err = __snd_seq_deliver_single_event(dest, dest_port, (struct snd_seq_event *)&ev_cvt, atomic, hop); if (err < 0) return err; + offset += len; } return 0; } diff --git a/sound/core/timer.c b/sound/core/timer.c index d104adc75a8b..71a07c1662f5 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_timer_instance *timeri, /* check the actual time for the start tick; * bail out as error if it's way too low (< 100us) */ - if (start) { + if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) { if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) return -EINVAL; } diff --git a/sound/core/ump.c b/sound/core/ump.c index 3f61220c23b4..0f0d7e895c5a 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -733,6 +733,12 @@ static void fill_fb_info(struct snd_ump_endpoint *ump, info->block_id, info->direction, info->active, info->first_group, info->num_groups, info->midi_ci_version, info->sysex8_streams, info->flags); + + if ((info->flags & SNDRV_UMP_BLOCK_IS_MIDI1) && info->num_groups != 1) { + info->num_groups = 1; + ump_dbg(ump, "FB %d: corrected groups to 1 for MIDI1\n", + info->block_id); + } } /* check whether the FB info gets updated by the current message */ @@ -806,6 +812,13 @@ static int ump_handle_fb_name_msg(struct snd_ump_endpoint *ump, if (!fb) return -ENODEV; + if (ump->parsed && + (ump->info.flags & SNDRV_UMP_EP_INFO_STATIC_BLOCKS)) { + ump_dbg(ump, "Skipping static FB name update (blk#%d)\n", + fb->info.block_id); + return 0; + } + ret = ump_append_string(ump, fb->info.name, sizeof(fb->info.name), buf->raw, 3); /* notify the FB name update to sequencer, too */ diff --git a/sound/core/ump_convert.c b/sound/core/ump_convert.c index f67c44c83fde..0fe13d031656 100644 --- a/sound/core/ump_convert.c +++ b/sound/core/ump_convert.c @@ -287,25 +287,42 @@ static int cvt_legacy_system_to_ump(struct ump_cvt_to_ump *cvt, return 4; } -static void fill_rpn(struct ump_cvt_to_ump_bank *cc, - union snd_ump_midi2_msg *midi2) +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) { + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *midi2, + bool flush) +{ + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { midi2->rpn.status = UMP_MSG_STATUS_RPN; midi2->rpn.bank = cc->cc_rpn_msb; midi2->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { midi2->rpn.status = UMP_MSG_STATUS_NRPN; midi2->rpn.bank = cc->cc_nrpn_msb; midi2->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + midi2->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert to a MIDI 1.0 Channel Voice message */ @@ -318,6 +335,7 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, struct ump_cvt_to_ump_bank *cc; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)data; unsigned char status, channel; + int ret; BUILD_BUG_ON(sizeof(union snd_ump_midi1_msg) != 4); BUILD_BUG_ON(sizeof(union snd_ump_midi2_msg) != 8); @@ -358,24 +376,33 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, case UMP_MSG_STATUS_CC: switch (buf[1]) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_msb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_lsb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = buf[2]; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = buf[2]; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = buf[2]; - return 0; // skip + return fill_rpn(cc, midi2, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = buf[2]; @@ -385,12 +412,9 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, cc->cc_bank_lsb = buf[2]; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = buf[2]; - if (cc->rpn_set || cc->nrpn_set) - fill_rpn(cc, midi2); - else - return 0; // skip - break; + return fill_rpn(cc, midi2, false); default: midi2->cc.index = buf[1]; midi2->cc.data = upscale_7_to_32bit(buf[2]); diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index d35d0a420ee0..c827d7d8d800 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -77,6 +77,8 @@ // overrun. Actual device can skip more, then this module stops the packet streaming. #define IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES 5 +static void pcm_period_work(struct work_struct *work); + /** * amdtp_stream_init - initialize an AMDTP stream structure * @s: the AMDTP stream to initialize @@ -105,6 +107,7 @@ int amdtp_stream_init(struct amdtp_stream *s, struct fw_unit *unit, s->flags = flags; s->context = ERR_PTR(-1); mutex_init(&s->mutex); + INIT_WORK(&s->period_work, pcm_period_work); s->packet_index = 0; init_waitqueue_head(&s->ready_wait); @@ -347,6 +350,7 @@ EXPORT_SYMBOL(amdtp_stream_get_max_payload); */ void amdtp_stream_pcm_prepare(struct amdtp_stream *s) { + cancel_work_sync(&s->period_work); s->pcm_buffer_pointer = 0; s->pcm_period_pointer = 0; } @@ -611,19 +615,21 @@ static void update_pcm_pointers(struct amdtp_stream *s, // The program in user process should periodically check the status of intermediate // buffer associated to PCM substream to process PCM frames in the buffer, instead // of receiving notification of period elapsed by poll wait. - if (!pcm->runtime->no_period_wakeup) { - if (in_softirq()) { - // In software IRQ context for 1394 OHCI. - snd_pcm_period_elapsed(pcm); - } else { - // In process context of ALSA PCM application under acquired lock of - // PCM substream. - snd_pcm_period_elapsed_under_stream_lock(pcm); - } - } + if (!pcm->runtime->no_period_wakeup) + queue_work(system_highpri_wq, &s->period_work); } } +static void pcm_period_work(struct work_struct *work) +{ + struct amdtp_stream *s = container_of(work, struct amdtp_stream, + period_work); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); + + if (pcm) + snd_pcm_period_elapsed(pcm); +} + static int queue_packet(struct amdtp_stream *s, struct fw_iso_packet *params, bool sched_irq) { @@ -1180,8 +1186,7 @@ static void process_rx_packets(struct fw_iso_context *context, u32 tstamp, size_ (void)fw_card_read_cycle_time(fw_parent_device(s->unit)->card, &curr_cycle_time); for (i = 0; i < packets; ++i) { - DEFINE_FLEX(struct fw_iso_packet, template, header, - header_length, CIP_HEADER_QUADLETS); + DEFINE_RAW_FLEX(struct fw_iso_packet, template, header, CIP_HEADER_QUADLETS); bool sched_irq = false; build_it_pkt_header(s, desc->cycle, template, pkt_header_length, @@ -1850,11 +1855,14 @@ unsigned long amdtp_domain_stream_pcm_pointer(struct amdtp_domain *d, { struct amdtp_stream *irq_target = d->irq_target; - // Process isochronous packets queued till recent isochronous cycle to handle PCM frames. if (irq_target && amdtp_stream_running(irq_target)) { - // In software IRQ context, the call causes dead-lock to disable the tasklet - // synchronously. - if (!in_softirq()) + // use wq to prevent AB/BA deadlock competition for + // substream lock: + // fw_iso_context_flush_completions() acquires + // lock by ohci_flush_iso_completions(), + // amdtp-stream process_rx_packets() attempts to + // acquire same lock by snd_pcm_elapsed() + if (current_work() != &s->period_work) fw_iso_context_flush_completions(irq_target->context); } @@ -1910,6 +1918,7 @@ static void amdtp_stream_stop(struct amdtp_stream *s) return; } + cancel_work_sync(&s->period_work); fw_iso_context_stop(s->context); fw_iso_context_destroy(s->context); s->context = ERR_PTR(-1); diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index a1ed2e80f91a..775db3fc4959 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -191,6 +191,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; + struct work_struct period_work; snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; unsigned int pcm_frame_multiplier; diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 4b411ed8c3fe..d68bf7591d90 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -133,19 +133,8 @@ static const struct reg_sequence cs35l41_hda_mute[] = { { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, // AMP_HPF_PCM_EN = 1, AMP_VOL_PCM Mute }; -static void cs35l41_add_controls(struct cs35l41_hda *cs35l41) -{ - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l41->amp_name; - info.fw_type = cs35l41->firmware_type; - info.card = cs35l41->codec->card; - - hda_cs_dsp_add_controls(&cs35l41->cs_dsp, &info); -} - static const struct cs_dsp_client_ops client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l41_request_tuning_param_file(struct cs35l41_hda *cs35l41, char *tuning_filename, @@ -603,8 +592,6 @@ static int cs35l41_init_dsp(struct cs35l41_hda *cs35l41) if (ret) goto err; - cs35l41_add_controls(cs35l41); - cs35l41_hda_apply_calibration(cs35l41); err: diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 96d3f13c5abf..a9dfd62637cf 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -413,7 +413,7 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) } static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, @@ -559,18 +559,6 @@ static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmw kfree(coeff_filename); } -static void cs35l56_hda_create_dsp_controls_work(struct work_struct *work) -{ - struct cs35l56_hda *cs35l56 = container_of(work, struct cs35l56_hda, control_work); - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l56->amp_name; - info.fw_type = HDA_CS_DSP_FW_MISC; - info.card = cs35l56->codec->card; - - hda_cs_dsp_add_controls(&cs35l56->cs_dsp, &info); -} - static void cs35l56_hda_apply_calibration(struct cs35l56_hda *cs35l56) { int ret; @@ -595,26 +583,15 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) char *wmfw_filename = NULL; unsigned int preloaded_fw_ver; bool firmware_missing; - bool add_dsp_controls_required = false; int ret; /* - * control_work must be flushed before proceeding, but we can't do that - * here as it would create a deadlock on controls_rwsem so it must be - * performed before queuing dsp_work. - */ - WARN_ON_ONCE(work_busy(&cs35l56->control_work)); - - /* * Prepare for a new DSP power-up. If the DSP has had firmware * downloaded previously then it needs to be powered down so that it - * can be updated and if hadn't been patched before then the controls - * will need to be added once firmware download succeeds. + * can be updated. */ if (cs35l56->base.fw_patched) cs_dsp_power_down(&cs35l56->cs_dsp); - else - add_dsp_controls_required = true; cs35l56->base.fw_patched = false; @@ -698,15 +675,6 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) CS35L56_FIRMWARE_MISSING); cs35l56->base.fw_patched = true; - /* - * Adding controls is deferred to prevent a lock inversion - ALSA takes - * the controls_rwsem when adding a control, the get() / put() - * functions of a control are called holding controls_rwsem and those - * that depend on running firmware wait for dsp_work() to complete. - */ - if (add_dsp_controls_required) - queue_work(system_long_wq, &cs35l56->control_work); - ret = cs_dsp_run(&cs35l56->cs_dsp); if (ret) dev_dbg(cs35l56->base.dev, "%s: cs_dsp_run ret %d\n", __func__, ret); @@ -753,7 +721,6 @@ static int cs35l56_hda_bind(struct device *dev, struct device *master, void *mas strscpy(comp->name, dev_name(dev), sizeof(comp->name)); comp->playback_hook = cs35l56_hda_playback_hook; - flush_work(&cs35l56->control_work); queue_work(system_long_wq, &cs35l56->dsp_work); cs35l56_hda_create_controls(cs35l56); @@ -775,7 +742,6 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void * struct hda_component *comp; cancel_work_sync(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->control_work); cs35l56_hda_remove_controls(cs35l56); @@ -806,7 +772,6 @@ static int cs35l56_hda_system_suspend(struct device *dev) struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); cs35l56_hda_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->control_work); if (cs35l56->playing) cs35l56_hda_pause(cs35l56); @@ -1026,7 +991,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) dev_set_drvdata(cs35l56->base.dev, cs35l56); INIT_WORK(&cs35l56->dsp_work, cs35l56_hda_dsp_work); - INIT_WORK(&cs35l56->control_work, cs35l56_hda_create_dsp_controls_work); ret = cs35l56_hda_read_acpi(cs35l56, hid, id); if (ret) diff --git a/sound/pci/hda/cs35l56_hda.h b/sound/pci/hda/cs35l56_hda.h index c40d159507c2..38d94fb213a5 100644 --- a/sound/pci/hda/cs35l56_hda.h +++ b/sound/pci/hda/cs35l56_hda.h @@ -23,7 +23,6 @@ struct cs35l56_hda { struct cs35l56_base base; struct hda_codec *codec; struct work_struct dsp_work; - struct work_struct control_work; int index; const char *system_name; diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index c2d0109866e6..68c883f202ca 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -28,7 +28,7 @@ #else #define AZX_DCAPS_I915_COMPONENT 0 /* NOP */ #endif -/* 14 unused */ +#define AZX_DCAPS_AMD_ALLOC_FIX (1 << 14) /* AMD allocation workaround */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ #define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index f64d9dc197a3..9cff87dfbecb 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4955,6 +4955,69 @@ void snd_hda_gen_stream_pm(struct hda_codec *codec, hda_nid_t nid, bool on) } EXPORT_SYMBOL_GPL(snd_hda_gen_stream_pm); +/* forcibly mute the speaker output without caching; return true if updated */ +static bool force_mute_output_path(struct hda_codec *codec, hda_nid_t nid) +{ + if (!nid) + return false; + if (!nid_has_mute(codec, nid, HDA_OUTPUT)) + return false; /* no mute, skip */ + if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) & + snd_hda_codec_amp_read(codec, nid, 1, HDA_OUTPUT, 0) & + HDA_AMP_MUTE) + return false; /* both channels already muted, skip */ + + /* direct amp update without caching */ + snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, + AC_AMP_SET_OUTPUT | AC_AMP_SET_LEFT | + AC_AMP_SET_RIGHT | HDA_AMP_MUTE); + return true; +} + +/** + * snd_hda_gen_shutup_speakers - Forcibly mute the speaker outputs + * @codec: the HDA codec + * + * Forcibly mute the speaker outputs, to be called at suspend or shutdown. + * + * The mute state done by this function isn't cached, hence the original state + * will be restored at resume. + * + * Return true if the mute state has been changed. + */ +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + const int *paths; + const struct nid_path *path; + int i, p, num_paths; + bool updated = false; + + /* if already powered off, do nothing */ + if (!snd_hdac_is_power_on(&codec->core)) + return false; + + if (spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) { + paths = spec->out_paths; + num_paths = spec->autocfg.line_outs; + } else { + paths = spec->speaker_paths; + num_paths = spec->autocfg.speaker_outs; + } + + for (i = 0; i < num_paths; i++) { + path = snd_hda_get_path_from_idx(codec, paths[i]); + if (!path) + continue; + for (p = 0; p < path->depth; p++) + if (force_mute_output_path(codec, path->path[p])) + updated = true; + } + + return updated; +} +EXPORT_SYMBOL_GPL(snd_hda_gen_shutup_speakers); + /** * snd_hda_gen_parse_auto_config - Parse the given BIOS configuration and * set up the hda_gen_spec diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 8f5ecf740c49..08544601b4ce 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -353,5 +353,6 @@ int snd_hda_gen_add_mute_led_cdev(struct hda_codec *codec, int snd_hda_gen_add_micmute_led_cdev(struct hda_codec *codec, int (*callback)(struct led_classdev *, enum led_brightness)); +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec); #endif /* __SOUND_HDA_GENERIC_H */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b33602e64d17..97d33a48ff17 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86 /* for snoop control */ +#include <linux/dma-map-ops.h> #include <asm/set_memory.h> #include <asm/cpufeature.h> #endif @@ -306,7 +307,7 @@ enum { /* quirks for ATI HDMI with snoop off */ #define AZX_DCAPS_PRESET_ATI_HDMI_NS \ - (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF) + (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_AMD_ALLOC_FIX) /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ @@ -1702,6 +1703,13 @@ static void azx_check_snoop_available(struct azx *chip) if (chip->driver_caps & AZX_DCAPS_SNOOP_OFF) snoop = false; +#ifdef CONFIG_X86 + /* check the presence of DMA ops (i.e. IOMMU), disable snoop conditionally */ + if ((chip->driver_caps & AZX_DCAPS_AMD_ALLOC_FIX) && + !get_dma_ops(chip->card->dev)) + snoop = false; +#endif + chip->snoop = snoop; if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 17389a3801bd..f030669243f9 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,12 +21,6 @@ #include "hda_jack.h" #include "hda_generic.h" -enum { - CX_HEADSET_NOPRESENT = 0, - CX_HEADSET_PARTPRESENT, - CX_HEADSET_ALLPRESENT, -}; - struct conexant_spec { struct hda_gen_spec gen; @@ -48,7 +42,6 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - unsigned int headset_present_flag; bool is_cx8070_sn6140; }; @@ -212,6 +205,8 @@ static void cx_auto_shutdown(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; + snd_hda_gen_shutup_speakers(codec); + /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); @@ -250,48 +245,19 @@ static void cx_process_headset_plugin(struct hda_codec *codec) } } -static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +static void cx_update_headset_mic_vref(struct hda_codec *codec, struct hda_jack_callback *event) { - unsigned int phone_present, mic_persent, phone_tag, mic_tag; - struct conexant_spec *spec = codec->spec; + unsigned int mic_present; /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, * the node 19 can only be config to microphone or disabled. * Check hp&mic tag to process headset pulgin&plugout. */ - phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || - (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { - phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); - if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ - spec->headset_present_flag = CX_HEADSET_NOPRESENT; - snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); - return; - } - if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { - spec->headset_present_flag = CX_HEADSET_PARTPRESENT; - } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { - mic_persent = snd_hda_codec_read(codec, 0x19, 0, - AC_VERB_GET_PIN_SENSE, 0x0); - /* headset is present */ - if ((phone_present & AC_PINSENSE_PRESENCE) && - (mic_persent & AC_PINSENSE_PRESENCE)) { - cx_process_headset_plugin(codec); - spec->headset_present_flag = CX_HEADSET_ALLPRESENT; - } - } - } -} - -static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) -{ - struct conexant_spec *spec = codec->spec; - - if (spec->is_cx8070_sn6140) - cx_update_headset_mic_vref(codec, res); - - snd_hda_jack_unsol_event(codec, res); + mic_present = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(mic_present & AC_PINSENSE_PRESENCE)) /* mic plugout */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + else + cx_process_headset_plugin(codec); } static int cx_auto_suspend(struct hda_codec *codec) @@ -305,7 +271,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = cx_jack_unsol_event, + .unsol_event = snd_hda_jack_unsol_event, .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, }; @@ -1163,7 +1129,7 @@ static int patch_conexant_auto(struct hda_codec *codec) case 0x14f11f86: case 0x14f11f87: spec->is_cx8070_sn6140 = true; - spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_jack_detect_enable_callback(codec, 0x19, cx_update_headset_mic_vref); break; } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 707d203ba652..78042ac2b71f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1989,6 +1989,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) } static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x83e2, "HP EliteDesk 800 G4", 1), + SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1), SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c3a86a99f8c6..d022a25635f9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11,15 +11,18 @@ */ #include <linux/acpi.h> +#include <linux/cleanup.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/pci.h> #include <linux/dmi.h> #include <linux/module.h> +#include <linux/i2c.h> #include <linux/input.h> #include <linux/leds.h> #include <linux/ctype.h> +#include <linux/spi/spi.h> #include <sound/core.h> #include <sound/jack.h> #include <sound/hda_codec.h> @@ -583,7 +586,6 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: - case 0x10ec0257: case 0x19e58326: case 0x10ec0283: case 0x10ec0285: @@ -4800,6 +4802,8 @@ static void alc298_fixup_samsung_amp(struct hda_codec *codec, } } +#include "samsung_helper.c" + #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) @@ -6854,6 +6858,86 @@ static void comp_generic_fixup(struct hda_codec *cdc, int action, const char *bu } } +static void cs35lxx_autodet_fixup(struct hda_codec *cdc, + const struct hda_fixup *fix, + int action) +{ + struct device *dev = hda_codec_dev(cdc); + struct acpi_device *adev; + struct fwnode_handle *fwnode __free(fwnode_handle) = NULL; + const char *bus = NULL; + static const struct { + const char *hid; + const char *name; + } acpi_ids[] = {{ "CSC3554", "cs35l54-hda" }, + { "CSC3556", "cs35l56-hda" }, + { "CSC3557", "cs35l57-hda" }}; + char *match; + int i, count = 0, count_devindex = 0; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + for (i = 0; i < ARRAY_SIZE(acpi_ids); ++i) { + adev = acpi_dev_get_first_match_dev(acpi_ids[i].hid, NULL, -1); + if (adev) + break; + } + if (!adev) { + dev_err(dev, "Failed to find ACPI entry for a Cirrus Amp\n"); + return; + } + + count = i2c_acpi_client_count(adev); + if (count > 0) { + bus = "i2c"; + } else { + count = acpi_spi_count_resources(adev); + if (count > 0) + bus = "spi"; + } + + fwnode = fwnode_handle_get(acpi_fwnode_handle(adev)); + acpi_dev_put(adev); + + if (!bus) { + dev_err(dev, "Did not find any buses for %s\n", acpi_ids[i].hid); + return; + } + + if (!fwnode) { + dev_err(dev, "Could not get fwnode for %s\n", acpi_ids[i].hid); + return; + } + + /* + * When available the cirrus,dev-index property is an accurate + * count of the amps in a system and is used in preference to + * the count of bus devices that can contain additional address + * alias entries. + */ + count_devindex = fwnode_property_count_u32(fwnode, "cirrus,dev-index"); + if (count_devindex > 0) + count = count_devindex; + + match = devm_kasprintf(dev, GFP_KERNEL, "-%%s:00-%s.%%d", acpi_ids[i].name); + if (!match) + return; + dev_info(dev, "Found %d %s on %s (%s)\n", count, acpi_ids[i].hid, bus, match); + comp_generic_fixup(cdc, action, bus, acpi_ids[i].hid, match, count); + + break; + case HDA_FIXUP_ACT_FREE: + /* + * Pass the action on to comp_generic_fixup() so that + * hda_component_manager functions can be called in just once + * place. In this context the bus, hid, match_str or count + * values do not need to be calculated. + */ + comp_generic_fixup(cdc, action, NULL, NULL, NULL, 0); + break; + } +} + static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { comp_generic_fixup(cdc, action, "i2c", "CSC3551", "-%s:00-cs35l41-hda.%d", 2); @@ -7429,6 +7513,7 @@ enum { ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, ALC298_FIXUP_SAMSUNG_AMP, + ALC298_FIXUP_SAMSUNG_AMP2, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, @@ -7525,6 +7610,7 @@ enum { ALC256_FIXUP_CHROME_BOOK, ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, ALC287_FIXUP_LENOVO_SSID_17AA3820, + ALCXXX_FIXUP_CS35LXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9055,6 +9141,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET }, + [ALC298_FIXUP_SAMSUNG_AMP2] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_samsung_amp2 + }, [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9850,6 +9940,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_ssid_17aa3820, }, + [ALCXXX_FIXUP_CS35LXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35lxx_autodet_fixup, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9865,6 +9959,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), @@ -10263,6 +10358,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d08, "HP EliteBook 1045 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite x360 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8c, "HP EliteBook 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8d, "HP Elite x360 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8e, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8f, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d90, "HP EliteBook 860 16 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALCXXX_FIXUP_CS35LXX), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -10359,10 +10465,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), - SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), @@ -10406,6 +10512,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1ca, "Samsung Galaxy Book3 Pro 360 (NP960QFG-KB1US)", ALC298_FIXUP_SAMSUNG_AMP2), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), @@ -10669,6 +10776,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. @@ -10843,6 +10951,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, {.id = ALC298_FIXUP_SAMSUNG_AMP, .name = "alc298-samsung-amp"}, + {.id = ALC298_FIXUP_SAMSUNG_AMP2, .name = "alc298-samsung-amp2"}, {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, diff --git a/sound/pci/hda/samsung_helper.c b/sound/pci/hda/samsung_helper.c new file mode 100644 index 000000000000..a40175b69015 --- /dev/null +++ b/sound/pci/hda/samsung_helper.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Helper functions for Samsung Galaxy Book3 audio initialization */ + +struct alc298_samsung_coeff_fixup_desc { + unsigned char coeff_idx; + unsigned short coeff_value; +}; + +struct alc298_samsung_coeff_seq_desc { + unsigned short coeff_0x23; + unsigned short coeff_0x24; + unsigned short coeff_0x25; + unsigned short coeff_0x26; +}; + + +static inline void alc298_samsung_write_coef_pack2(struct hda_codec *codec, + const struct alc298_samsung_coeff_seq_desc *seq) +{ + int i; + + for (i = 0; i < 100; i++) { + if ((alc_read_coef_idx(codec, 0x26) & 0x0010) == 0) + break; + + usleep_range(500, 1000); + } + + alc_write_coef_idx(codec, 0x23, seq->coeff_0x23); + alc_write_coef_idx(codec, 0x24, seq->coeff_0x24); + alc_write_coef_idx(codec, 0x25, seq->coeff_0x25); + alc_write_coef_idx(codec, 0x26, seq->coeff_0x26); +} + +static inline void alc298_samsung_write_coef_pack_seq( + struct hda_codec *codec, + unsigned char target, + const struct alc298_samsung_coeff_seq_desc seq[], + int count) +{ + alc_write_coef_idx(codec, 0x22, target); + for (int i = 0; i < count; i++) + alc298_samsung_write_coef_pack2(codec, &seq[i]); +} + +static void alc298_fixup_samsung_amp2(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + int i; + static const struct alc298_samsung_coeff_fixup_desc fixups1[] = { + { 0x99, 0x8000 }, { 0x82, 0x4408 }, { 0x32, 0x3f00 }, { 0x0e, 0x6f80 }, + { 0x10, 0x0e21 }, { 0x55, 0x8000 }, { 0x08, 0x2fcf }, { 0x08, 0x2fcf }, + { 0x2d, 0xc020 }, { 0x19, 0x0017 }, { 0x50, 0x1000 }, { 0x0e, 0x6f80 }, + { 0x08, 0x2fcf }, { 0x80, 0x0011 }, { 0x2b, 0x0c10 }, { 0x2d, 0xc020 }, + { 0x03, 0x0042 }, { 0x0f, 0x0062 }, { 0x08, 0x2fcf }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x38[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0001, 0xb011 }, { 0x201d, 0x0000, 0x0001, 0xb011 }, + { 0x201f, 0x0000, 0x00fe, 0xb011 }, { 0x2021, 0x0000, 0x0000, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x2399, 0x0000, 0x0003, 0xb011 }, + { 0x23a4, 0x0000, 0x00b5, 0xb011 }, { 0x23a5, 0x0000, 0x0001, 0xb011 }, + { 0x23ba, 0x0000, 0x0094, 0xb011 }, { 0x2100, 0x00d0, 0x950e, 0xb017 }, + { 0x2104, 0x0061, 0xd4e2, 0xb017 }, { 0x2108, 0x00d0, 0x950e, 0xb017 }, + { 0x210c, 0x0075, 0xf4e2, 0xb017 }, { 0x2110, 0x00b4, 0x4b0d, 0xb017 }, + { 0x2114, 0x000a, 0x1000, 0xb017 }, { 0x2118, 0x0015, 0x2000, 0xb017 }, + { 0x211c, 0x000a, 0x1000, 0xb017 }, { 0x2120, 0x0075, 0xf4e2, 0xb017 }, + { 0x2124, 0x00b4, 0x4b0d, 0xb017 }, { 0x2128, 0x0000, 0x0010, 0xb017 }, + { 0x212c, 0x0000, 0x0000, 0xb017 }, { 0x2130, 0x0000, 0x0000, 0xb017 }, + { 0x2134, 0x0000, 0x0000, 0xb017 }, { 0x2138, 0x0000, 0x0000, 0xb017 }, + { 0x213c, 0x0000, 0x0010, 0xb017 }, { 0x2140, 0x0000, 0x0000, 0xb017 }, + { 0x2144, 0x0000, 0x0000, 0xb017 }, { 0x2148, 0x0000, 0x0000, 0xb017 }, + { 0x214c, 0x0000, 0x0000, 0xb017 }, { 0x2150, 0x0000, 0x0010, 0xb017 }, + { 0x2154, 0x0000, 0x0000, 0xb017 }, { 0x2158, 0x0000, 0x0000, 0xb017 }, + { 0x215c, 0x0000, 0x0000, 0xb017 }, { 0x2160, 0x0000, 0x0000, 0xb017 }, + { 0x2164, 0x0000, 0x0010, 0xb017 }, { 0x2168, 0x0000, 0x0000, 0xb017 }, + { 0x216c, 0x0000, 0x0000, 0xb017 }, { 0x2170, 0x0000, 0x0000, 0xb017 }, + { 0x2174, 0x0000, 0x0000, 0xb017 }, { 0x2178, 0x0000, 0x0010, 0xb017 }, + { 0x217c, 0x0000, 0x0000, 0xb017 }, { 0x2180, 0x0000, 0x0000, 0xb017 }, + { 0x2184, 0x0000, 0x0000, 0xb017 }, { 0x2188, 0x0000, 0x0000, 0xb017 }, + { 0x218c, 0x0064, 0x5800, 0xb017 }, { 0x2190, 0x00c8, 0xb000, 0xb017 }, + { 0x2194, 0x0064, 0x5800, 0xb017 }, { 0x2198, 0x003d, 0x5be7, 0xb017 }, + { 0x219c, 0x0054, 0x060a, 0xb017 }, { 0x21a0, 0x00c8, 0xa310, 0xb017 }, + { 0x21a4, 0x0029, 0x4de5, 0xb017 }, { 0x21a8, 0x0032, 0x420c, 0xb017 }, + { 0x21ac, 0x0029, 0x4de5, 0xb017 }, { 0x21b0, 0x00fa, 0xe50c, 0xb017 }, + { 0x21b4, 0x0000, 0x0010, 0xb017 }, { 0x21b8, 0x0000, 0x0000, 0xb017 }, + { 0x21bc, 0x0000, 0x0000, 0xb017 }, { 0x21c0, 0x0000, 0x0000, 0xb017 }, + { 0x21c4, 0x0000, 0x0000, 0xb017 }, { 0x21c8, 0x0056, 0xc50f, 0xb017 }, + { 0x21cc, 0x007b, 0xd7e1, 0xb017 }, { 0x21d0, 0x0077, 0xa70e, 0xb017 }, + { 0x21d4, 0x00e0, 0xbde1, 0xb017 }, { 0x21d8, 0x0032, 0x530e, 0xb017 }, + { 0x2204, 0x00fb, 0x7e0f, 0xb017 }, { 0x2208, 0x000b, 0x02e1, 0xb017 }, + { 0x220c, 0x00fb, 0x7e0f, 0xb017 }, { 0x2210, 0x00d5, 0x17e1, 0xb017 }, + { 0x2214, 0x00c0, 0x130f, 0xb017 }, { 0x2218, 0x00e5, 0x0a00, 0xb017 }, + { 0x221c, 0x00cb, 0x1500, 0xb017 }, { 0x2220, 0x00e5, 0x0a00, 0xb017 }, + { 0x2224, 0x00d5, 0x17e1, 0xb017 }, { 0x2228, 0x00c0, 0x130f, 0xb017 }, + { 0x222c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2230, 0x0017, 0x48e2, 0xb017 }, + { 0x2234, 0x00f5, 0xdb0e, 0xb017 }, { 0x2238, 0x00ef, 0x5ce2, 0xb017 }, + { 0x223c, 0x00c1, 0xcc0d, 0xb017 }, { 0x2240, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2244, 0x0017, 0x48e2, 0xb017 }, { 0x2248, 0x00f5, 0xdb0e, 0xb017 }, + { 0x224c, 0x00ef, 0x5ce2, 0xb017 }, { 0x2250, 0x00c1, 0xcc0d, 0xb017 }, + { 0x2254, 0x00f5, 0xdb0e, 0xb017 }, { 0x2258, 0x0017, 0x48e2, 0xb017 }, + { 0x225c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2260, 0x00ef, 0x5ce2, 0xb017 }, + { 0x2264, 0x00c1, 0xcc0d, 0xb017 }, { 0x2268, 0x00f5, 0xdb0e, 0xb017 }, + { 0x226c, 0x0017, 0x48e2, 0xb017 }, { 0x2270, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2274, 0x00ef, 0x5ce2, 0xb017 }, { 0x2278, 0x00c1, 0xcc0d, 0xb017 }, + { 0x227c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2280, 0x0017, 0x48e2, 0xb017 }, + { 0x2284, 0x00f5, 0xdb0e, 0xb017 }, { 0x2288, 0x00ef, 0x5ce2, 0xb017 }, + { 0x228c, 0x00c1, 0xcc0d, 0xb017 }, { 0x22cc, 0x00e8, 0x8d00, 0xb017 }, + { 0x22d0, 0x0000, 0x0000, 0xb017 }, { 0x22d4, 0x0018, 0x72ff, 0xb017 }, + { 0x22d8, 0x00ce, 0x25e1, 0xb017 }, { 0x22dc, 0x002f, 0xe40e, 0xb017 }, + { 0x238e, 0x0000, 0x0099, 0xb011 }, { 0x238f, 0x0000, 0x0011, 0xb011 }, + { 0x2390, 0x0000, 0x0056, 0xb011 }, { 0x2391, 0x0000, 0x0004, 0xb011 }, + { 0x2392, 0x0000, 0x00bb, 0xb011 }, { 0x2393, 0x0000, 0x006d, 0xb011 }, + { 0x2394, 0x0000, 0x0010, 0xb011 }, { 0x2395, 0x0000, 0x0064, 0xb011 }, + { 0x2396, 0x0000, 0x00b6, 0xb011 }, { 0x2397, 0x0000, 0x0028, 0xb011 }, + { 0x2398, 0x0000, 0x000b, 0xb011 }, { 0x239a, 0x0000, 0x0099, 0xb011 }, + { 0x239b, 0x0000, 0x000d, 0xb011 }, { 0x23a6, 0x0000, 0x0064, 0xb011 }, + { 0x23a7, 0x0000, 0x0078, 0xb011 }, { 0x23b9, 0x0000, 0x0000, 0xb011 }, + { 0x23e0, 0x0000, 0x0021, 0xb011 }, { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x39[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0002, 0xb011 }, { 0x201d, 0x0000, 0x0002, 0xb011 }, + { 0x201f, 0x0000, 0x00fd, 0xb011 }, { 0x2021, 0x0000, 0x0001, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x2399, 0x0000, 0x0003, 0xb011 }, + { 0x23a4, 0x0000, 0x00b5, 0xb011 }, { 0x23a5, 0x0000, 0x0001, 0xb011 }, + { 0x23ba, 0x0000, 0x0094, 0xb011 }, { 0x2100, 0x00d0, 0x950e, 0xb017 }, + { 0x2104, 0x0061, 0xd4e2, 0xb017 }, { 0x2108, 0x00d0, 0x950e, 0xb017 }, + { 0x210c, 0x0075, 0xf4e2, 0xb017 }, { 0x2110, 0x00b4, 0x4b0d, 0xb017 }, + { 0x2114, 0x000a, 0x1000, 0xb017 }, { 0x2118, 0x0015, 0x2000, 0xb017 }, + { 0x211c, 0x000a, 0x1000, 0xb017 }, { 0x2120, 0x0075, 0xf4e2, 0xb017 }, + { 0x2124, 0x00b4, 0x4b0d, 0xb017 }, { 0x2128, 0x0000, 0x0010, 0xb017 }, + { 0x212c, 0x0000, 0x0000, 0xb017 }, { 0x2130, 0x0000, 0x0000, 0xb017 }, + { 0x2134, 0x0000, 0x0000, 0xb017 }, { 0x2138, 0x0000, 0x0000, 0xb017 }, + { 0x213c, 0x0000, 0x0010, 0xb017 }, { 0x2140, 0x0000, 0x0000, 0xb017 }, + { 0x2144, 0x0000, 0x0000, 0xb017 }, { 0x2148, 0x0000, 0x0000, 0xb017 }, + { 0x214c, 0x0000, 0x0000, 0xb017 }, { 0x2150, 0x0000, 0x0010, 0xb017 }, + { 0x2154, 0x0000, 0x0000, 0xb017 }, { 0x2158, 0x0000, 0x0000, 0xb017 }, + { 0x215c, 0x0000, 0x0000, 0xb017 }, { 0x2160, 0x0000, 0x0000, 0xb017 }, + { 0x2164, 0x0000, 0x0010, 0xb017 }, { 0x2168, 0x0000, 0x0000, 0xb017 }, + { 0x216c, 0x0000, 0x0000, 0xb017 }, { 0x2170, 0x0000, 0x0000, 0xb017 }, + { 0x2174, 0x0000, 0x0000, 0xb017 }, { 0x2178, 0x0000, 0x0010, 0xb017 }, + { 0x217c, 0x0000, 0x0000, 0xb017 }, { 0x2180, 0x0000, 0x0000, 0xb017 }, + { 0x2184, 0x0000, 0x0000, 0xb017 }, { 0x2188, 0x0000, 0x0000, 0xb017 }, + { 0x218c, 0x0064, 0x5800, 0xb017 }, { 0x2190, 0x00c8, 0xb000, 0xb017 }, + { 0x2194, 0x0064, 0x5800, 0xb017 }, { 0x2198, 0x003d, 0x5be7, 0xb017 }, + { 0x219c, 0x0054, 0x060a, 0xb017 }, { 0x21a0, 0x00c8, 0xa310, 0xb017 }, + { 0x21a4, 0x0029, 0x4de5, 0xb017 }, { 0x21a8, 0x0032, 0x420c, 0xb017 }, + { 0x21ac, 0x0029, 0x4de5, 0xb017 }, { 0x21b0, 0x00fa, 0xe50c, 0xb017 }, + { 0x21b4, 0x0000, 0x0010, 0xb017 }, { 0x21b8, 0x0000, 0x0000, 0xb017 }, + { 0x21bc, 0x0000, 0x0000, 0xb017 }, { 0x21c0, 0x0000, 0x0000, 0xb017 }, + { 0x21c4, 0x0000, 0x0000, 0xb017 }, { 0x21c8, 0x0056, 0xc50f, 0xb017 }, + { 0x21cc, 0x007b, 0xd7e1, 0xb017 }, { 0x21d0, 0x0077, 0xa70e, 0xb017 }, + { 0x21d4, 0x00e0, 0xbde1, 0xb017 }, { 0x21d8, 0x0032, 0x530e, 0xb017 }, + { 0x2204, 0x00fb, 0x7e0f, 0xb017 }, { 0x2208, 0x000b, 0x02e1, 0xb017 }, + { 0x220c, 0x00fb, 0x7e0f, 0xb017 }, { 0x2210, 0x00d5, 0x17e1, 0xb017 }, + { 0x2214, 0x00c0, 0x130f, 0xb017 }, { 0x2218, 0x00e5, 0x0a00, 0xb017 }, + { 0x221c, 0x00cb, 0x1500, 0xb017 }, { 0x2220, 0x00e5, 0x0a00, 0xb017 }, + { 0x2224, 0x00d5, 0x17e1, 0xb017 }, { 0x2228, 0x00c0, 0x130f, 0xb017 }, + { 0x222c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2230, 0x0017, 0x48e2, 0xb017 }, + { 0x2234, 0x00f5, 0xdb0e, 0xb017 }, { 0x2238, 0x00ef, 0x5ce2, 0xb017 }, + { 0x223c, 0x00c1, 0xcc0d, 0xb017 }, { 0x2240, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2244, 0x0017, 0x48e2, 0xb017 }, { 0x2248, 0x00f5, 0xdb0e, 0xb017 }, + { 0x224c, 0x00ef, 0x5ce2, 0xb017 }, { 0x2250, 0x00c1, 0xcc0d, 0xb017 }, + { 0x2254, 0x00f5, 0xdb0e, 0xb017 }, { 0x2258, 0x0017, 0x48e2, 0xb017 }, + { 0x225c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2260, 0x00ef, 0x5ce2, 0xb017 }, + { 0x2264, 0x00c1, 0xcc0d, 0xb017 }, { 0x2268, 0x00f5, 0xdb0e, 0xb017 }, + { 0x226c, 0x0017, 0x48e2, 0xb017 }, { 0x2270, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2274, 0x00ef, 0x5ce2, 0xb017 }, { 0x2278, 0x00c1, 0xcc0d, 0xb017 }, + { 0x227c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2280, 0x0017, 0x48e2, 0xb017 }, + { 0x2284, 0x00f5, 0xdb0e, 0xb017 }, { 0x2288, 0x00ef, 0x5ce2, 0xb017 }, + { 0x228c, 0x00c1, 0xcc0d, 0xb017 }, { 0x22cc, 0x00e8, 0x8d00, 0xb017 }, + { 0x22d0, 0x0000, 0x0000, 0xb017 }, { 0x22d4, 0x0018, 0x72ff, 0xb017 }, + { 0x22d8, 0x00ce, 0x25e1, 0xb017 }, { 0x22dc, 0x002f, 0xe40e, 0xb017 }, + { 0x238e, 0x0000, 0x0099, 0xb011 }, { 0x238f, 0x0000, 0x0011, 0xb011 }, + { 0x2390, 0x0000, 0x0056, 0xb011 }, { 0x2391, 0x0000, 0x0004, 0xb011 }, + { 0x2392, 0x0000, 0x00bb, 0xb011 }, { 0x2393, 0x0000, 0x006d, 0xb011 }, + { 0x2394, 0x0000, 0x0010, 0xb011 }, { 0x2395, 0x0000, 0x0064, 0xb011 }, + { 0x2396, 0x0000, 0x00b6, 0xb011 }, { 0x2397, 0x0000, 0x0028, 0xb011 }, + { 0x2398, 0x0000, 0x000b, 0xb011 }, { 0x239a, 0x0000, 0x0099, 0xb011 }, + { 0x239b, 0x0000, 0x000d, 0xb011 }, { 0x23a6, 0x0000, 0x0064, 0xb011 }, + { 0x23a7, 0x0000, 0x0078, 0xb011 }, { 0x23b9, 0x0000, 0x0000, 0xb011 }, + { 0x23e0, 0x0000, 0x0021, 0xb011 }, { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x3c[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0001, 0xb011 }, { 0x201d, 0x0000, 0x0001, 0xb011 }, + { 0x201f, 0x0000, 0x00fe, 0xb011 }, { 0x2021, 0x0000, 0x0000, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x23ba, 0x0000, 0x008d, 0xb011 }, + { 0x2128, 0x0005, 0x460d, 0xb017 }, { 0x212c, 0x00f6, 0x73e5, 0xb017 }, + { 0x2130, 0x0005, 0x460d, 0xb017 }, { 0x2134, 0x00c0, 0xe9e5, 0xb017 }, + { 0x2138, 0x00d5, 0x010b, 0xb017 }, { 0x213c, 0x009d, 0x7809, 0xb017 }, + { 0x2140, 0x00c5, 0x0eed, 0xb017 }, { 0x2144, 0x009d, 0x7809, 0xb017 }, + { 0x2148, 0x00c4, 0x4ef0, 0xb017 }, { 0x214c, 0x003a, 0x3106, 0xb017 }, + { 0x2150, 0x00af, 0x750e, 0xb017 }, { 0x2154, 0x008c, 0x1ff1, 0xb017 }, + { 0x2158, 0x009e, 0x360c, 0xb017 }, { 0x215c, 0x008c, 0x1ff1, 0xb017 }, + { 0x2160, 0x004d, 0xac0a, 0xb017 }, { 0x2164, 0x007d, 0xa00f, 0xb017 }, + { 0x2168, 0x00e1, 0x9ce3, 0xb017 }, { 0x216c, 0x00e8, 0x590e, 0xb017 }, + { 0x2170, 0x00e1, 0x9ce3, 0xb017 }, { 0x2174, 0x0066, 0xfa0d, 0xb017 }, + { 0x2178, 0x0000, 0x0010, 0xb017 }, { 0x217c, 0x0000, 0x0000, 0xb017 }, + { 0x2180, 0x0000, 0x0000, 0xb017 }, { 0x2184, 0x0000, 0x0000, 0xb017 }, + { 0x2188, 0x0000, 0x0000, 0xb017 }, { 0x218c, 0x0000, 0x0010, 0xb017 }, + { 0x2190, 0x0000, 0x0000, 0xb017 }, { 0x2194, 0x0000, 0x0000, 0xb017 }, + { 0x2198, 0x0000, 0x0000, 0xb017 }, { 0x219c, 0x0000, 0x0000, 0xb017 }, + { 0x21a0, 0x0000, 0x0010, 0xb017 }, { 0x21a4, 0x0000, 0x0000, 0xb017 }, + { 0x21a8, 0x0000, 0x0000, 0xb017 }, { 0x21ac, 0x0000, 0x0000, 0xb017 }, + { 0x21b0, 0x0000, 0x0000, 0xb017 }, { 0x21b4, 0x0000, 0x0010, 0xb017 }, + { 0x21b8, 0x0000, 0x0000, 0xb017 }, { 0x21bc, 0x0000, 0x0000, 0xb017 }, + { 0x21c0, 0x0000, 0x0000, 0xb017 }, { 0x21c4, 0x0000, 0x0000, 0xb017 }, + { 0x23b9, 0x0000, 0x0000, 0xb011 }, { 0x23e0, 0x0000, 0x0020, 0xb011 }, + { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x3d[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0002, 0xb011 }, { 0x201d, 0x0000, 0x0002, 0xb011 }, + { 0x201f, 0x0000, 0x00fd, 0xb011 }, { 0x2021, 0x0000, 0x0001, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x23ba, 0x0000, 0x008d, 0xb011 }, + { 0x2128, 0x0005, 0x460d, 0xb017 }, { 0x212c, 0x00f6, 0x73e5, 0xb017 }, + { 0x2130, 0x0005, 0x460d, 0xb017 }, { 0x2134, 0x00c0, 0xe9e5, 0xb017 }, + { 0x2138, 0x00d5, 0x010b, 0xb017 }, { 0x213c, 0x009d, 0x7809, 0xb017 }, + { 0x2140, 0x00c5, 0x0eed, 0xb017 }, { 0x2144, 0x009d, 0x7809, 0xb017 }, + { 0x2148, 0x00c4, 0x4ef0, 0xb017 }, { 0x214c, 0x003a, 0x3106, 0xb017 }, + { 0x2150, 0x00af, 0x750e, 0xb017 }, { 0x2154, 0x008c, 0x1ff1, 0xb017 }, + { 0x2158, 0x009e, 0x360c, 0xb017 }, { 0x215c, 0x008c, 0x1ff1, 0xb017 }, + { 0x2160, 0x004d, 0xac0a, 0xb017 }, { 0x2164, 0x007d, 0xa00f, 0xb017 }, + { 0x2168, 0x00e1, 0x9ce3, 0xb017 }, { 0x216c, 0x00e8, 0x590e, 0xb017 }, + { 0x2170, 0x00e1, 0x9ce3, 0xb017 }, { 0x2174, 0x0066, 0xfa0d, 0xb017 }, + { 0x2178, 0x0000, 0x0010, 0xb017 }, { 0x217c, 0x0000, 0x0000, 0xb017 }, + { 0x2180, 0x0000, 0x0000, 0xb017 }, { 0x2184, 0x0000, 0x0000, 0xb017 }, + { 0x2188, 0x0000, 0x0000, 0xb017 }, { 0x218c, 0x0000, 0x0010, 0xb017 }, + { 0x2190, 0x0000, 0x0000, 0xb017 }, { 0x2194, 0x0000, 0x0000, 0xb017 }, + { 0x2198, 0x0000, 0x0000, 0xb017 }, { 0x219c, 0x0000, 0x0000, 0xb017 }, + { 0x21a0, 0x0000, 0x0010, 0xb017 }, { 0x21a4, 0x0000, 0x0000, 0xb017 }, + { 0x21a8, 0x0000, 0x0000, 0xb017 }, { 0x21ac, 0x0000, 0x0000, 0xb017 }, + { 0x21b0, 0x0000, 0x0000, 0xb017 }, { 0x21b4, 0x0000, 0x0010, 0xb017 }, + { 0x21b8, 0x0000, 0x0000, 0xb017 }, { 0x21bc, 0x0000, 0x0000, 0xb017 }, + { 0x21c0, 0x0000, 0x0000, 0xb017 }, { 0x21c4, 0x0000, 0x0000, 0xb017 }, + { 0x23b9, 0x0000, 0x0000, 0xb011 }, { 0x23e0, 0x0000, 0x0020, 0xb011 }, + { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_seq1[] = { + { 0x23ff, 0x0000, 0x0000, 0xb011 }, { 0x203a, 0x0000, 0x0080, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_fixup_desc fixups2[] = { + { 0x4f, 0xb029 }, { 0x05, 0x2be0 }, { 0x30, 0x2421 }, + }; + + + static const struct alc298_samsung_coeff_seq_desc amp_seq2[] = { + { 0x203a, 0x0000, 0x0081, 0xb011 }, { 0x23ff, 0x0000, 0x0001, 0xb011 }, + }; + + if (action != HDA_FIXUP_ACT_INIT) + return; + + // First set of fixups + for (i = 0; i < ARRAY_SIZE(fixups1); i++) + alc_write_coef_idx(codec, fixups1[i].coeff_idx, fixups1[i].coeff_value); + + // First set of writes + alc298_samsung_write_coef_pack_seq(codec, 0x38, amp_0x38, ARRAY_SIZE(amp_0x38)); + alc298_samsung_write_coef_pack_seq(codec, 0x39, amp_0x39, ARRAY_SIZE(amp_0x39)); + alc298_samsung_write_coef_pack_seq(codec, 0x3c, amp_0x3c, ARRAY_SIZE(amp_0x3c)); + alc298_samsung_write_coef_pack_seq(codec, 0x3d, amp_0x3d, ARRAY_SIZE(amp_0x3d)); + + // Second set of writes + alc298_samsung_write_coef_pack_seq(codec, 0x38, amp_seq1, ARRAY_SIZE(amp_seq1)); + alc298_samsung_write_coef_pack_seq(codec, 0x39, amp_seq1, ARRAY_SIZE(amp_seq1)); + alc298_samsung_write_coef_pack_seq(codec, 0x3c, amp_seq1, ARRAY_SIZE(amp_seq1)); + alc298_samsung_write_coef_pack_seq(codec, 0x3d, amp_seq1, ARRAY_SIZE(amp_seq1)); + + // Second set of fixups + for (i = 0; i < ARRAY_SIZE(fixups2); i++) + alc_write_coef_idx(codec, fixups2[i].coeff_idx, fixups2[i].coeff_value); + + // Third set of writes + alc298_samsung_write_coef_pack_seq(codec, 0x38, amp_seq2, ARRAY_SIZE(amp_seq2)); + alc298_samsung_write_coef_pack_seq(codec, 0x39, amp_seq2, ARRAY_SIZE(amp_seq2)); + alc298_samsung_write_coef_pack_seq(codec, 0x3c, amp_seq2, ARRAY_SIZE(amp_seq2)); + alc298_samsung_write_coef_pack_seq(codec, 0x3d, amp_seq2, ARRAY_SIZE(amp_seq2)); + + // Final fixup + alc_write_coef_idx(codec, 0x10, 0x0F21); +} diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 49bd7097d892..89d8235537cd 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -2,10 +2,12 @@ // // TAS2781 HDA I2C driver // -// Copyright 2023 Texas Instruments, Inc. +// Copyright 2023 - 2024 Texas Instruments, Inc. // // Author: Shenghao Ding <shenghao-ding@ti.com> +// Current maintainer: Baojun Xu <baojun.xu@ti.com> +#include <asm/unaligned.h> #include <linux/acpi.h> #include <linux/crc8.h> #include <linux/crc32.h> @@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) static const unsigned char rgno_array[CALIB_MAX] = { 0x74, 0x0c, 0x14, 0x70, 0x7c, }; - unsigned char *data; + int offset = 0; int i, j, rc; + __be32 data; for (i = 0; i < tas_priv->ndev; i++) { - data = tas_priv->cali_data.data + - i * TASDEVICE_SPEAKER_CALIBRATION_SIZE; for (j = 0; j < CALIB_MAX; j++) { + data = cpu_to_be32( + *(uint32_t *)&tas_priv->cali_data.data[offset]); rc = tasdevice_dev_bulk_write(tas_priv, i, TASDEVICE_REG(0, page_array[j], rgno_array[j]), - &(data[4 * j]), 4); + (unsigned char *)&data, 4); if (rc < 0) dev_err(tas_priv->dev, "chn %d calib %d bulk_wr err = %d\n", i, j, rc); + offset += 4; } } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f54466ed8e3e..0523c16305db 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -224,6 +224,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M5"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), } }, @@ -406,6 +420,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A44"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), DMI_MATCH(DMI_BOARD_NAME, "8A22"), } }, @@ -420,6 +441,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8B27"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), DMI_MATCH(DMI_BOARD_NAME, "8B2F"), } }, diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 7434aeeda292..402b9a2ff024 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -2786,15 +2786,13 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) { struct arizona_pdata *pdata = &arizona->pdata; struct device_node *np = arizona->dev->of_node; - struct property *prop; - const __be32 *cur; u32 val; u32 pdm_val[ARIZONA_MAX_PDM_SPK]; int ret; int count = 0; count = 0; - of_property_for_each_u32(np, "wlf,inmode", prop, cur, val) { + of_property_for_each_u32(np, "wlf,inmode", val) { if (count == ARRAY_SIZE(pdata->inmode)) break; @@ -2803,7 +2801,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,dmic-ref", prop, cur, val) { + of_property_for_each_u32(np, "wlf,dmic-ref", val) { if (count == ARRAY_SIZE(pdata->dmic_ref)) break; @@ -2812,7 +2810,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,out-mono", prop, cur, val) { + of_property_for_each_u32(np, "wlf,out-mono", val) { if (count == ARRAY_SIZE(pdata->out_mono)) break; @@ -2821,7 +2819,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,max-channels-clocked", prop, cur, val) { + of_property_for_each_u32(np, "wlf,max-channels-clocked", val) { if (count == ARRAY_SIZE(pdata->max_channels_clocked)) break; @@ -2830,7 +2828,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,out-volume-limit", prop, cur, val) { + of_property_for_each_u32(np, "wlf,out-volume-limit", val) { if (count == ARRAY_SIZE(pdata->out_vol_limit)) break; diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 287ac01a3873..605964af8afa 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -108,7 +108,7 @@ static efi_status_t cs_amp_get_efi_variable(efi_char16_t *name, KUNIT_STATIC_STUB_REDIRECT(cs_amp_get_efi_variable, name, guid, size, buf); - if (IS_ENABLED(CONFIG_EFI)) + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return efi.get_variable(name, guid, &attr, size, buf); return EFI_NOT_FOUND; diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 2392c6effed8..1e9d73bee3b4 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -176,17 +176,10 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component, struct snd_kcontrol *kcontrol; struct snd_kcontrol_volatile *vd; unsigned int index_offset; - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - if (component->name_prefix) - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s", - component->name_prefix, ctl_name); - else - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name); - - kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name); + kcontrol = snd_soc_component_get_kcontrol_locked(component, ctl_name); if (!kcontrol) { - dev_err(component->dev, "Can't find kcontrol %s\n", name); + dev_err(component->dev, "Can't find kcontrol %s\n", ctl_name); return -EINVAL; } diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index fc03bb7ecae1..7c9a17fe2195 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -23,6 +23,79 @@ /* Register addresses are offset when sent over SoundWire */ #define CS35L56_SDW_ADDR_OFFSET 0x8000 +/* Cirrus bus bridge registers */ +#define CS35L56_SDW_MEM_ACCESS_STATUS 0xd0 +#define CS35L56_SDW_MEM_READ_DATA 0xd8 + +#define CS35L56_SDW_LAST_LATE BIT(3) +#define CS35L56_SDW_CMD_IN_PROGRESS BIT(2) +#define CS35L56_SDW_RDATA_RDY BIT(0) + +#define CS35L56_LATE_READ_POLL_US 10 +#define CS35L56_LATE_READ_TIMEOUT_US 1000 + +static int cs35l56_sdw_poll_mem_status(struct sdw_slave *peripheral, + unsigned int mask, + unsigned int match) +{ + int ret, val; + + ret = read_poll_timeout(sdw_read_no_pm, val, + (val < 0) || ((val & mask) == match), + CS35L56_LATE_READ_POLL_US, CS35L56_LATE_READ_TIMEOUT_US, + false, peripheral, CS35L56_SDW_MEM_ACCESS_STATUS); + if (ret < 0) + return ret; + + if (val < 0) + return val; + + return 0; +} + +static int cs35l56_sdw_slow_read(struct sdw_slave *peripheral, unsigned int reg, + u8 *buf, size_t val_size) +{ + int ret, i; + + reg += CS35L56_SDW_ADDR_OFFSET; + + for (i = 0; i < val_size; i += sizeof(u32)) { + /* Poll for bus bridge idle */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_CMD_IN_PROGRESS, + 0); + if (ret < 0) { + dev_err(&peripheral->dev, "!CMD_IN_PROGRESS fail: %d\n", ret); + return ret; + } + + /* Reading LSByte triggers read of register to holding buffer */ + sdw_read_no_pm(peripheral, reg + i); + + /* Wait for data available */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_RDATA_RDY, + CS35L56_SDW_RDATA_RDY); + if (ret < 0) { + dev_err(&peripheral->dev, "RDATA_RDY fail: %d\n", ret); + return ret; + } + + /* Read data from buffer */ + ret = sdw_nread_no_pm(peripheral, CS35L56_SDW_MEM_READ_DATA, + sizeof(u32), &buf[i]); + if (ret) { + dev_err(&peripheral->dev, "Late read @%#x failed: %d\n", reg + i, ret); + return ret; + } + + swab32s((u32 *)&buf[i]); + } + + return 0; +} + static int cs35l56_sdw_read_one(struct sdw_slave *peripheral, unsigned int reg, void *buf) { int ret; @@ -48,6 +121,10 @@ static int cs35l56_sdw_read(void *context, const void *reg_buf, int ret; reg = le32_to_cpu(*(const __le32 *)reg_buf); + + if (cs35l56_is_otp_register(reg)) + return cs35l56_sdw_slow_read(peripheral, reg, buf8, val_size); + reg += CS35L56_SDW_ADDR_OFFSET; if (val_size == 4) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e7e8d617da94..bd74fef33d49 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -36,6 +36,7 @@ static const struct reg_sequence cs35l56_patch[] = { { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, + { CS35L56_IRQ1_MASK_18, 0x1f7df0ff }, /* These are not reset by a soft-reset, so patch to defaults. */ { CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 }, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 84c34f5b1a51..757ade6373ed 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1095,6 +1095,11 @@ int cs35l56_system_resume(struct device *dev) } EXPORT_SYMBOL_GPL(cs35l56_system_resume); +static int cs35l56_control_add_nop(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl) +{ + return 0; +} + static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) { struct wm_adsp *dsp; @@ -1117,6 +1122,12 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) dsp->fw = 12; dsp->wmfw_optional = true; + /* + * None of the firmware controls need to be exported so add a no-op + * callback that suppresses creating an ALSA control. + */ + dsp->control_add = &cs35l56_control_add_nop; + dev_dbg(cs35l56->base.dev, "DSP system name: '%s'\n", dsp->system_name); ret = wm_halo_init(dsp); diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 92674314227c..5183b4586424 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -7,6 +7,7 @@ #include <linux/bitops.h> #include <linux/bits.h> +#include <linux/build_bug.h> #include <linux/clk.h> #include <linux/device.h> #include <linux/err.h> @@ -252,24 +253,20 @@ CS42L43_IRQ_COMPLETE(load_detect) static irqreturn_t cs42l43_mic_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - static const char * const controls[] = { - "Decimator 1 Switch", - "Decimator 2 Switch", - "Decimator 3 Switch", - "Decimator 4 Switch", - }; - int i, ret; + struct snd_soc_component *component = priv->component; + int i; dev_dbg(priv->dev, "Microphone shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - for (i = 0; i < ARRAY_SIZE(controls); i++) { - ret = snd_soc_component_notify_control(priv->component, - controls[i]); - if (ret) + for (i = 1; i < ARRAY_SIZE(priv->kctl); i++) { + if (!priv->kctl[i]) return IRQ_NONE; + + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[i]->id); } return IRQ_HANDLED; @@ -278,18 +275,19 @@ static irqreturn_t cs42l43_mic_shutter(int irq, void *data) static irqreturn_t cs42l43_spk_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - int ret; + struct snd_soc_component *component = priv->component; dev_dbg(priv->dev, "Speaker shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - ret = snd_soc_component_notify_control(priv->component, - "Speaker Digital Switch"); - if (ret) + if (!priv->kctl[0]) return IRQ_NONE; + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[0]->id); + return IRQ_HANDLED; } @@ -590,7 +588,46 @@ static int cs42l43_asp_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mas return 0; } +static int cs42l43_dai_probe(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + static const char * const controls[] = { + "Speaker Digital Switch", + "Decimator 1 Switch", + "Decimator 2 Switch", + "Decimator 3 Switch", + "Decimator 4 Switch", + }; + int i; + + static_assert(ARRAY_SIZE(controls) == ARRAY_SIZE(priv->kctl)); + + for (i = 0; i < ARRAY_SIZE(controls); i++) { + if (priv->kctl[i]) + continue; + + priv->kctl[i] = snd_soc_component_get_kcontrol(component, controls[i]); + } + + return 0; +} + +static int cs42l43_dai_remove(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + int i; + + for (i = 0; i < ARRAY_SIZE(priv->kctl); i++) + priv->kctl[i] = NULL; + + return 0; +} + static const struct snd_soc_dai_ops cs42l43_asp_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .hw_params = cs42l43_asp_hw_params, .set_fmt = cs42l43_asp_set_fmt, @@ -608,9 +645,11 @@ static int cs42l43_sdw_hw_params(struct snd_pcm_substream *substream, return ret; return cs42l43_set_sample_rate(substream, params, dai); -}; +} static const struct snd_soc_dai_ops cs42l43_sdw_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .set_stream = cs42l43_sdw_set_stream, .hw_params = cs42l43_sdw_hw_params, diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 9924c13e1eb5..9c144e129535 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -100,6 +100,8 @@ struct cs42l43_codec { struct delayed_work hp_ilimit_clear_work; bool hp_ilimited; int hp_ilimit_count; + + struct snd_kcontrol *kctl[5]; }; #if IS_REACHABLE(CONFIG_SND_SOC_CS42L43_SDW) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index 25a86a32e936..da52afe56c3c 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -129,16 +129,16 @@ volsw_err: static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1270, 50, 0); -static const char * const cs530x_in_hpf_text[] = { +static const char * const cs530x_in_filter_text[] = { "Min Phase Slow Roll-off", "Min Phase Fast Roll-off", "Linear Phase Slow Roll-off", "Linear Phase Fast Roll-off", }; -static SOC_ENUM_SINGLE_DECL(cs530x_in_hpf_enum, CS530X_IN_FILTER, +static SOC_ENUM_SINGLE_DECL(cs530x_in_filter_enum, CS530X_IN_FILTER, CS530X_IN_FILTER_SHIFT, - cs530x_in_hpf_text); + cs530x_in_filter_text); static const char * const cs530x_in_4ch_sum_text[] = { "None", @@ -189,7 +189,7 @@ SOC_SINGLE_EXT_TLV("IN1 Volume", CS530X_IN_VOL_CTRL1_0, 0, 255, 1, SOC_SINGLE_EXT_TLV("IN2 Volume", CS530X_IN_VOL_CTRL1_1, 0, 255, 1, snd_soc_get_volsw, cs530x_put_volsw_vu, in_vol_tlv), -SOC_ENUM("IN HPF Select", cs530x_in_hpf_enum), +SOC_ENUM("IN DEC Filter Select", cs530x_in_filter_enum), SOC_ENUM("Input Ramp Up", cs530x_ramp_inc_enum), SOC_ENUM("Input Ramp Down", cs530x_ramp_dec_enum), diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b246694ebb4f..be3c79232a31 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -843,6 +843,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) es8326_disable_micbias(es8326->component); if (es8326->jack->status & SND_JACK_HEADPHONE) { dev_dbg(comp->dev, "Report hp remove event\n"); + snd_soc_jack_report(es8326->jack, 0, + SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2); snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET); /* mute adc when mic path switch */ regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44); diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index b852cc7ffad9..a62ccd09bacd 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1472,6 +1472,8 @@ static void va_macro_set_lpass_codec_version(struct va_macro *va) if ((core_id_0 == 0x01) && (core_id_1 == 0x0F)) version = LPASS_CODEC_VERSION_2_0; + if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && core_id_2 == 0x01) + version = LPASS_CODEC_VERSION_2_0; if ((core_id_0 == 0x02) && (core_id_1 == 0x0E)) version = LPASS_CODEC_VERSION_2_1; if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && (core_id_2 == 0x50 || core_id_2 == 0x51)) diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index e1cbaf8a944d..fd4a96a12060 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -736,7 +736,7 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, return ret; } - dev_info(component->dev, + dev_dbg(component->dev, "pll_int=%x pll_frac=%x mclk_scaler=%x pre_factor=%x\n", pll_param->pll_int, pll_param->pll_frac, pll_param->mclk_scaler, pll_param->pre_factor); diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 63626b982d04..8f9a3ae7153e 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -2162,7 +2162,7 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) return; cal = cal_fmw->calibrations; - if (cal) + if (!cal) return; load_calib_data(priv, &cal->dev_data); diff --git a/sound/soc/codecs/wcd937x-sdw.c b/sound/soc/codecs/wcd937x-sdw.c index 3abc8041406a..0c33f7f3dc25 100644 --- a/sound/soc/codecs/wcd937x-sdw.c +++ b/sound/soc/codecs/wcd937x-sdw.c @@ -1049,7 +1049,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; @@ -1062,7 +1062,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, /* Start in cache-only until device is enumerated */ regcache_cache_only(wcd->regmap, true); } else { - pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index c995bcc59ead..7da8a10bd0a9 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1252,12 +1252,12 @@ static int wcd9380_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wcd939x-sdw.c b/sound/soc/codecs/wcd939x-sdw.c index 94b1e99a3ca0..fca95777a75a 100644 --- a/sound/soc/codecs/wcd939x-sdw.c +++ b/sound/soc/codecs/wcd939x-sdw.c @@ -1453,12 +1453,12 @@ static int wcd9390_probe(struct sdw_slave *pdev, const struct sdw_device_id *id) pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd939x_tx_dpn_prop; wcd->ch_info = &wcd939x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd939x_rx_dpn_prop; wcd->ch_info = &wcd939x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 9f8549b34e30..e69283195f36 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -583,7 +583,7 @@ static void wm_adsp_ctl_work(struct work_struct *work) kfree(kcontrol); } -static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) { struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); struct cs_dsp *cs_dsp = &dsp->cs_dsp; @@ -658,6 +658,17 @@ err_ctl: return ret; } +EXPORT_SYMBOL_GPL(wm_adsp_control_add); + +static int wm_adsp_control_add_cb(struct cs_dsp_coeff_ctl *cs_ctl) +{ + struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); + + if (dsp->control_add) + return (dsp->control_add)(dsp, cs_ctl); + else + return wm_adsp_control_add(cs_ctl); +} static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) { @@ -2072,12 +2083,12 @@ irqreturn_t wm_halo_wdt_expire(int irq, void *data) EXPORT_SYMBOL_GPL(wm_halo_wdt_expire); static const struct cs_dsp_client_ops wm_adsp1_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, }; static const struct cs_dsp_client_ops wm_adsp2_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, .pre_run = wm_adsp_pre_run, .post_run = wm_adsp_event_post_run, diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index e53dfcf1f78f..edc5b02ae765 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -37,6 +37,7 @@ struct wm_adsp { bool wmfw_optional; struct work_struct boot_work; + int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl); int (*pre_run)(struct wm_adsp *dsp); bool preloaded; @@ -132,6 +133,8 @@ int wm_adsp_compr_pointer(struct snd_soc_component *component, int wm_adsp_compr_copy(struct snd_soc_component *component, struct snd_compr_stream *stream, char __user *buf, size_t count); + +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl); int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 0478599d0f35..fb9e92f08d98 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1152,7 +1152,7 @@ static int wsa881x_probe(struct sdw_slave *pdev, wsa881x->sconfig.frame_rate = 48000; wsa881x->sconfig.direction = SDW_DATA_DIR_RX; wsa881x->sconfig.type = SDW_STREAM_PDM; - pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; pdev->prop.clk_stop_mode1 = true; diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index d0ab4e2290b6..3e4fdaa3f44f 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1406,7 +1406,7 @@ static int wsa883x_probe(struct sdw_slave *pdev, WSA883X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index d17ae17b2938..89eb5e03a617 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -1895,7 +1895,7 @@ static int wsa884x_probe(struct sdw_slave *pdev, WSA884X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa884x_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 82df887b3af5..f6c3aeff0d8e 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -306,27 +306,12 @@ static int be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, return 0; } -SND_SOC_DAILINK_DEFS(hifi, - DAILINK_COMP_ARRAY(COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_EMPTY(), COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(hifi_fe, - DAILINK_COMP_ARRAY(COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_DUMMY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(hifi_be, - DAILINK_COMP_ARRAY(COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_EMPTY(), COMP_EMPTY())); - static const struct snd_soc_dai_link fsl_asoc_card_dai[] = { /* Default ASoC DAI Link*/ { .name = "HiFi", .stream_name = "HiFi", .ops = &fsl_asoc_card_ops, - SND_SOC_DAILINK_REG(hifi), }, /* DPCM Link between Front-End and Back-End (Optional) */ { @@ -335,7 +320,6 @@ static const struct snd_soc_dai_link fsl_asoc_card_dai[] = { .dpcm_playback = 1, .dpcm_capture = 1, .dynamic = 1, - SND_SOC_DAILINK_REG(hifi_fe), }, { .name = "HiFi-ASRC-BE", @@ -345,7 +329,6 @@ static const struct snd_soc_dai_link fsl_asoc_card_dai[] = { .dpcm_playback = 1, .dpcm_capture = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(hifi_be), }, }; @@ -637,6 +620,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) struct platform_device *cpu_pdev; struct fsl_asoc_card_priv *priv; struct device *codec_dev[2] = { NULL, NULL }; + struct snd_soc_dai_link_component *dlc; const char *codec_dai_name[2]; const char *codec_dev_name[2]; u32 asrc_fmt = 0; @@ -717,7 +701,35 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) memcpy(priv->dai_link, fsl_asoc_card_dai, sizeof(struct snd_soc_dai_link) * ARRAY_SIZE(priv->dai_link)); + /* + * "Default ASoC DAI Link": 1 cpus, 2 codecs, 1 platforms + * "DPCM Link Front-End": 1 cpus, 1 codecs (dummy), 1 platforms + * "DPCM Link Back-End": 1 cpus, 2 codecs + * totally 10 components + */ + dlc = devm_kcalloc(&pdev->dev, 10, sizeof(*dlc), GFP_KERNEL); + if (!dlc) { + ret = -ENOMEM; + goto asrc_fail; + } + + priv->dai_link[0].cpus = &dlc[0]; + priv->dai_link[0].num_cpus = 1; + priv->dai_link[0].codecs = &dlc[1]; priv->dai_link[0].num_codecs = 1; + priv->dai_link[0].platforms = &dlc[3]; + priv->dai_link[0].num_platforms = 1; + + priv->dai_link[1].cpus = &dlc[4]; + priv->dai_link[1].num_cpus = 1; + priv->dai_link[1].codecs = &dlc[5]; + priv->dai_link[1].num_codecs = 0; /* dummy */ + priv->dai_link[1].platforms = &dlc[6]; + priv->dai_link[1].num_platforms = 1; + + priv->dai_link[2].cpus = &dlc[7]; + priv->dai_link[2].num_cpus = 1; + priv->dai_link[2].codecs = &dlc[8]; priv->dai_link[2].num_codecs = 1; priv->card.dapm_routes = audio_map; diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 0d37edb70261..22b240a70ad4 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -831,7 +831,7 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { {REG_MICFIL_CTRL1, 0x00000000}, {REG_MICFIL_CTRL2, 0x00000000}, {REG_MICFIL_STAT, 0x00000000}, - {REG_MICFIL_FIFO_CTRL, 0x00000007}, + {REG_MICFIL_FIFO_CTRL, 0x0000001F}, {REG_MICFIL_FIFO_STAT, 0x00000000}, {REG_MICFIL_DATACH0, 0x00000000}, {REG_MICFIL_DATACH1, 0x00000000}, @@ -855,6 +855,8 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -872,9 +874,6 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: - case REG_MICFIL_FSYNC_CTRL: - case REG_MICFIL_VERID: - case REG_MICFIL_PARAM: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: @@ -883,6 +882,12 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NDATA: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + case REG_MICFIL_VERID: + case REG_MICFIL_PARAM: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } @@ -890,6 +895,8 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -899,7 +906,6 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: /* Write 1 to Clear */ - case REG_MICFIL_FSYNC_CTRL: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: /* Write 1 to Clear */ @@ -907,6 +913,10 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NCONFIG: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } diff --git a/sound/soc/fsl/fsl_micfil.h b/sound/soc/fsl/fsl_micfil.h index c6b902ba0a53..b7798a7cbf2a 100644 --- a/sound/soc/fsl/fsl_micfil.h +++ b/sound/soc/fsl/fsl_micfil.h @@ -72,7 +72,7 @@ #define MICFIL_STAT_CHXF(ch) BIT(ch) /* MICFIL FIFO Control Register -- REG_MICFIL_FIFO_CTRL 0x10 */ -#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(2, 0) +#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(4, 0) /* MICFIL FIFO Status Register -- REG_MICFIL_FIFO_STAT 0x14 */ #define MICFIL_FIFO_STAT_FIFOX_OVER(ch) BIT(ch) diff --git a/sound/soc/intel/common/soc-acpi-intel-ssp-common.c b/sound/soc/intel/common/soc-acpi-intel-ssp-common.c index 75d0b931d895..de7a3f7f47f1 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ssp-common.c +++ b/sound/soc/intel/common/soc-acpi-intel-ssp-common.c @@ -64,6 +64,15 @@ static const struct codec_map amps[] = { CODEC_MAP_ENTRY("RT1015P", "rt1015", RT1015P_ACPI_HID, CODEC_RT1015P), CODEC_MAP_ENTRY("RT1019P", "rt1019", RT1019P_ACPI_HID, CODEC_RT1019P), CODEC_MAP_ENTRY("RT1308", "rt1308", RT1308_ACPI_HID, CODEC_RT1308), + + /* + * Monolithic components + * + * Only put components that can serve as both the amp and the codec below this line. + * This will ensure that if the part is used just as a codec and there is an amp as well + * then the amp will be selected properly. + */ + CODEC_MAP_ENTRY("RT5650", "rt5650", RT5650_ACPI_HID, CODEC_RT5650), }; enum snd_soc_acpi_intel_codec diff --git a/sound/soc/intel/common/soc-intel-quirks.h b/sound/soc/intel/common/soc-intel-quirks.h index de4e550c5b34..42bd51456b94 100644 --- a/sound/soc/intel/common/soc-intel-quirks.h +++ b/sound/soc/intel/common/soc-intel-quirks.h @@ -11,7 +11,7 @@ #include <linux/platform_data/x86/soc.h> -#if IS_ENABLED(CONFIG_X86) +#if IS_REACHABLE(CONFIG_IOSF_MBI) #include <linux/dmi.h> #include <asm/iosf_mbi.h> diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index 7e6090af720b..75909196b769 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -207,25 +207,18 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) status = FIELD_GET(STATUS1_INT_STS, status); axg_fifo_ack_irq(fifo, status); - /* Use the thread to call period elapsed on nonatomic links */ - if (status & FIFO_INT_COUNT_REPEAT) - return IRQ_WAKE_THREAD; + if (status & ~FIFO_INT_COUNT_REPEAT) + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + if (status & FIFO_INT_COUNT_REPEAT) { + snd_pcm_period_elapsed(ss); + return IRQ_HANDLED; + } return IRQ_NONE; } -static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) -{ - struct snd_pcm_substream *ss = dev_id; - - snd_pcm_period_elapsed(ss); - - return IRQ_HANDLED; -} - int axg_fifo_pcm_open(struct snd_soc_component *component, struct snd_pcm_substream *ss) { @@ -251,8 +244,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, - axg_fifo_pcm_irq_block_thread, + /* Use the threaded irq handler only with non-atomic links */ + ret = request_threaded_irq(fifo->irq, NULL, + axg_fifo_pcm_irq_block, IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 4d7c2e3c929a..42f481321919 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -236,19 +236,45 @@ int snd_soc_component_force_enable_pin_unlocked( } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); -int snd_soc_component_notify_control(struct snd_soc_component *component, - const char * const ctl) +static void soc_get_kcontrol_name(struct snd_soc_component *component, + char *buf, int size, const char * const ctl) { - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - struct snd_kcontrol *kctl; - /* When updating, change also snd_soc_dapm_widget_name_cmp() */ if (component->name_prefix) - snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); + snprintf(buf, size, "%s %s", component->name_prefix, ctl); else - snprintf(name, ARRAY_SIZE(name), "%s", ctl); + snprintf(buf, size, "%s", ctl); +} + +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol); + +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol_locked(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol_locked); + +int snd_soc_component_notify_control(struct snd_soc_component *component, + const char * const ctl) +{ + struct snd_kcontrol *kctl; - kctl = snd_soc_card_get_kcontrol(component->card, name); + kctl = snd_soc_component_get_kcontrol(component, ctl); if (!kctl) return soc_component_ret(component, -EINVAL); diff --git a/sound/soc/sof/amd/pci-vangogh.c b/sound/soc/sof/amd/pci-vangogh.c index 16eb2994fbab..eba580840100 100644 --- a/sound/soc/sof/amd/pci-vangogh.c +++ b/sound/soc/sof/amd/pci-vangogh.c @@ -34,7 +34,6 @@ static const struct sof_amd_acp_desc vangogh_chip_info = { .dsp_intr_base = ACP5X_DSP_SW_INTR_BASE, .sram_pte_offset = ACP5X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP5X_AXI2DAGB_SEM_0, - .acp_clkmux_sel = ACP5X_CLKMUX_SEL, .probe_reg_offset = ACP5X_FUTURE_REG_ACLK_0, }; diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index 1c7019c3cbd3..cdd1e79ef9f6 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -234,7 +234,7 @@ static int imx8m_probe(struct snd_sof_dev *sdev) /* set default mailbox offset for FW ready message */ sdev->dsp_box.offset = MBOX_OFFSET; - priv->regmap = syscon_regmap_lookup_by_compatible("fsl,dsp-ctrl"); + priv->regmap = syscon_regmap_lookup_by_phandle(np, "fsl,dsp-ctrl"); if (IS_ERR(priv->regmap)) { dev_err(sdev->dev, "cannot find dsp-ctrl registers"); ret = PTR_ERR(priv->regmap); diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index b8b914eaf7e0..75f6240cf3e1 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -310,15 +310,19 @@ int hda_cl_copy_fw(struct snd_sof_dev *sdev, struct hdac_ext_stream *hext_stream return ret; } - /* Wait for completion of transfer */ - time_left = wait_for_completion_timeout(&hda_stream->ioc, - msecs_to_jiffies(HDA_CL_DMA_IOC_TIMEOUT_MS)); - - if (!time_left) { - dev_err(sdev->dev, "Code loader DMA did not complete\n"); - return -ETIMEDOUT; + if (sdev->pdata->ipc_type == SOF_IPC_TYPE_4) { + /* Wait for completion of transfer */ + time_left = wait_for_completion_timeout(&hda_stream->ioc, + msecs_to_jiffies(HDA_CL_DMA_IOC_TIMEOUT_MS)); + + if (!time_left) { + dev_err(sdev->dev, "Code loader DMA did not complete\n"); + return -ETIMEDOUT; + } + dev_dbg(sdev->dev, "Code loader DMA done\n"); } - dev_dbg(sdev->dev, "Code loader DMA done, waiting for FW_ENTERED status\n"); + + dev_dbg(sdev->dev, "waiting for FW_ENTERED status\n"); status = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR, chip->rom_status_reg, reg, diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index daf364f773dd..5a40b8fbbbd3 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1307,9 +1307,10 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) const struct sof_dev_desc *desc = sof_pdata->desc; struct hdac_bus *bus = sof_to_bus(sdev); struct snd_soc_acpi_mach *mach = NULL; - enum snd_soc_acpi_intel_codec codec_type; + enum snd_soc_acpi_intel_codec codec_type, amp_type; const char *tplg_filename; const char *tplg_suffix; + bool amp_name_valid; /* Try I2S or DMIC if it is supported */ if (interface_mask & (BIT(SOF_DAI_INTEL_SSP) | BIT(SOF_DAI_INTEL_DMIC))) @@ -1413,15 +1414,16 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) } } - codec_type = snd_soc_acpi_intel_detect_amp_type(sdev->dev); + amp_type = snd_soc_acpi_intel_detect_amp_type(sdev->dev); + codec_type = snd_soc_acpi_intel_detect_codec_type(sdev->dev); + amp_name_valid = amp_type != CODEC_NONE && amp_type != codec_type; - if (tplg_fixup && - mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_AMP_NAME && - codec_type != CODEC_NONE) { - tplg_suffix = snd_soc_acpi_intel_get_amp_tplg_suffix(codec_type); + if (tplg_fixup && amp_name_valid && + mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_AMP_NAME) { + tplg_suffix = snd_soc_acpi_intel_get_amp_tplg_suffix(amp_type); if (!tplg_suffix) { dev_err(sdev->dev, "no tplg suffix found, amp %d\n", - codec_type); + amp_type); return NULL; } @@ -1436,7 +1438,6 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) add_extension = true; } - codec_type = snd_soc_acpi_intel_detect_codec_type(sdev->dev); if (tplg_fixup && mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_CODEC_NAME && diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index 90f6856ee80c..87be7f16e8c2 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -1358,7 +1358,13 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget) ipc4_copier = dai->private; if (pipeline->use_chain_dma) { - pipeline->msg.primary = 0; + /* + * Preserve the DMA Link ID and clear other bits since + * the DMA Link ID is only configured once during + * dai_config, other fields are expected to be 0 for + * re-configuration + */ + pipeline->msg.primary &= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK; pipeline->msg.extension = 0; } @@ -3095,8 +3101,14 @@ static int sof_ipc4_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget * return 0; if (pipeline->use_chain_dma) { - pipeline->msg.primary &= ~SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK; - pipeline->msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(data->dai_data); + /* + * Only configure the DMA Link ID for ChainDMA when this op is + * invoked with SOF_DAI_CONFIG_FLAGS_HW_PARAMS + */ + if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) { + pipeline->msg.primary &= ~SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK; + pipeline->msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(data->dai_data); + } return 0; } diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 24ae1d4959be..1c6e035fd313 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -573,7 +573,7 @@ static const struct snd_sof_dsp_ops sof_mt8195_ops = { static struct snd_sof_of_mach sof_mt8195_machs[] = { { .compatible = "google,tomato", - .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682-dts.tplg" + .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682.tplg" }, { .compatible = "mediatek,mt8195", .sof_tplg_filename = "sof-mt8195.tplg" diff --git a/sound/soc/sti/sti_uniperif.c b/sound/soc/sti/sti_uniperif.c index ba824f14a39c..a7956e5a4ee5 100644 --- a/sound/soc/sti/sti_uniperif.c +++ b/sound/soc/sti/sti_uniperif.c @@ -352,7 +352,7 @@ static int sti_uniperiph_resume(struct snd_soc_component *component) return ret; } -static int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct sti_uniperiph_dai *dai_data = &priv->dai_data; diff --git a/sound/soc/sti/uniperif.h b/sound/soc/sti/uniperif.h index 2a5de328501c..74e51f0ff85c 100644 --- a/sound/soc/sti/uniperif.h +++ b/sound/soc/sti/uniperif.h @@ -1380,6 +1380,7 @@ int uni_reader_init(struct platform_device *pdev, struct uniperif *reader); /* common */ +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai); int sti_uniperiph_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index dd9013c47664..6d1ce030963c 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -1038,6 +1038,7 @@ static const struct snd_soc_dai_ops uni_player_dai_ops = { .startup = uni_player_startup, .shutdown = uni_player_shutdown, .prepare = uni_player_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_player_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 065c5f0d1f5f..05ea2b794eb9 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -401,6 +401,7 @@ static const struct snd_soc_dai_ops uni_reader_dai_ops = { .startup = uni_reader_startup, .shutdown = uni_reader_shutdown, .prepare = uni_reader_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_reader_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index 74effc57a7a0..2463c22e9cf6 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -78,6 +78,7 @@ config SND_SOC_TEGRA210_DMIC config SND_SOC_TEGRA210_I2S tristate "Tegra210 I2S module" + select SND_SIMPLE_CARD_UTILS help Config to enable the Inter-IC Sound (I2S) Controller which implements full-duplex and bidirectional and single direction diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index f4437015d43a..9df49a880b75 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -286,12 +286,14 @@ static void line6_data_received(struct urb *urb) { struct usb_line6 *line6 = (struct usb_line6 *)urb->context; struct midi_buffer *mb = &line6->line6midi->midibuf_in; + unsigned long flags; int done; if (urb->status == -ESHUTDOWN) return; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_write(mb, urb->transfer_buffer, urb->actual_length); @@ -300,12 +302,15 @@ static void line6_data_received(struct urb *urb) dev_dbg(line6->ifcdev, "%d %d buffer overflow - message skipped\n", done, urb->actual_length); } + spin_unlock_irqrestore(&line6->line6midi->lock, flags); for (;;) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN, LINE6_MIDIBUF_READ_RX); + spin_unlock_irqrestore(&line6->line6midi->lock, flags); if (done <= 0) break; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index c00009b545c0..f7ce8e8c3c3e 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1211,6 +1211,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->res = 16; } break; + case USB_ID(0x1bcf, 0x2281): /* HD Webcam */ + if (!strcmp(kctl->id.name, "Mic Capture Volume")) { + usb_audio_info(chip, + "set resolution quirk: cval->res = 16\n"); + cval->res = 16; + } + break; } } diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 73abc38a5400..aaa6a515d0f8 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL), YAMAHA_DEVICE(0x105b, NULL), YAMAHA_DEVICE(0x105c, NULL), YAMAHA_DEVICE(0x105d, NULL), +YAMAHA_DEVICE(0x1718, "P-125"), { USB_DEVICE(0x0499, 0x1503), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { @@ -2594,6 +2595,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Stanton ScratchAmp */ +{ USB_DEVICE(0x103d, 0x0100) }, +{ USB_DEVICE(0x103d, 0x0101) }, + /* Novation EMS devices */ { USB_DEVICE_VENDOR_SPEC(0x1235, 0x0001), diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 58156fbca02c..e7b68c67852e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2125,6 +2125,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ @@ -2167,6 +2169,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2281, /* HD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ @@ -2217,6 +2221,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/sound/usb/stream.c b/sound/usb/stream.c index d5409f387945..e14c725acebf 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -244,8 +244,8 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ - SNDRV_CHMAP_SL, /* left surround */ - SNDRV_CHMAP_SR, /* right surround */ + SNDRV_CHMAP_RL, /* left surround */ + SNDRV_CHMAP_RR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7b32b99023a2..5fd7caea4419 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -86,9 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a..eaeda001784e 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3c7434329661..dd4682857c12 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,183 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ -#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +467,60 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e022e6eb766c..82c6a4d350e0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -566,6 +566,12 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_STEP 4 + /* KeyID partitioning between MKTME and TDX */ #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 @@ -660,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -781,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -1164,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529..bf57a824f722 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 489cbed7e82a..12796808f07a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -82,7 +82,30 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, are required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + endif + endif +endif all: $(FILES) @@ -147,7 +170,17 @@ $(OUTPUT)test-libopencsd.bin: DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) -DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif $(OUTPUT)test-dwarf.bin: @@ -178,27 +211,27 @@ $(OUTPUT)test-numa_num_possible_cpus.bin: $(BUILD) -lnuma $(OUTPUT)test-libunwind.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-debug-frame.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-x86.bin: - $(BUILD) -lelf -lunwind-x86 + $(BUILD) -lelf -llzma -lunwind-x86 $(OUTPUT)test-libunwind-x86_64.bin: - $(BUILD) -lelf -lunwind-x86_64 + $(BUILD) -lelf -llzma -lunwind-x86_64 $(OUTPUT)test-libunwind-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libunwind-debug-frame-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-debug-frame-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libaudit.bin: $(BUILD) -laudit diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 210c13b1b857..2a7f260ef9dc 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -19,7 +19,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); -void bitmap_clear(unsigned long *map, unsigned int start, int len); +void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); @@ -150,4 +150,19 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static inline void bitmap_clear(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __clear_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map &= ~GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0, nbits / 8); + else + __bitmap_clear(map, start, nbits); +} #endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 000000000000..7147b1b2cb28 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d02723..5bf6148cac2b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc0..637efc055145 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index c3e4871967bc..2178862bb114 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -100,3 +100,23 @@ bool __bitmap_intersects(const unsigned long *bitmap1, return true; return false; } + +void __bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index 3766886c4bca..83dc87c662b6 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime. $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a If UBSan detects any problem at runtime, it outputs a “runtime error:” message. + +4) Cross compilation +==================== +As Multiarch is commonly supported in Linux distributions, we can install +libraries for multiple architectures on the same system and then cross-compile +Linux perf. For example, Aarch64 libraries and toolchains can be installed on +an x86_64 machine, allowing us to compile perf for an Aarch64 target. + +Below is the command for building the perf with dynamic linking. + + $ cd /path/to/Linux + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +For static linking, the option `LDFLAGS="-static"` is required. + + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ + LDFLAGS="-static" -C tools/perf + +In the embedded system world, a use case is to explicitly specify the package +configuration paths for cross building: + + $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \ + PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \ + make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the +variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to +the library paths for cross compilation. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c896babf7a74..fa679db61f62 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -152,7 +152,17 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2 + DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) @@ -296,6 +306,11 @@ endif ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) + # Update the python flags for cross compilation + ifdef CROSS_COMPILE + PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/') + PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS)) + endif PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) @@ -897,6 +912,9 @@ else PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no") ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes) PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])') + ifdef CROSS_COMPILE + PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX)) + endif LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX) else $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 175e4c7898f0..f8148db5fc38 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -193,7 +193,32 @@ HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -PKG_CONFIG = $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, is required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,) + $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.) + endif + endif +endif RM = rm -f LN = ln -f diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..7093ee21c0d1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# <number> <abi> <name> <entry point> +# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index de76bbc50bfb..5c9335fff2d3 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <internal/lib.h> +#include <inttypes.h> #include <subcmd/parse-options.h> #include <api/fd/array.h> #include <api/fs/fs.h> @@ -688,7 +689,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* lock */ csv_sep, daemon->base, "lock"); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - daemon->start) / 60); @@ -700,7 +701,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) daemon->base, SESSION_OUTPUT); fprintf(out, " lock: %s/lock\n", daemon->base); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - daemon->start) / 60); } } @@ -727,7 +728,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* session ack */ csv_sep, session->base, SESSION_ACK); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - session->start) / 60); @@ -745,7 +746,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) session->base, SESSION_CONTROL); fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - session->start) / 60); } } diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json index a9939823b14b..0c9b9a2d2958 100644 --- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -74,7 +74,7 @@ { "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", "ConfigCode": "0x800000000000000c", - "EventName": "FW_SFENCE_VMA_RECEIVED", + "EventName": "FW_SFENCE_VMA_ASID_SENT", "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" }, { diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370b..df9cdb8bbfb8 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd664..753971770733 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc/<pid>/maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc/<pid>/maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad00..225bc366ffcb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 628d46a0da92..8bf7e8a0eb6f 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1730b852a947..6d075648d2cc 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index b1e6817f1e54..3946d5254a1f 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -46,6 +46,7 @@ snapshot: turbostat @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h @echo PWD=. > $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 8d37acd39201..067717bce1d4 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -28,10 +28,13 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP | \fBperf/<device>/<event>\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute + <device> is a perf device from /sys/bus/event_source/devices/<device> eg. cstate_core + <event> is a perf event for given device from /sys/bus/event_source/devices/<device>/events/<event> eg. c1-residency + perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. @@ -52,6 +55,39 @@ name as necessary to disambiguate it from others is necessary. Note that option as the column header. .fi .PP +\fB--add pmt,[attr_name=attr_value, ...]\fP add column with a PMT (Intel Platform Monitoring Technology) counter in a similar way to --add option above, but require PMT metadata to be supplied to correctly read and display the counter. The metadata can be found in the Intel PMT XML files, hosted at https://github.com/intel/Intel-PMT. For a complete example see "ADD PMT COUNTER EXAMPLE". +.nf + name="name_string" + For column header. + + type={\fBraw\fP} + 'raw' shows the counter contents in hex. + default: raw + + format={\fBraw\fP | \fBdelta\fP} + 'raw' shows the counter contents in hex. + 'delta' shows the difference in values during the measurement interval. + default: raw + + domain={\fBcpu%u\fP | \fBcore%u\fP | \fBpackage%u\fP} + 'cpu' per cpu/thread counter. + 'core' per core counter. + 'package' per package counter. + '%u' denotes id of the domain that the counter is associated with. For example core4 would mean that the counter is associated with core number 4. + + offset=\fB%u\fP + '%u' offset within the PMT MMIO region. + + lsb=\fB%u\fP + '%u' least significant bit within the 64 bit value read from 'offset'. Together with 'msb', used to form a read mask. + + msb=\fB%u\fP + '%u' most significant bit within the 64 bit value read from 'offset'. Together with 'lsb', used to form a read mask. + + guid=\fB%x\fP + '%x' hex identifier of the PMT MMIO region. +.fi +.PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. @@ -67,10 +103,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--quiet\fP Do not decode and print the system configuration header information. .PP -+\fB--no-msr\fP Disable all the uses of the MSR driver. -+.PP -+\fB--no-perf\fP Disable all the uses of the perf API. -+.PP +\fB--no-msr\fP Disable all the uses of the MSR driver. +.PP +\fB--no-perf\fP Disable all the uses of the perf API. +.PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP \fB--num_iterations num\fP number of the measurement iterations. @@ -320,7 +356,7 @@ available on all processors. Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), labeling it with the column header, "PRF_CTRL", and display it only once, -afte the conclusion of a 0.1 second sleep. +after the conclusion of a 0.1 second sleep. .nf sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 0.101604 sec @@ -333,6 +369,56 @@ CPU PRF_CTRL .fi +.SH ADD PERF COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing time spent in C1 core cstate, +labeling it with the column header, "pCPU%c1", and display it only once, +after the conclusion of 0.1 second sleep. +We also show CPU%c1 built-in counter that should show similar values. +.nf +sudo ./turbostat --quiet --cpu 0-3 --show CPU,CPU%c1 --add perf/cstate_core/c1-residency,cpu,delta,percent,pCPU%c1 sleep .1 +0.102448 sec +CPU pCPU%c1 CPU%c1 +- 34.89 34.89 +0 45.99 45.99 +1 45.94 45.94 +2 23.83 23.83 +3 23.84 23.84 + +.fi + +.SH ADD PMT COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number 0. +We add two counters, showing crystal clock count and the DC6 residency. +All the parameters passed are based on the metadata found in the PMT XML files. + +For the crystal clock count, we +label it with the column header, "XTAL", +we set the type to 'raw', to read the number of clock ticks in hex, +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +For the DC6 residency counter, we +label it with the column header, "Die%c6", +we set the type to 'txtal_time', to obtain the percent residency value +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +.nf +sudo ./turbostat --quiet --cpu 0 --show CPU --add pmt,name=XTAL,type=raw,format=delta,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102 --add pmt,name=Die%c6,type=txtal_time,format=delta,domain=package0,offset=120,lsb=0,msb=63,guid=0x1a067102 +0.104352 sec +CPU XTAL Die%c6 +- 0x0000006d4d957ca7 0.00 +0 0x0000006d4d957ca7 0.00 +0.102448 sec +.fi + .SH INPUT For interval-mode, turbostat will immediately end the current interval diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9f5d053d4bc6..089220aaa5c9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9,6 +9,30 @@ #define _GNU_SOURCE #include MSRHEADER + +// copied from arch/x86/include/asm/cpu_device_id.h +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) +// end copied section + +#define X86_VENDOR_INTEL 0 + #include INTEL_FAMILY_HEADER #include BUILD_BUG_HEADER #include <stdarg.h> @@ -20,6 +44,7 @@ #include <sys/stat.h> #include <sys/select.h> #include <sys/resource.h> +#include <sys/mman.h> #include <fcntl.h> #include <signal.h> #include <sys/time.h> @@ -55,15 +80,39 @@ */ #define NAME_BYTES 20 #define PATH_BYTES 128 +#define PERF_NAME_BYTES 128 #define MAX_NOFILE 0x8000 +#define COUNTER_KIND_PERF_PREFIX "perf/" +#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) +#define PERF_DEV_NAME_BYTES 32 +#define PERF_EVT_NAME_BYTES 32 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; -enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; -enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; -enum cstate_source { CSTATE_SOURCE_NONE, CSTATE_SOURCE_PERF, CSTATE_SOURCE_MSR }; +enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; + +struct perf_counter_info { + struct perf_counter_info *next; + + /* How to open the counter / What counter it is. */ + char device[PERF_DEV_NAME_BYTES]; + char event[PERF_EVT_NAME_BYTES]; + + /* How to show/format the counter. */ + char name[PERF_NAME_BYTES]; + unsigned int width; + enum counter_scope scope; + enum counter_type type; + enum counter_format format; + double scale; + + /* For reading the counter. */ + int *fd_perf_per_domain; + size_t num_domains; +}; struct sysfs_path { char path[PATH_BYTES]; @@ -144,6 +193,7 @@ struct msr_counter bic[] = { { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -205,11 +255,12 @@ struct msr_counter bic[] = { #define BIC_SAM_mc6 (1ULL << 55) #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) +#define BIC_Diec6 (1ULL << 58) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) +#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -252,7 +303,6 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; -struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */ struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -267,6 +317,7 @@ unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int has_aperf; +unsigned int has_aperf_access; unsigned int has_epb; unsigned int has_turbo; unsigned int is_hybrid; @@ -307,7 +358,6 @@ unsigned int first_counter_read = 1; int ignore_stdin; bool no_msr; bool no_perf; -enum amperf_source amperf_source; enum gfx_sysfs_idx { GFX_rc6, @@ -367,7 +417,7 @@ struct platform_features { }; struct platform_data { - unsigned int model; + unsigned int vfm; const struct platform_features *features; }; @@ -910,75 +960,75 @@ static const struct platform_features amd_features_with_rapl = { }; static const struct platform_data turbostat_pdata[] = { - { INTEL_FAM6_NEHALEM, &nhm_features }, - { INTEL_FAM6_NEHALEM_G, &nhm_features }, - { INTEL_FAM6_NEHALEM_EP, &nhm_features }, - { INTEL_FAM6_NEHALEM_EX, &nhx_features }, - { INTEL_FAM6_WESTMERE, &nhm_features }, - { INTEL_FAM6_WESTMERE_EP, &nhm_features }, - { INTEL_FAM6_WESTMERE_EX, &nhx_features }, - { INTEL_FAM6_SANDYBRIDGE, &snb_features }, - { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, - { INTEL_FAM6_IVYBRIDGE, &ivb_features }, - { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, - { INTEL_FAM6_HASWELL, &hsw_features }, - { INTEL_FAM6_HASWELL_X, &hsx_features }, - { INTEL_FAM6_HASWELL_L, &hswl_features }, - { INTEL_FAM6_HASWELL_G, &hswg_features }, - { INTEL_FAM6_BROADWELL, &bdw_features }, - { INTEL_FAM6_BROADWELL_G, &bdwg_features }, - { INTEL_FAM6_BROADWELL_X, &bdx_features }, - { INTEL_FAM6_BROADWELL_D, &bdx_features }, - { INTEL_FAM6_SKYLAKE_L, &skl_features }, - { INTEL_FAM6_SKYLAKE, &skl_features }, - { INTEL_FAM6_SKYLAKE_X, &skx_features }, - { INTEL_FAM6_KABYLAKE_L, &skl_features }, - { INTEL_FAM6_KABYLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE_L, &skl_features }, - { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_X, &icx_features }, - { INTEL_FAM6_ICELAKE_D, &icx_features }, - { INTEL_FAM6_ICELAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, - { INTEL_FAM6_ROCKETLAKE, &cnl_features }, - { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, - { INTEL_FAM6_TIGERLAKE, &cnl_features }, - { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, - { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, - { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, - { INTEL_FAM6_LAKEFIELD, &cnl_features }, - { INTEL_FAM6_ALDERLAKE, &adl_features }, - { INTEL_FAM6_ALDERLAKE_L, &adl_features }, - { INTEL_FAM6_RAPTORLAKE, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_P, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, - { INTEL_FAM6_METEORLAKE, &cnl_features }, - { INTEL_FAM6_METEORLAKE_L, &cnl_features }, - { INTEL_FAM6_ARROWLAKE_H, &arl_features }, - { INTEL_FAM6_ARROWLAKE_U, &arl_features }, - { INTEL_FAM6_ARROWLAKE, &arl_features }, - { INTEL_FAM6_LUNARLAKE_M, &arl_features }, - { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, - { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, - { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, - { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, - { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, - { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, - { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, - { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, - { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, - { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, - { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, - { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, - { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, - { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, + { INTEL_NEHALEM, &nhm_features }, + { INTEL_NEHALEM_G, &nhm_features }, + { INTEL_NEHALEM_EP, &nhm_features }, + { INTEL_NEHALEM_EX, &nhx_features }, + { INTEL_WESTMERE, &nhm_features }, + { INTEL_WESTMERE_EP, &nhm_features }, + { INTEL_WESTMERE_EX, &nhx_features }, + { INTEL_SANDYBRIDGE, &snb_features }, + { INTEL_SANDYBRIDGE_X, &snx_features }, + { INTEL_IVYBRIDGE, &ivb_features }, + { INTEL_IVYBRIDGE_X, &ivx_features }, + { INTEL_HASWELL, &hsw_features }, + { INTEL_HASWELL_X, &hsx_features }, + { INTEL_HASWELL_L, &hswl_features }, + { INTEL_HASWELL_G, &hswg_features }, + { INTEL_BROADWELL, &bdw_features }, + { INTEL_BROADWELL_G, &bdwg_features }, + { INTEL_BROADWELL_X, &bdx_features }, + { INTEL_BROADWELL_D, &bdx_features }, + { INTEL_SKYLAKE_L, &skl_features }, + { INTEL_SKYLAKE, &skl_features }, + { INTEL_SKYLAKE_X, &skx_features }, + { INTEL_KABYLAKE_L, &skl_features }, + { INTEL_KABYLAKE, &skl_features }, + { INTEL_COMETLAKE, &skl_features }, + { INTEL_COMETLAKE_L, &skl_features }, + { INTEL_CANNONLAKE_L, &cnl_features }, + { INTEL_ICELAKE_X, &icx_features }, + { INTEL_ICELAKE_D, &icx_features }, + { INTEL_ICELAKE_L, &cnl_features }, + { INTEL_ICELAKE_NNPI, &cnl_features }, + { INTEL_ROCKETLAKE, &cnl_features }, + { INTEL_TIGERLAKE_L, &cnl_features }, + { INTEL_TIGERLAKE, &cnl_features }, + { INTEL_SAPPHIRERAPIDS_X, &spr_features }, + { INTEL_EMERALDRAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_LAKEFIELD, &cnl_features }, + { INTEL_ALDERLAKE, &adl_features }, + { INTEL_ALDERLAKE_L, &adl_features }, + { INTEL_RAPTORLAKE, &adl_features }, + { INTEL_RAPTORLAKE_P, &adl_features }, + { INTEL_RAPTORLAKE_S, &adl_features }, + { INTEL_METEORLAKE, &cnl_features }, + { INTEL_METEORLAKE_L, &cnl_features }, + { INTEL_ARROWLAKE_H, &arl_features }, + { INTEL_ARROWLAKE_U, &arl_features }, + { INTEL_ARROWLAKE, &arl_features }, + { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_ATOM_SILVERMONT, &slv_features }, + { INTEL_ATOM_SILVERMONT_D, &slvd_features }, + { INTEL_ATOM_AIRMONT, &amt_features }, + { INTEL_ATOM_GOLDMONT, &gmt_features }, + { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, + { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, + { INTEL_ATOM_TREMONT_D, &tmtd_features }, + { INTEL_ATOM_TREMONT, &tmt_features }, + { INTEL_ATOM_TREMONT_L, &tmt_features }, + { INTEL_ATOM_GRACEMONT, &adl_features }, + { INTEL_ATOM_CRESTMONT_X, &srf_features }, + { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_XEON_PHI_KNL, &knl_features }, + { INTEL_XEON_PHI_KNM, &knl_features }, /* * Missing support for - * INTEL_FAM6_ICELAKE - * INTEL_FAM6_ATOM_SILVERMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_NP + * INTEL_ICELAKE + * INTEL_ATOM_SILVERMONT_MID + * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, }; @@ -1003,11 +1053,11 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } - if (!genuine_intel || family != 6) + if (!genuine_intel) return; for (i = 0; turbostat_pdata[i].features; i++) { - if (turbostat_pdata[i].model == model) { + if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { platform = turbostat_pdata[i].features; return; } @@ -1034,8 +1084,13 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 +#define PMT_MAX_ADDED_THREAD_COUNTERS 24 +#define PMT_MAX_ADDED_CORE_COUNTERS 8 +#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 #define BITMASK_SIZE 32 +#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) + /* Indexes used to map data read from perf and MSRs into global variables */ enum rapl_rci_index { RAPL_RCI_INDEX_ENERGY_PKG = 0, @@ -1056,19 +1111,13 @@ enum rapl_unit { struct rapl_counter_info_t { unsigned long long data[NUM_RAPL_COUNTERS]; - enum rapl_source source[NUM_RAPL_COUNTERS]; + enum counter_source source[NUM_RAPL_COUNTERS]; unsigned long long flags[NUM_RAPL_COUNTERS]; double scale[NUM_RAPL_COUNTERS]; enum rapl_unit unit[NUM_RAPL_COUNTERS]; - - union { - /* Active when source == RAPL_SOURCE_MSR */ - struct { - unsigned long long msr[NUM_RAPL_COUNTERS]; - unsigned long long msr_mask[NUM_RAPL_COUNTERS]; - int msr_shift[NUM_RAPL_COUNTERS]; - }; - }; + unsigned long long msr[NUM_RAPL_COUNTERS]; + unsigned long long msr_mask[NUM_RAPL_COUNTERS]; + int msr_shift[NUM_RAPL_COUNTERS]; int fd_perf; }; @@ -1224,7 +1273,7 @@ enum ccstate_rci_index { struct cstate_counter_info_t { unsigned long long data[NUM_CSTATE_COUNTERS]; - enum cstate_source source[NUM_CSTATE_COUNTERS]; + enum counter_source source[NUM_CSTATE_COUNTERS]; unsigned long long msr[NUM_CSTATE_COUNTERS]; int fd_perf_core; int fd_perf_pkg; @@ -1361,6 +1410,167 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { }, }; +/* Indexes used to map data read from perf and MSRs into global variables */ +enum msr_rci_index { + MSR_RCI_INDEX_APERF = 0, + MSR_RCI_INDEX_MPERF = 1, + MSR_RCI_INDEX_SMI = 2, + NUM_MSR_COUNTERS, +}; + +struct msr_counter_info_t { + unsigned long long data[NUM_MSR_COUNTERS]; + enum counter_source source[NUM_MSR_COUNTERS]; + unsigned long long msr[NUM_MSR_COUNTERS]; + unsigned long long msr_mask[NUM_MSR_COUNTERS]; + int fd_perf; +}; + +struct msr_counter_info_t *msr_counter_info; +unsigned int msr_counter_info_size; + +struct msr_counter_arch_info { + const char *perf_subsys; + const char *perf_name; + unsigned long long msr; + unsigned long long msr_mask; + unsigned int rci_index; /* Maps data from perf counters to global variables */ + bool needed; + bool present; +}; + +enum msr_arch_info_index { + MSR_ARCH_INFO_APERF_INDEX = 0, + MSR_ARCH_INFO_MPERF_INDEX = 1, + MSR_ARCH_INFO_SMI_INDEX = 2, +}; + +static struct msr_counter_arch_info msr_counter_arch_infos[] = { + [MSR_ARCH_INFO_APERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "aperf", + .msr = MSR_IA32_APERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_APERF, + }, + + [MSR_ARCH_INFO_MPERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "mperf", + .msr = MSR_IA32_MPERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_MPERF, + }, + + [MSR_ARCH_INFO_SMI_INDEX] = { + .perf_subsys = "msr", + .perf_name = "smi", + .msr = MSR_SMI_COUNT, + .msr_mask = 0xFFFFFFFF, + .rci_index = MSR_RCI_INDEX_SMI, + }, +}; + +/* Can be redefined when compiling, useful for testing. */ +#ifndef SYSFS_TELEM_PATH +#define SYSFS_TELEM_PATH "/sys/class/intel_pmt" +#endif + +#define PMT_COUNTER_MTL_DC6_OFFSET 120 +#define PMT_COUNTER_MTL_DC6_LSB 0 +#define PMT_COUNTER_MTL_DC6_MSB 63 +#define PMT_MTL_DC6_GUID 0x1a067102 + +#define PMT_COUNTER_NAME_SIZE_BYTES 16 +#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 + +struct pmt_mmio { + struct pmt_mmio *next; + + unsigned int guid; + unsigned int size; + + /* Base pointer to the mmaped memory. */ + void *mmio_base; + + /* + * Offset to be applied to the mmio_base + * to get the beginning of the PMT counters for given GUID. + */ + unsigned long pmt_offset; +} *pmt_mmios; + +enum pmt_datatype { + PMT_TYPE_RAW, + PMT_TYPE_XTAL_TIME, +}; + +struct pmt_domain_info { + /* + * Pointer to the MMIO obtained by applying a counter offset + * to the mmio_base of the mmaped region for the given GUID. + * + * This is where to read the raw value of the counter from. + */ + unsigned long *pcounter; +}; + +struct pmt_counter { + struct pmt_counter *next; + + /* PMT metadata */ + char name[PMT_COUNTER_NAME_SIZE_BYTES]; + enum pmt_datatype type; + enum counter_scope scope; + unsigned int lsb; + unsigned int msb; + + /* BIC-like metadata */ + enum counter_format format; + + unsigned int num_domains; + struct pmt_domain_info *domains; +}; + +unsigned int pmt_counter_get_width(const struct pmt_counter *p) +{ + return (p->msb - p->lsb) + 1; +} + +void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) +{ + struct pmt_domain_info *new_mem; + + new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); + if (!new_mem) { + fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); + exit(1); + } + + /* Zero initialize just allocated memory. */ + const size_t num_new_domains = new_size - pcounter->num_domains; + + memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); + + pcounter->num_domains = new_size; + pcounter->domains = new_mem; +} + +void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) +{ + /* + * Allocate more memory ahead of time. + * + * Always allocate space for at least 8 elements + * and double the size when growing. + */ + if (new_size < 8) + new_size = 8; + new_size = MAX(new_size, pcounter->num_domains * 2); + + pmt_counter_resize_(pcounter, new_size); +} + struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -1378,6 +1588,8 @@ struct thread_data { unsigned int flags; bool is_atom; unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -1391,6 +1603,8 @@ struct core_data { unsigned int core_id; unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -1423,7 +1637,10 @@ struct pkg_data { struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned int uncore_mhz; + unsigned long long die_c6; unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -1558,12 +1775,25 @@ int idx_valid(int idx) } struct sys_counters { + /* MSR added counters */ unsigned int added_thread_counters; unsigned int added_core_counters; unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; + + /* perf added counters */ + unsigned int added_thread_perf_counters; + unsigned int added_core_perf_counters; + unsigned int added_package_perf_counters; + struct perf_counter_info *perf_tp; + struct perf_counter_info *perf_cp; + struct perf_counter_info *perf_pp; + + struct pmt_counter *pmt_tp; + struct pmt_counter *pmt_cp; + struct pmt_counter *pmt_pp; } sys; static size_t free_msr_counters_(struct msr_counter **pp) @@ -1747,7 +1977,7 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_SMI | BIC_Mod_c6 | BIC_CoreTmp | + const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; bic_enabled &= ~bic_msrs; @@ -1823,6 +2053,23 @@ int probe_msr(int cpu, off_t offset) return 0; } +/* Convert CPU ID to domain ID for given added perf counter. */ +unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) +{ + switch (pc->scope) { + case SCOPE_CPU: + return cpu; + + case SCOPE_CORE: + return cpus[cpu].physical_core_id; + + case SCOPE_PACKAGE: + return cpus[cpu].physical_package_id; + } + + __builtin_unreachable(); +} + #define MAX_DEFERRED 16 char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; @@ -1846,9 +2093,12 @@ void help(void) "to print statistics, until interrupted.\n" " -a, --add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" + " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " debug messages are printed to stderr\n" " -D, --Dump displays the raw counter values\n" " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" @@ -1955,6 +2205,8 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) void print_header(char *delim) { struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; int printed = 0; if (DO_BIC(BIC_USEC)) @@ -2012,6 +2264,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_tp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_tp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3)) @@ -2052,6 +2338,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_cp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_cp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2096,6 +2416,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_Diec6)) + outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_LPI)) outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); if (DO_BIC(BIC_SYS_LPI)) @@ -2147,6 +2469,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_pp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_pp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + outp += sprintf(outp, "\n"); } @@ -2267,6 +2623,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *fmt8; int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; char *delim = "\t"; int printed = 0; @@ -2404,6 +2762,51 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + /* Added perf counters */ + for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + if (pp->type == COUNTER_USEC) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + t->perf_counter[i] / interval_float / 10000); + else + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + } + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = t->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + /* C1 */ if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); @@ -2447,6 +2850,44 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); + } + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = c->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) @@ -2526,6 +2967,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + if (DO_BIC(BIC_Diec6)) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) outp += @@ -2601,6 +3046,47 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); + } else if (pp->type == COUNTER_K2M) { + outp += + sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + } + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = p->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -2654,6 +3140,8 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -2674,6 +3162,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; + old->die_c6 = new->die_c6 - old->die_c6; old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; old->sys_lpi = new->sys_lpi - old->sys_lpi; old->pkg_temp_c = new->pkg_temp_c; @@ -2714,6 +3203,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->counter[i] = new->counter[i] - old->counter[i]; } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else if (pp->format == FORMAT_AVERAGE) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2721,6 +3226,8 @@ void delta_core(struct core_data *new, struct core_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; @@ -2737,6 +3244,20 @@ void delta_core(struct core_data *new, struct core_data *old) else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } } int soft_c1_residency_display(int bic) @@ -2754,6 +3275,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* we run cpuid just the 1st time, copy the results */ if (DO_BIC(BIC_APIC)) @@ -2832,6 +3355,21 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2908,6 +3446,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; + p->die_c6 = 0; p->cpu_lpi = 0; p->sys_lpi = 0; @@ -2934,6 +3473,14 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) p->counter[i] = 0; + + memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); + memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); + memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); + + memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); + memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); + memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); } void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) @@ -2954,6 +3501,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* copy un-changing apic_id's */ if (DO_BIC(BIC_APIC)) @@ -2984,6 +3533,16 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.counter[i] += t->counter[i]; } + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.threads.perf_counter[i] += t->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] += t->pmt_counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -3004,6 +3563,16 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.counter[i] += c->counter[i]; } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.cores.perf_counter[i] += c->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] += c->pmt_counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!is_cpu_first_core_in_package(t, c, p)) return 0; @@ -3027,6 +3596,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; + average.packages.die_c6 += p->die_c6; average.packages.cpu_lpi = p->cpu_lpi; average.packages.sys_lpi = p->sys_lpi; @@ -3055,6 +3625,18 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) else average.packages.counter[i] += p->counter[i]; } + + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.perf_counter[i] = p->perf_counter[i]; + else + average.packages.perf_counter[i] += p->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] += p->pmt_counter[i]; + } + return 0; } @@ -3066,6 +3648,8 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; clear_counters(&average.threads, &average.cores, &average.packages); @@ -3108,6 +3692,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data average.packages.pc8 /= topo.allowed_packages; average.packages.pc9 /= topo.allowed_packages; average.packages.pc10 /= topo.allowed_packages; + average.packages.die_c6 /= topo.allowed_packages; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3137,6 +3722,45 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data } average.packages.counter[i] /= topo.allowed_packages; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.threads.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + continue; + } + average.threads.perf_counter[i] /= topo.allowed_cpus; + } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.cores.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.cores.perf_counter[i] /= topo.allowed_cores; + } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.packages.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.packages.perf_counter[i] /= topo.allowed_packages; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] /= topo.allowed_cpus; + } + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] /= topo.allowed_cores; + } + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] /= topo.allowed_packages; + } } static unsigned long long rdtsc(void) @@ -3382,30 +4006,6 @@ static unsigned int read_perf_counter_info_n(const char *const path, const char return v; } -static unsigned int read_msr_type(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/type"; - const char *const format = "%u"; - - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_aperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; - const char *const format = "event=%x"; - - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_mperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; - const char *const format = "event=%x"; - - return read_perf_counter_info_n(path, format); -} - static unsigned int read_perf_type(const char *subsys) { const char *const path_format = "/sys/bus/event_source/devices/%s/type"; @@ -3417,15 +4017,55 @@ static unsigned int read_perf_type(const char *subsys) return read_perf_counter_info_n(path, format); } -static unsigned int read_rapl_config(const char *subsys, const char *event_name) +static unsigned int read_perf_config(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; - const char *const format = "event=%x"; + FILE *fconfig = NULL; char path[128]; + char config_str[64]; + unsigned int config; + unsigned int umask; + bool has_config = false; + bool has_umask = false; + unsigned int ret = -1; snprintf(path, sizeof(path), path_format, subsys, event_name); - return read_perf_counter_info_n(path, format); + fconfig = fopen(path, "r"); + if (!fconfig) + return -1; + + if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) + goto cleanup_and_exit; + + for (char *pconfig_str = &config_str[0]; pconfig_str;) { + if (sscanf(pconfig_str, "event=%x", &config) == 1) { + has_config = true; + goto next; + } + + if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { + has_umask = true; + goto next; + } + +next: + pconfig_str = strchr(pconfig_str, ','); + if (pconfig_str) { + *pconfig_str = '\0'; + ++pconfig_str; + } + } + + if (!has_umask) + umask = 0; + + if (has_config) + ret = (umask << 8) | config; + +cleanup_and_exit: + fclose(fconfig); + return ret; } static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) @@ -3444,7 +4084,7 @@ static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_na return RAPL_UNIT_INVALID; } -static double read_perf_rapl_scale(const char *subsys, const char *event_name) +static double read_perf_scale(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; const char *const format = "%lf"; @@ -3459,130 +4099,12 @@ static double read_perf_rapl_scale(const char *subsys, const char *event_name) return scale; } -static struct amperf_group_fd open_amperf_fd(int cpu) -{ - const unsigned int msr_type = read_msr_type(); - const unsigned int aperf_config = read_aperf_config(); - const unsigned int mperf_config = read_mperf_config(); - struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; - - fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); - fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); - - return fds; -} - -static int get_amperf_fd(int cpu) -{ - assert(fd_amperf_percpu); - - if (fd_amperf_percpu[cpu].aperf) - return fd_amperf_percpu[cpu].aperf; - - fd_amperf_percpu[cpu] = open_amperf_fd(cpu); - - return fd_amperf_percpu[cpu].aperf; -} - -/* Read APERF, MPERF and TSC using the perf API. */ -static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) -{ - union { - struct { - unsigned long nr_entries; - unsigned long aperf; - unsigned long mperf; - }; - - unsigned long as_array[3]; - } cnt; - - const int fd_amperf = get_amperf_fd(cpu); - - /* - * Read the TSC with rdtsc, because we want the absolute value and not - * the offset from the start of the counter. - */ - t->tsc = rdtsc(); - - const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); - - if (n != sizeof(cnt.as_array)) - return -2; - - t->aperf = cnt.aperf * aperf_mperf_multiplier; - t->mperf = cnt.mperf * aperf_mperf_multiplier; - - return 0; -} - -/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */ -static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu) -{ - unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; - int aperf_mperf_retry_count = 0; - - /* - * The TSC, APERF and MPERF must be read together for - * APERF/MPERF and MPERF/TSC to give accurate results. - * - * Unfortunately, APERF and MPERF are read by - * individual system call, so delays may occur - * between them. If the time to read them - * varies by a large amount, we re-read them. - */ - - /* - * This initial dummy APERF read has been seen to - * reduce jitter in the subsequent reads. - */ - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - -retry: - t->tsc = rdtsc(); /* re-read close to APERF */ - - tsc_before = t->tsc; - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - - tsc_between = rdtsc(); - - if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) - return -4; - - tsc_after = rdtsc(); - - aperf_time = tsc_between - tsc_before; - mperf_time = tsc_after - tsc_between; - - /* - * If the system call latency to read APERF and MPERF - * differ by more than 2x, then try again. - */ - if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { - aperf_mperf_retry_count++; - if (aperf_mperf_retry_count < 5) - goto retry; - else - warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); - } - aperf_mperf_retry_count = 0; - - t->aperf = t->aperf * aperf_mperf_multiplier; - t->mperf = t->mperf * aperf_mperf_multiplier; - - return 0; -} - size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) { size_t ret = 0; for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) - if (rci->source[i] == RAPL_SOURCE_PERF) + if (rci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3593,7 +4115,7 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t size_t ret = 0; for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) - if (cci->source[i] == CSTATE_SOURCE_PERF) + if (cci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3611,7 +4133,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); assert(rapl_counter_info_perdomain); @@ -3634,14 +4156,14 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { - case RAPL_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case RAPL_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(rci->fd_perf != -1); - if (debug) + if (debug >= 2) fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); @@ -3650,8 +4172,8 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct ++pi; break; - case RAPL_SOURCE_MSR: - if (debug) + case COUNTER_SOURCE_MSR: + if (debug >= 2) fprintf(stderr, "Reading rapl counter via msr at %u\n", i); assert(!no_msr); @@ -3709,15 +4231,15 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat struct cstate_counter_info_t *cci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d\n", __func__, cpu); assert(ccstate_counter_info); assert(cpu <= ccstate_counter_info_size); - memset(perf_data, 0, sizeof(perf_data)); - memset(perf_data_core, 0, sizeof(perf_data_core)); - memset(perf_data_pkg, 0, sizeof(perf_data_pkg)); + ZERO_ARRAY(perf_data); + ZERO_ARRAY(perf_data_core); + ZERO_ARRAY(perf_data_pkg); cci = &ccstate_counter_info[cpu]; @@ -3772,30 +4294,28 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { switch (cci->source[i]) { - case CSTATE_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case CSTATE_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); - } cci->data[i] = perf_data[pi]; ++pi; break; - case CSTATE_SOURCE_MSR: + case COUNTER_SOURCE_MSR: assert(!no_msr); if (get_msr(cpu, cci->msr[i], &cci->data[i])) return -13 - i; - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); - } break; } @@ -3809,7 +4329,7 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat * when invoked for the thread sibling. */ #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ - if (cci->source[index] != CSTATE_SOURCE_NONE) \ + if (cci->source[index] != COUNTER_SOURCE_NONE) \ out_counter = cci->data[index]; \ } while (0) @@ -3833,6 +4353,135 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat return 0; } +size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) +{ + size_t ret = 0; + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) + if (mci->source[i] == COUNTER_SOURCE_PERF) + ++ret; + + return ret; +} + +int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) +{ + unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; + + struct msr_counter_info_t *mci; + + if (debug >= 2) + fprintf(stderr, "%s: cpu%d\n", __func__, cpu); + + assert(msr_counter_info); + assert(cpu <= msr_counter_info_size); + + mci = &msr_counter_info[cpu]; + + ZERO_ARRAY(perf_data); + ZERO_ARRAY(mci->data); + + if (mci->fd_perf != -1) { + const size_t num_perf_counters = msr_counter_info_count_perf(mci); + const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); + const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); + + if (actual_read_size != expected_read_size) + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, + actual_read_size); + } + + for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { + switch (mci->source[i]) { + case COUNTER_SOURCE_NONE: + break; + + case COUNTER_SOURCE_PERF: + assert(pi < ARRAY_SIZE(perf_data)); + assert(mci->fd_perf != -1); + + if (debug >= 2) + fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); + + mci->data[i] = perf_data[pi]; + + ++pi; + break; + + case COUNTER_SOURCE_MSR: + assert(!no_msr); + + if (get_msr(cpu, mci->msr[i], &mci->data[i])) + return -2 - i; + + mci->data[i] &= mci->msr_mask[i]; + + if (debug >= 2) + fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); + + break; + } + } + + BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); + t->aperf = mci->data[MSR_RCI_INDEX_APERF]; + t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; + t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; + + return 0; +} + +int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) +{ + unsigned int domain; + unsigned long long value; + int fd_counter; + + for (size_t i = 0; pp; ++i, pp = pp->next) { + domain = cpu_to_domain(pp, cpu); + assert(domain < pp->num_domains); + + fd_counter = pp->fd_perf_per_domain[domain]; + + if (fd_counter == -1) + continue; + + if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) + return 1; + + assert(i < out_size); + out[i] = value * pp->scale; + } + + return 0; +} + +unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) +{ + unsigned long mask; + + if (msb == 63) + mask = 0xffffffffffffffff; + else + mask = ((1 << (msb + 1)) - 1); + + mask -= (1 << lsb) - 1; + + return mask; +} + +unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) +{ + assert(domain_id < ppmt->num_domains); + + const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; + const unsigned long value = pmmio ? *pmmio : 0; + const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); + const unsigned long value_shift = ppmt->lsb; + + return (value & value_mask) >> value_shift; +} + /* * get_counters(...) * migrate to cpu @@ -3843,6 +4492,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int cpu = t->cpu_id; unsigned long long msr; struct msr_counter *mp; + struct pmt_counter *pp; int i; int status; @@ -3858,24 +4508,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) - || soft_c1_residency_display(BIC_Avg_MHz)) { - int status = -1; - - assert(!no_perf || !no_msr); - - switch (amperf_source) { - case AMPERF_SOURCE_PERF: - status = read_aperf_mperf_tsc_perf(t, cpu); - break; - case AMPERF_SOURCE_MSR: - status = read_aperf_mperf_tsc_msr(t, cpu); - break; - } - - if (status != 0) - return status; - } + get_smi_aperf_mperf(cpu, t); if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) @@ -3883,11 +4516,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (DO_BIC(BIC_SMI)) { - if (get_msr(cpu, MSR_SMI_COUNT, &msr)) - return -5; - t->smi_count = msr & 0xFFFFFFFF; - } get_cstate_counters(cpu, t, c, p); @@ -3896,6 +4524,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) + t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); + /* collect core counters only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) goto done; @@ -3934,6 +4568,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) + c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); + /* collect package counters only for 1st core in package */ if (!is_cpu_first_core_in_package(t, c, p)) goto done; @@ -4006,6 +4646,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_mp(cpu, mp, &p->counter[i], path)) return -10; } + + if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) + p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); + done: gettimeofday(&t->tv_end, (struct timezone *)NULL); @@ -4469,25 +5116,6 @@ void free_fd_percpu(void) fd_percpu = NULL; } -void free_fd_amperf_percpu(void) -{ - int i; - - if (!fd_amperf_percpu) - return; - - for (i = 0; i < topo.max_cpu_num + 1; ++i) { - if (fd_amperf_percpu[i].mperf != 0) - close(fd_amperf_percpu[i].mperf); - - if (fd_amperf_percpu[i].aperf != 0) - close(fd_amperf_percpu[i].aperf); - } - - free(fd_amperf_percpu); - fd_amperf_percpu = NULL; -} - void free_fd_instr_count_percpu(void) { if (!fd_instr_count_percpu) @@ -4522,6 +5150,21 @@ void free_fd_cstate(void) ccstate_counter_info_size = 0; } +void free_fd_msr(void) +{ + if (!msr_counter_info) + return; + + for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { + if (msr_counter_info[cpu].fd_perf != -1) + close(msr_counter_info[cpu].fd_perf); + } + + free(msr_counter_info); + msr_counter_info = NULL; + msr_counter_info_size = 0; +} + void free_fd_rapl_percpu(void) { if (!rapl_counter_info_perdomain) @@ -4539,6 +5182,36 @@ void free_fd_rapl_percpu(void) rapl_counter_info_perdomain_size = 0; } +void free_fd_added_perf_counters_(struct perf_counter_info *pp) +{ + if (!pp) + return; + + if (!pp->fd_perf_per_domain) + return; + + while (pp) { + for (size_t domain = 0; domain < pp->num_domains; ++domain) { + if (pp->fd_perf_per_domain[domain] != -1) { + close(pp->fd_perf_per_domain[domain]); + pp->fd_perf_per_domain[domain] = -1; + } + } + + free(pp->fd_perf_per_domain); + pp->fd_perf_per_domain = NULL; + + pp = pp->next; + } +} + +void free_fd_added_perf_counters(void) +{ + free_fd_added_perf_counters_(sys.perf_tp); + free_fd_added_perf_counters_(sys.perf_cp); + free_fd_added_perf_counters_(sys.perf_pp); +} + void free_all_buffers(void) { int i; @@ -4581,9 +5254,10 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); - free_fd_amperf_percpu(); + free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); + free_fd_added_perf_counters(); free(irq_column_2_cpu); free(irqs_per_cpu); @@ -4918,16 +5592,22 @@ static void update_effective_set(bool startup) } void linux_perf_init(void); +void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void added_perf_counters_init(void); +void pmt_init(void); void re_initialize(void) { free_all_buffers(); setup_all_buffers(false); linux_perf_init(); + msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); + pmt_init(); fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } @@ -6779,22 +7459,13 @@ static int has_instr_count_access(void) return has_access; } -bool is_aperf_access_required(void) -{ - return BIC_IS_ENABLED(BIC_Avg_MHz) - || BIC_IS_ENABLED(BIC_Busy) - || BIC_IS_ENABLED(BIC_Bzy_MHz) - || BIC_IS_ENABLED(BIC_IPC) - || BIC_IS_ENABLED(BIC_CPU_c1); -} - int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { if (no_perf) return -1; - const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name); + const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); if (scale == 0.0) return -1; @@ -6805,7 +7476,7 @@ int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struc return -1; const unsigned int rapl_type = read_perf_type(cai->perf_subsys); - const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); @@ -6826,7 +7497,7 @@ int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct { int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; @@ -6846,14 +7517,6 @@ void linux_perf_init(void) if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } - - const bool aperf_required = is_aperf_access_required(); - - if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) { - fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu)); - if (fd_amperf_percpu == NULL) - err(-1, "calloc fd_amperf_percpu"); - } } void rapl_perf_init(void) @@ -6875,7 +7538,7 @@ void rapl_perf_init(void) rci->fd_perf = -1; for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { rci->data[i] = 0; - rci->source[i] = RAPL_SOURCE_NONE; + rci->source[i] = COUNTER_SOURCE_NONE; } } @@ -6917,14 +7580,14 @@ void rapl_perf_init(void) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { - rci->source[cai->rci_index] = RAPL_SOURCE_PERF; + rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; rci->scale[cai->rci_index] = scale * cai->compat_scale; rci->unit[cai->rci_index] = unit; rci->flags[cai->rci_index] = cai->flags; /* Use MSR for this counter */ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { - rci->source[cai->rci_index] = RAPL_SOURCE_MSR; + rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; rci->msr[cai->rci_index] = cai->msr; rci->msr_mask[cai->rci_index] = cai->msr_mask; rci->msr_shift[cai->rci_index] = cai->msr_shift; @@ -6934,7 +7597,7 @@ void rapl_perf_init(void) } } - if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE) + if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) has_counter = 1; } @@ -6946,75 +7609,11 @@ void rapl_perf_init(void) free(domain_visited); } -static int has_amperf_access_via_msr(void) -{ - if (no_msr) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_APERF)) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_MPERF)) - return 0; - - return 1; -} - -static int has_amperf_access_via_perf(void) -{ - struct amperf_group_fd fds; - - /* - * Cache the last result, so we don't warn the user multiple times - * - * Negative means cached, no access - * Zero means not cached - * Positive means cached, has access - */ - static int has_access_cached; - - if (no_perf) - return 0; - - if (has_access_cached != 0) - return has_access_cached > 0; - - fds = open_amperf_fd(base_cpu); - has_access_cached = (fds.aperf != -1) && (fds.mperf != -1); - - if (fds.aperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "APERF perf counter", "--no-perf"); - else - close(fds.aperf); - - if (fds.mperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "MPERF perf counter", "--no-perf"); - else - close(fds.mperf); - - if (has_access_cached == 0) - has_access_cached = -1; - - return has_access_cached > 0; -} - -/* Check if we can access APERF and MPERF */ +/* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - if (!is_aperf_access_required()) - return 0; - - if (!no_msr && has_amperf_access_via_msr()) - return 1; - - if (!no_perf && has_amperf_access_via_perf()) - return 1; - - return 0; + return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) @@ -7039,7 +7638,7 @@ int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const s return -1; const unsigned int type = read_perf_type(cai->perf_subsys); - const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); @@ -7057,12 +7656,120 @@ int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const st { int ret = add_cstate_perf_counter_(cpu, cci, cai); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; } +int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + if (no_perf) + return -1; + + const unsigned int type = read_perf_type(cai->perf_subsys); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); + + const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); + + if (fd_counter == -1) + return -1; + + /* If it's the first counter opened, make it a group descriptor */ + if (cci->fd_perf == -1) + cci->fd_perf = fd_counter; + + return fd_counter; +} + +int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + int ret = add_msr_perf_counter_(cpu, cci, cai); + + if (debug) + fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); + + return ret; +} + +void msr_perf_init_(void) +{ + const int mci_num = topo.max_cpu_num + 1; + + msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); + if (!msr_counter_info) + err(1, "calloc msr_counter_info"); + msr_counter_info_size = mci_num; + + for (int cpu = 0; cpu < mci_num; ++cpu) + msr_counter_info[cpu].fd_perf = -1; + + for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { + + struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; + + cai->present = false; + + for (int cpu = 0; cpu < mci_num; ++cpu) { + + struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; + + if (cpu_is_not_allowed(cpu)) + continue; + + if (cai->needed) { + /* Use perf API for this counter */ + if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; + cai->present = true; + + /* User MSR for this counter */ + } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; + cci->msr[cai->rci_index] = cai->msr; + cci->msr_mask[cai->rci_index] = cai->msr_mask; + cai->present = true; + } + } + } + } +} + +/* Initialize data for reading perf counters from the MSR group. */ +void msr_perf_init(void) +{ + bool need_amperf = false, need_smi = false; + const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); + + need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) + || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; + + if (BIC_IS_ENABLED(BIC_SMI)) + need_smi = true; + + /* Enable needed counters */ + msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; + + msr_perf_init_(); + + const bool has_amperf = has_amperf_access(); + const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; + + has_aperf_access = has_amperf; + + if (has_amperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + BIC_PRESENT(BIC_SMI); + } + + if (has_smi) + BIC_PRESENT(BIC_SMI); +} + void cstate_perf_init_(bool soft_c1) { bool has_counter; @@ -7127,17 +7834,17 @@ void cstate_perf_init_(bool soft_c1) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { - cci->source[cai->rci_index] = CSTATE_SOURCE_PERF; + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit && probe_msr(cpu, cai->msr) == 0) { - cci->source[cai->rci_index] = CSTATE_SOURCE_MSR; + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } } - if (cci->source[cai->rci_index] != CSTATE_SOURCE_NONE) { + if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { has_counter = true; cores_visited[core_id] = true; pkg_visited[pkg_id] = true; @@ -7320,12 +8027,6 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); - if (has_aperf && has_amperf_access()) { - BIC_PRESENT(BIC_Avg_MHz); - BIC_PRESENT(BIC_Busy); - BIC_PRESENT(BIC_Bzy_MHz); - BIC_PRESENT(BIC_IPC); - } do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -7442,6 +8143,11 @@ static void counter_info_init(void) if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; } + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { + msr_counter_arch_infos[i].present = false; + msr_counter_arch_infos[i].needed = false; + } } void probe_pm_features(void) @@ -7817,100 +8523,446 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); } -static void set_amperf_source(void) +bool has_added_counters(void) { - amperf_source = AMPERF_SOURCE_PERF; + /* + * It only makes sense to call this after the command line is parsed, + * otherwise sys structure is not populated. + */ - const bool aperf_required = is_aperf_access_required(); + return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; +} - if (no_perf || !aperf_required || !has_amperf_access_via_perf()) - amperf_source = AMPERF_SOURCE_MSR; +void check_msr_access(void) +{ + check_dev_msr(); + check_msr_permission(); - if (quiet || !debug) - return; + if (no_msr) + bic_disable_msr_access(); +} - fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); +void check_perf_access(void) +{ + if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) + bic_enabled &= ~BIC_IPC; } -bool has_added_counters(void) +int added_perf_counters_init_(struct perf_counter_info *pinfo) +{ + size_t num_domains = 0; + unsigned int next_domain; + bool *domain_visited; + unsigned int perf_type, perf_config; + double perf_scale; + int fd_perf; + + if (!pinfo) + return 0; + + const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); + + domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); + + while (pinfo) { + switch (pinfo->scope) { + case SCOPE_CPU: + num_domains = topo.max_cpu_num + 1; + break; + + case SCOPE_CORE: + num_domains = topo.max_core_id + 1; + break; + + case SCOPE_PACKAGE: + num_domains = topo.max_package_id + 1; + break; + } + + /* Allocate buffer for file descriptor for each domain. */ + pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); + if (!pinfo->fd_perf_per_domain) + errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); + + for (size_t i = 0; i < num_domains; ++i) + pinfo->fd_perf_per_domain[i] = -1; + + pinfo->num_domains = num_domains; + pinfo->scale = 1.0; + + memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); + + for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { + + next_domain = cpu_to_domain(pinfo, cpu); + + assert(next_domain < num_domains); + + if (cpu_is_not_allowed(cpu)) + continue; + + if (domain_visited[next_domain]) + continue; + + perf_type = read_perf_type(pinfo->device); + if (perf_type == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "type"); + continue; + } + + perf_config = read_perf_config(pinfo->device, pinfo->event); + if (perf_config == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "config"); + continue; + } + + /* Scale is not required, some counters just don't have it. */ + perf_scale = read_perf_scale(pinfo->device, pinfo->event); + if (perf_scale == 0.0) + perf_scale = 1.0; + + fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); + if (fd_perf == -1) { + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", + __func__, pinfo->device, pinfo->event, cpu); + continue; + } + + domain_visited[next_domain] = 1; + pinfo->fd_perf_per_domain[next_domain] = fd_perf; + pinfo->scale = perf_scale; + + if (debug) + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", + pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + } + + pinfo = pinfo->next; + } + + free(domain_visited); + + return 0; +} + +void added_perf_counters_init(void) +{ + if (added_perf_counters_init_(sys.perf_tp)) + errx(1, "%s: %s", __func__, "thread"); + + if (added_perf_counters_init_(sys.perf_cp)) + errx(1, "%s: %s", __func__, "core"); + + if (added_perf_counters_init_(sys.perf_pp)) + errx(1, "%s: %s", __func__, "package"); +} + +int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) +{ + int fd_telem_info; + FILE *file_telem_info; + unsigned long value; + + fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); + if (fd_telem_info == -1) + return -1; + + file_telem_info = fdopen(fd_telem_info, "r"); + if (file_telem_info == NULL) { + close(fd_telem_info); + return -1; + } + + if (fscanf(file_telem_info, format, &value) != 1) { + fclose(file_telem_info); + return -1; + } + + fclose(file_telem_info); + + *output = value; + + return 0; +} + +struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) { + DIR *dirp; + struct dirent *entry; + struct stat st; + unsigned int telem_idx; + int fd_telem_dir, fd_pmt; + unsigned long guid, size, offset; + size_t mmap_size; + void *mmio; + struct pmt_mmio *ret = NULL; + + if (stat(SYSFS_TELEM_PATH, &st) == -1) + return NULL; + + dirp = opendir(SYSFS_TELEM_PATH); + if (dirp == NULL) + return NULL; + + for (;;) { + entry = readdir(dirp); + + if (entry == NULL) + break; + + if (strcmp(entry->d_name, ".") == 0) + continue; + + if (strcmp(entry->d_name, "..") == 0) + continue; + + if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) + continue; + + if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { + break; + } + + if (!S_ISDIR(st.st_mode)) + continue; + + fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); + if (fd_telem_dir == -1) { + break; + } + + if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { + close(fd_telem_dir); + break; + } + + if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { + close(fd_telem_dir); + break; + } + + if (guid != target_guid) { + close(fd_telem_dir); + continue; + } + + if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { + close(fd_telem_dir); + break; + } + + assert(offset == 0); + + fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); + if (fd_pmt == -1) + goto loop_cleanup_and_break; + + mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); + if (mmio != MAP_FAILED) { + + if (debug) + fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); + + ret = calloc(1, sizeof(*ret)); + + if (!ret) { + fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); + exit(1); + } + + ret->guid = guid; + ret->mmio_base = mmio; + ret->pmt_offset = offset; + ret->size = size; + + ret->next = pmt_mmios; + pmt_mmios = ret; + } + +loop_cleanup_and_break: + close(fd_pmt); + close(fd_telem_dir); + break; + } + + closedir(dirp); + + return ret; +} + +struct pmt_mmio *pmt_mmio_find(unsigned int guid) +{ + struct pmt_mmio *pmmio = pmt_mmios; + + while (pmmio) { + if (pmmio->guid == guid) + return pmmio; + + pmmio = pmmio->next; + } + + return NULL; +} + +void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) +{ + char *ret; + + /* Get base of mmaped PMT file. */ + ret = (char *)pmmio->mmio_base; + /* - * It only makes sense to call this after the command line is parsed, - * otherwise sys structure is not populated. + * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. + * It's not guaranteed that the mmaped memory begins with the telemetry data + * - we might have to apply the offset first. */ + ret += pmmio->pmt_offset; - return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; + /* Apply the counter offset to get the address to the mmaped counter. */ + ret += counter_offset; + + return ret; } -bool is_msr_access_required(void) +struct pmt_mmio *pmt_add_guid(unsigned int guid) { - if (no_msr) - return false; - - if (has_added_counters()) - return true; - - return BIC_IS_ENABLED(BIC_SMI) - || BIC_IS_ENABLED(BIC_CPU_c1) - || BIC_IS_ENABLED(BIC_CPU_c3) - || BIC_IS_ENABLED(BIC_CPU_c6) - || BIC_IS_ENABLED(BIC_CPU_c7) - || BIC_IS_ENABLED(BIC_Mod_c6) - || BIC_IS_ENABLED(BIC_CoreTmp) - || BIC_IS_ENABLED(BIC_Totl_c0) - || BIC_IS_ENABLED(BIC_Any_c0) - || BIC_IS_ENABLED(BIC_GFX_c0) - || BIC_IS_ENABLED(BIC_CPUGFX) - || BIC_IS_ENABLED(BIC_Pkgpc3) - || BIC_IS_ENABLED(BIC_Pkgpc6) - || BIC_IS_ENABLED(BIC_Pkgpc2) - || BIC_IS_ENABLED(BIC_Pkgpc7) - || BIC_IS_ENABLED(BIC_Pkgpc8) - || BIC_IS_ENABLED(BIC_Pkgpc9) - || BIC_IS_ENABLED(BIC_Pkgpc10) - /* TODO: Multiplex access with perf */ - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_PkgWatt) - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_GFXWatt) - || BIC_IS_ENABLED(BIC_RAMWatt) - || BIC_IS_ENABLED(BIC_Pkg_J) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_GFX_J) - || BIC_IS_ENABLED(BIC_RAM_J) - || BIC_IS_ENABLED(BIC_PKG__) - || BIC_IS_ENABLED(BIC_RAM__) - || BIC_IS_ENABLED(BIC_PkgTmp) - || (is_aperf_access_required() && !has_amperf_access_via_perf()); + struct pmt_mmio *ret; + + ret = pmt_mmio_find(guid); + if (!ret) + ret = pmt_mmio_open(guid); + + return ret; } -void check_msr_access(void) +enum pmt_open_mode { + PMT_OPEN_TRY, /* Open failure is not an error. */ + PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ +}; + +struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) { - if (!is_msr_access_required()) - no_msr = 1; + while (pcounter) { + if (strcmp(pcounter->name, name) == 0) + break; - check_dev_msr(); - check_msr_permission(); + pcounter = pcounter->next; + } - if (no_msr) - bic_disable_msr_access(); + return pcounter; } -void check_perf_access(void) +struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) { - const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); + switch (scope) { + case SCOPE_CPU: + return &sys.pmt_tp; + case SCOPE_CORE: + return &sys.pmt_cp; + case SCOPE_PACKAGE: + return &sys.pmt_pp; + } - if (no_perf || !intrcount_required || !has_instr_count_access()) - bic_enabled &= ~BIC_IPC; + __builtin_unreachable(); +} - const bool aperf_required = is_aperf_access_required(); +void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) +{ + /* Make sure the new domain fits. */ + if (domain_id >= pcounter->num_domains) + pmt_counter_resize(pcounter, domain_id + 1); - if (!aperf_required || !has_amperf_access()) { - bic_enabled &= ~BIC_Avg_MHz; - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; - bic_enabled &= ~BIC_IPC; + assert(pcounter->domains); + assert(domain_id < pcounter->num_domains); + + pcounter->domains[domain_id].pcounter = pmmio; +} + +int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, + unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, + enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) +{ + struct pmt_mmio *mmio; + struct pmt_counter *pcounter; + struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); + bool new_counter = false; + int conflict = 0; + + if (lsb > msb) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); + exit(1); + } + + if (msb >= 64) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); + exit(1); + } + + mmio = pmt_add_guid(guid); + if (!mmio) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); + exit(1); + } + + return 1; + } + + if (offset >= mmio->size) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); + exit(1); + } + + return 1; + } + + pcounter = pmt_find_counter(*pmt_root, name); + if (!pcounter) { + pcounter = calloc(1, sizeof(*pcounter)); + new_counter = true; + } + + if (new_counter) { + strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); + pcounter->type = type; + pcounter->scope = scope; + pcounter->lsb = lsb; + pcounter->msb = msb; + pcounter->format = format; + } else { + conflict += pcounter->type != type; + conflict += pcounter->scope != scope; + conflict += pcounter->lsb != lsb; + conflict += pcounter->msb != msb; + conflict += pcounter->format != format; + } + + if (conflict) { + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", + __func__, name); + exit(1); + } + + pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); + + if (new_counter) { + pcounter->next = *pmt_root; + *pmt_root = pcounter; + } + + return 0; +} + +void pmt_init(void) +{ + if (BIC_IS_ENABLED(BIC_Diec6)) { + pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, + PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, + 0, PMT_OPEN_TRY); } } @@ -7923,16 +8975,18 @@ void turbostat_init() process_cpuid(); counter_info_init(); probe_pm_features(); - set_amperf_source(); + msr_perf_init(); linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); + pmt_init(); for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); - if (DO_BIC(BIC_IPC)) - (void)get_instr_count_fd(base_cpu); + if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) + BIC_PRESENT(BIC_IPC); /* * If TSC tweak is needed, but couldn't get it, @@ -8017,7 +9071,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.05.10 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2024.07.26 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -8049,7 +9103,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) - printf("%s: %s %s\n", __func__, name, mp->name); + fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); if (!strncmp(name, mp->name, strlen(mp->name))) return mp; } @@ -8066,8 +9120,8 @@ int add_counter(unsigned int msr_num, char *path, char *name, errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); if (debug) - printf("%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", __func__, msr_num, - path, name, width, scope, type, format, flags, id); + fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", + __func__, msr_num, path, name, width, scope, type, format, flags, id); switch (scope) { @@ -8075,7 +9129,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.tp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { @@ -8087,7 +9141,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.cp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { @@ -8099,7 +9153,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.pp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { @@ -8116,6 +9170,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = calloc(1, sizeof(struct msr_counter)); if (msrp == NULL) err(-1, "calloc msr_counter"); + msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES - 1); msrp->width = width; @@ -8156,11 +9211,106 @@ int add_counter(unsigned int msr_num, char *path, char *name, return 0; } -void parse_add_command(char *add_command) +/* + * Initialize the fields used for identifying and opening the counter. + * + * Defer the initialization of any runtime buffers for actually reading + * the counters for when we initialize all perf counters, so we can later + * easily call re_initialize(). + */ +struct perf_counter_info *make_perf_counter_info(const char *perf_device, + const char *perf_event, + const char *name, + unsigned int width, + enum counter_scope scope, + enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + pinfo = calloc(1, sizeof(*pinfo)); + if (!pinfo) + errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); + + strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); + strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); + + strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); + pinfo->width = width; + pinfo->scope = scope; + pinfo->type = type; + pinfo->format = format; + + return pinfo; +} + +int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, + enum counter_scope scope, enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + switch (scope) { + case SCOPE_CPU: + if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { + warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_CORE: + if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { + warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_PACKAGE: + if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { + warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + } + + pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); + + if (!pinfo) + return -1; + + switch (scope) { + case SCOPE_CPU: + pinfo->next = sys.perf_tp; + sys.perf_tp = pinfo; + ++sys.added_thread_perf_counters; + break; + + case SCOPE_CORE: + pinfo->next = sys.perf_cp; + sys.perf_cp = pinfo; + ++sys.added_core_perf_counters; + break; + + case SCOPE_PACKAGE: + pinfo->next = sys.perf_pp; + sys.perf_pp = pinfo; + ++sys.added_package_perf_counters; + break; + } + + // FIXME: we might not have debug here yet + if (debug) + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", + __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + + return 0; +} + +void parse_add_command_msr(char *add_command) { int msr_num = 0; char *path = NULL; - char name_buffer[NAME_BYTES] = ""; + char perf_device[PERF_DEV_NAME_BYTES] = ""; + char perf_event[PERF_EVT_NAME_BYTES] = ""; + char name_buffer[PERF_NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; @@ -8175,6 +9325,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); + BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); + if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) + goto next; + if (*add_command == '/') { path = add_command; goto next; @@ -8222,7 +9377,8 @@ void parse_add_command(char *add_command) goto next; } - if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ + BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); + if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { char *eos; eos = strchr(name_buffer, ','); @@ -8239,21 +9395,33 @@ next: } } - if ((msr_num == 0) && (path == NULL)) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); + if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); fail++; } + /* Test for non-empty perf_device and perf_event */ + const bool is_perf_counter = perf_device[0] && perf_event[0]; + /* generate default column header */ if (*name_buffer == '\0') { - if (width == 32) - sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); - else - sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + if (is_perf_counter) { + snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); + } else { + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + } } - if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) - fail++; + if (is_perf_counter) { + if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) + fail++; + } else { + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) + fail++; + } if (fail) { help(); @@ -8261,6 +9429,195 @@ next: } } +bool starts_with(const char *str, const char *prefix) +{ + return strncmp(prefix, str, strlen(prefix)) == 0; +} + +void parse_add_command_pmt(char *add_command) +{ + char *name = NULL; + char *type_name = NULL; + char *format_name = NULL; + unsigned int offset; + unsigned int lsb; + unsigned int msb; + unsigned int guid; + unsigned int domain_id; + enum counter_scope scope = 0; + enum pmt_datatype type = PMT_TYPE_RAW; + enum counter_format format = FORMAT_RAW; + bool has_offset = false; + bool has_lsb = false; + bool has_msb = false; + bool has_format = true; /* Format has a default value. */ + bool has_guid = false; + bool has_scope = false; + bool has_type = true; /* Type has a default value. */ + + /* Consume the "pmt," prefix. */ + add_command = strchr(add_command, ','); + if (!add_command) { + help(); + exit(1); + } + ++add_command; + + while (add_command) { + if (starts_with(add_command, "name=")) { + name = add_command + strlen("name="); + goto next; + } + + if (starts_with(add_command, "type=")) { + type_name = add_command + strlen("type="); + goto next; + } + + if (starts_with(add_command, "domain=")) { + const size_t prefix_len = strlen("domain="); + + if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { + scope = SCOPE_CPU; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { + scope = SCOPE_CORE; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { + scope = SCOPE_PACKAGE; + has_scope = true; + } + + if (!has_scope) { + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", + __func__); + exit(1); + } + + goto next; + } + + if (starts_with(add_command, "format=")) { + format_name = add_command + strlen("format="); + goto next; + } + + if (sscanf(add_command, "offset=%u", &offset) == 1) { + has_offset = true; + goto next; + } + + if (sscanf(add_command, "lsb=%u", &lsb) == 1) { + has_lsb = true; + goto next; + } + + if (sscanf(add_command, "msb=%u", &msb) == 1) { + has_msb = true; + goto next; + } + + if (sscanf(add_command, "guid=%x", &guid) == 1) { + has_guid = true; + goto next; + } + +next: + add_command = strchr(add_command, ','); + if (add_command) { + *add_command = '\0'; + add_command++; + } + } + + if (!name) { + printf("%s: missing %s\n", __func__, "name"); + exit(1); + } + + if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { + printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); + exit(1); + } + + if (format_name) { + has_format = false; + + if (strcmp("raw", format_name) == 0) { + format = FORMAT_RAW; + has_format = true; + } + + if (strcmp("delta", format_name) == 0) { + format = FORMAT_DELTA; + has_format = true; + } + + if (!has_format) { + fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + exit(1); + } + } + + if (type_name) { + has_type = false; + + if (strcmp("raw", type_name) == 0) { + type = PMT_TYPE_RAW; + has_type = true; + } + + if (strcmp("txtal_time", type_name) == 0) { + type = PMT_TYPE_XTAL_TIME; + has_type = true; + } + + if (!has_type) { + printf("%s: invalid %s: %s\n", __func__, "type", type_name); + exit(1); + } + } + + if (!has_offset) { + printf("%s : missing %s\n", __func__, "offset"); + exit(1); + } + + if (!has_lsb) { + printf("%s: missing %s\n", __func__, "lsb"); + exit(1); + } + + if (!has_msb) { + printf("%s: missing %s\n", __func__, "msb"); + exit(1); + } + + if (!has_guid) { + printf("%s: missing %s\n", __func__, "guid"); + exit(1); + } + + if (!has_scope) { + printf("%s: missing %s\n", __func__, "scope"); + exit(1); + } + + if (lsb > msb) { + printf("%s: lsb > msb doesn't make sense\n", __func__); + exit(1); + } + + pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); +} + +void parse_add_command(char *add_command) +{ + if (strncmp(add_command, "pmt", strlen("pmt")) == 0) + return parse_add_command_pmt(add_command); + return parse_add_command_msr(add_command); +} + int is_deferred_add(char *name) { int i; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f0..3d1ca9e38b1f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index eaf091a3d331..129f179b0ac5 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -385,19 +385,21 @@ struct cxl_test_gen_media { struct cxl_test_gen_media gen_media = { .id = CXL_EVENT_GEN_MEDIA_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_gen_media), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_gen_media), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x2000), + .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, + .transaction_type = CXL_GMER_TRANS_HOST_WRITE, + /* .validity_flags = <set below> */ + .channel = 1, + .rank = 30, }, - .phys_addr = cpu_to_le64(0x2000), - .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, - .transaction_type = CXL_GMER_TRANS_HOST_WRITE, - /* .validity_flags = <set below> */ - .channel = 1, - .rank = 30 }, }; @@ -409,18 +411,20 @@ struct cxl_test_dram { struct cxl_test_dram dram = { .id = CXL_EVENT_DRAM_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_dram), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_dram), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x8000), + .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, + .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, + /* .validity_flags = <set below> */ + .channel = 1, }, - .phys_addr = cpu_to_le64(0x8000), - .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, - .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, - /* .validity_flags = <set below> */ - .channel = 1, .bank_group = 5, .bank = 2, .column = {0xDE, 0xAD}, @@ -474,11 +478,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, - &gen_media.rec.validity_flags); + &gen_media.rec.media_hdr.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, - &dram.rec.validity_flags); + &dram.rec.media_hdr.validity_flags); mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, @@ -1131,27 +1135,28 @@ static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds) return (count >= poison_inject_dev_max); } -static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) +static int mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) { + /* Return EBUSY to match the CXL driver handling */ if (mock_poison_dev_max_injected(cxlds)) { dev_dbg(cxlds->dev, "Device poison injection limit has been reached: %d\n", - MOCK_INJECT_DEV_MAX); - return false; + poison_inject_dev_max); + return -EBUSY; } for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { if (!mock_poison_list[i].cxlds) { mock_poison_list[i].cxlds = cxlds; mock_poison_list[i].dpa = dpa; - return true; + return 0; } } dev_dbg(cxlds->dev, "Mock test poison injection limit has been reached: %d\n", MOCK_INJECT_TEST_MAX); - return false; + return -ENXIO; } static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa) @@ -1175,10 +1180,8 @@ static int mock_inject_poison(struct cxl_dev_state *cxlds, dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa); return 0; } - if (!mock_poison_add(cxlds, dpa)) - return -ENXIO; - return 0; + return mock_poison_add(cxlds, dpa); } static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa) diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e..d619672faa49 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 7527f738b4a1..d1acd7d58850 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -5,8 +5,8 @@ CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \ LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray maple -CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \ - slab.o maple.o +LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o +CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o maple.o $(LIBS) OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ iteration_check_2.o benchmark.o diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c deleted file mode 100644 index 66ec4a24a203..000000000000 --- a/tools/testing/radix-tree/bitmap.c +++ /dev/null @@ -1,23 +0,0 @@ -/* lib/bitmap.c pulls in at least two other files. */ - -#include <linux/bitmap.h> - -void bitmap_clear(unsigned long *map, unsigned int start, int len) -{ - unsigned long *p = map + BIT_WORD(start); - const unsigned int size = start + len; - int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); - unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - - while (len - bits_to_clear >= 0) { - *p &= ~mask_to_clear; - len -= bits_to_clear; - bits_to_clear = BITS_PER_LONG; - mask_to_clear = ~0UL; - p++; - } - if (len) { - mask_to_clear &= BITMAP_LAST_WORD_MASK(size); - *p &= ~mask_to_clear; - } -} diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index 4c941270d8de..e4fa507cbdd0 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -156,7 +156,7 @@ static void test_hw_debug(pid_t child, int type, const char *type_name) /* Zero is not currently architecturally valid */ ksft_test_result(arch, "%s_arch_set\n", type_name); } else { - ksft_test_result_skip("%s_arch_set\n"); + ksft_test_result_skip("%s_arch_set\n", type_name); } } diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6d6d0c29db47..ec7d425c4022 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -780,7 +780,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ + $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index bd8c75b620c2..c397336fe1ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -216,7 +216,7 @@ static void test_uretprobe_regs_change(void) } #ifndef __NR_uretprobe -#define __NR_uretprobe 467 +#define __NR_uretprobe 335 #endif __naked unsigned long uretprobe_syscall_call_1(void) @@ -253,7 +253,7 @@ static void test_uretprobe_syscall_call(void) struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c; - if (ASSERT_OK(pipe(go), "pipe")) + if (!ASSERT_OK(pipe(go), "pipe")) return; skel = uprobe_syscall_executed__open_and_load(); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 16bdc3e25591..ef70b88bccb2 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1432,4 +1432,58 @@ int iter_arr_with_actual_elem_count(const void *ctx) return sum; } +__u32 upper, select_n, result; +__u64 global; + +static __noinline bool nest_2(char *str) +{ + /* some insns (including branch insns) to ensure stacksafe() is triggered + * in nest_2(). This way, stacksafe() can compare frame associated with nest_1(). + */ + if (str[0] == 't') + return true; + if (str[1] == 'e') + return true; + if (str[2] == 's') + return true; + if (str[3] == 't') + return true; + return false; +} + +static __noinline bool nest_1(int n) +{ + /* case 0: allocate stack, case 1: no allocate stack */ + switch (n) { + case 0: { + char comm[16]; + + if (bpf_get_current_comm(comm, 16)) + return false; + return nest_2(comm); + } + case 1: + return nest_2((char *)&global); + default: + return false; + } +} + +SEC("raw_tp") +__success +int iter_subprog_check_stacksafe(const void *ctx) +{ + long i; + + bpf_for(i, 0, upper) { + if (!nest_1(select_n)) { + result = 1; + return 0; + } + } + + result = 2; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 991c473e3859..12b4eb9d0434 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot) EXPECT_EQ(close(fd3), 0); } +TEST(close_range_bitmap_corruption) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* get the first 128 descriptors open */ + for (int i = 2; i < 128; i++) + EXPECT_GE(dup2(0, i), 0); + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* unshare and truncate descriptor table down to 64 */ + if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) + exit(EXIT_FAILURE); + + ASSERT_EQ(fcntl(64, F_GETFD), -1); + /* ... and verify that the range 64..127 is not + stuck "fully used" according to secondary bitmap */ + EXPECT_EQ(dup(0), 64) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 5f541522364f..5d0a809dc2df 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -29,9 +29,11 @@ static int check_vgem(int fd) version.name = name; ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) + if (ret || version.name_len != 4) return 0; + name[4] = '\0'; + return !strcmp(name, "vgem"); } diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 931dbc36ca43..011508ca604b 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -19,6 +19,15 @@ def _rss_key_rand(length): return [random.randint(0, 255) for _ in range(length)] +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + def get_rss(cfg, context=0): return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] @@ -90,8 +99,9 @@ def _send_traffic_check(cfg, port, name, params): def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" - if len(_get_rx_cnts(cfg)) < 2: - KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -101,6 +111,7 @@ def test_rss_key_indir(cfg): if not data[k]: raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + _rss_key_check(cfg, data=data) key_len = len(data['rss-hash-key']) # Set the key @@ -110,9 +121,26 @@ def test_rss_key_indir(cfg): data = get_rss(cfg) ksft_eq(key, data['rss-hash-key']) + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + # Set the indirection table ethtool(f"-X {cfg.ifname} equal 2") - reset_indir = defer(ethtool, f"-X {cfg.ifname} default") data = get_rss(cfg) ksft_eq(0, min(data['rss-indirection-table'])) ksft_eq(1, max(data['rss-indirection-table'])) @@ -317,8 +345,11 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ctx_cnt = i break + _rss_key_check(cfg, context=ctx_id) + if not create_with_cfg: ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) # Sanity check the context we just created data = get_rss(cfg, ctx_id) diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index dc0408a831d0..75b7b4ef6cfa 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -532,6 +532,7 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { + struct bpf_map *iter_map; int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) @@ -564,6 +565,13 @@ static void load_programs(const struct test_program programs[], *ops_hid_id = self->hid_id; } + /* we disable the auto-attach feature of all maps because we + * only want the tested one to be manually attached in the next + * call to bpf_map__attach_struct_ops() + */ + bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj) + bpf_map__set_autoattach(iter_map, false); + err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); @@ -687,6 +695,24 @@ TEST_F(hid_bpf, subprog_raw_event) } /* + * Attach hid_first_event to the given uhid device, + * attempt at re-attaching it, we should not lock and + * return an invalid struct bpf_link + */ +TEST_F(hid_bpf, multiple_attach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + struct bpf_link *link; + + LOAD_PROGRAMS(progs); + + link = bpf_map__attach_struct_ops(self->skel->maps.first_event); + ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops"); +} + +/* * Ensures that we can attach/detach programs */ TEST_F(hid_bpf, test_attach_detach) diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index ee9bbbcf751b..5ecc845ef792 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -455,7 +455,7 @@ struct { __type(value, struct elem); } hmap SEC(".maps"); -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *work) { __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; struct hid_bpf_ctx *hid_ctx; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index cfe37f491906..e5db897586bb 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -114,7 +114,7 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *wq), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e..bf215790a89d 100644 --- a/tools/testing/selftests/kselftest/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py @@ -70,7 +70,7 @@ def test_result(condition, description=""): def finished(): - if ksft_cnt["pass"] == ksft_num_tests: + if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests: exit_code = KSFT_PASS else: exit_code = KSFT_FAIL diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 709d7d721760..4abebde78187 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -32,13 +32,13 @@ static struct feature_id_reg feat_id_regs[] = { { ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 }, { ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 } }; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index f92c2fb23fcd..8e34f7fa44e9 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); -KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), -KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), -KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), -KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 69849acd95b0..618cd2442390 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -184,6 +184,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. + */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -211,4 +238,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 3c1e9f35b531..3b26bf3cf5b9 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> +#include <linux/keyctl.h> #include <linux/landlock.h> #include <string.h> #include <sys/prctl.h> @@ -326,4 +327,77 @@ TEST(ruleset_fd_transfer) ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); } +TEST(cred_transfer) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + int ruleset_fd, dir_fd; + pid_t child; + int status; + + drop_caps(_metadata); + + dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + EXPECT_LE(0, dir_fd); + EXPECT_EQ(0, close(dir_fd)); + + /* Denies opening directories. */ + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0, + 0, 0)) + { + TH_LOG("Failed to join session keyring: %s", strerror(errno)); + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* + * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a + * different session keyring in the child, so make that happen. + */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, + NULL, 0, 0, 0)); + + /* + * KEYCTL_SESSION_TO_PARENT installs credentials on the parent + * that never go through the cred_prepare hook, this path uses + * cred_transfer instead. + */ + EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0, + 0, 0, 0)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + _exit(_metadata->exit_code); + return; + } + + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_EQ(1, WIFEXITED(status)); + EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 0086efaa7b68..29af19c4e9f9 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -2,6 +2,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y +CONFIG_KEYS=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 901e0d07765b..cfad627e8d94 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test @@ -110,7 +112,7 @@ endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53..2c3a0eb6b22d 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1b03bcfaefdf..5a3a9bcba640 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -22,8 +22,10 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce..36045edb10de 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index fa7eabfaf841..896b3f73fc53 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -293,6 +293,20 @@ static int run_test(struct testcase *test, int count) return ret; } +#ifdef __aarch64__ +/* Check if userspace VA > 48 bits */ +static int high_address_present(void) +{ + void *ptr = mmap((void *)(1UL << 50), 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (ptr == MAP_FAILED) + return 0; + + munmap(ptr, 1); + return 1; +} +#endif + static int supported_arch(void) { #if defined(__powerpc64__) @@ -300,7 +314,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return 1; + return high_address_present(); #else return 0; #endif diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 16d0c172eaeb..535eb2c3d7d1 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, - const void *expected_buf, int expected_len, + const char *expected_buf, int expected_len, int buf_len, int flags) { int i, ret[2], recv_errno[2], expected_errno = 0; diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index d0219032f773..8ee4489238ca 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -146,6 +146,7 @@ cleanup_ns() for ns in "$@"; do [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d..4209b9569039 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1115,11 +1115,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 108aeeb84ef1..9ea6d698e9d3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -661,7 +661,7 @@ pm_nl_check_endpoint() done if [ -z "${id}" ]; then - test_fail "bad test - missing endpoint id" + fail_test "bad test - missing endpoint id" return fi @@ -1415,18 +1415,28 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1438,7 +1448,7 @@ chk_add_nr() fi print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1449,7 +1459,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1459,7 +1469,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1470,7 +1480,7 @@ chk_add_nr() fi print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1481,7 +1491,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1492,7 +1502,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1503,7 +1513,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1513,6 +1523,8 @@ chk_add_nr() print_ok fi fi + + print_info "$extra_msg" } chk_add_tx_nr() @@ -1634,6 +1646,8 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count print_check "ptx" @@ -1655,6 +1669,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + fi } chk_subflow_nr() @@ -1955,6 +1989,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 @@ -2612,33 +2661,46 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2647,17 +2709,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2666,7 +2737,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2676,7 +2747,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -3053,7 +3124,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3066,7 +3137,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3318,7 +3389,7 @@ userspace_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 0 - chk_prio_nr 0 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3526,6 +3597,35 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 mptcp_lib_kill_wait $tests_pid fi + + # remove and re-add + if reset "delete re-add signal" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 2 + chk_mptcp_info subflows 1 subflows 1 + mptcp_lib_kill_wait $tests_pid + fi + } # [$1: error message] diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index 47945b2b3f92..d13fb5ea3e89 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += br_netfilter_queue.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 000000000000..6a764d70ab06 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin <<EOF +table ip filter { + chain forward { + type filter hook forward priority 0; policy accept; + ct state new counter + ip protocol icmp counter queue num 0 bypass + } +} +EOF +./nf_queue -t 5 > /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3e74cfa1a2bf..3f2fca02fec5 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -67,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -77,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -256,6 +259,13 @@ struct testcase testcases_v6[] = { .r_num_mss = 2, }, { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, + { /* send 2B + 2B + 1B segments */ .tlen = 5, .gso_len = 2, @@ -396,11 +406,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? "(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -412,6 +429,12 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e3f97f90d8db..8c3a73461475 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -60,7 +60,9 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 diff --git a/tools/testing/selftests/turbostat/added_perf_counters.py b/tools/testing/selftests/turbostat/added_perf_counters.py new file mode 100755 index 000000000000..9ab4aaf45fb8 --- /dev/null +++ b/tools/testing/selftests/turbostat/added_perf_counters.py @@ -0,0 +1,178 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which +from os import pread + +class PerfCounterInfo: + def __init__(self, subsys, event): + self.subsys = subsys + self.event = event + + def get_perf_event_name(self): + return f'{self.subsys}/{self.event}/' + + def get_turbostat_perf_id(self, counter_scope, counter_type, column_name): + return f'perf/{self.subsys}/{self.event},{counter_scope},{counter_type},{column_name}' + +PERF_COUNTERS_CANDIDATES = [ + PerfCounterInfo('msr', 'mperf'), + PerfCounterInfo('msr', 'aperf'), + PerfCounterInfo('msr', 'tsc'), + PerfCounterInfo('cstate_core', 'c1-residency'), + PerfCounterInfo('cstate_core', 'c6-residency'), + PerfCounterInfo('cstate_core', 'c7-residency'), + PerfCounterInfo('cstate_pkg', 'c2-residency'), + PerfCounterInfo('cstate_pkg', 'c3-residency'), + PerfCounterInfo('cstate_pkg', 'c6-residency'), + PerfCounterInfo('cstate_pkg', 'c7-residency'), + PerfCounterInfo('cstate_pkg', 'c8-residency'), + PerfCounterInfo('cstate_pkg', 'c9-residency'), + PerfCounterInfo('cstate_pkg', 'c10-residency'), +] +present_perf_counters = [] + +def check_perf_access(): + perf = which('perf') + if perf is None: + print('SKIP: Could not find perf binary, thus could not determine perf access.') + return False + + def has_perf_counter_access(counter_name): + proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'], + capture_output = True) + + if proc_perf.returncode != 0: + print(f'SKIP: Could not read {counter_name} perf counter.') + return False + + if b'<not supported>' in proc_perf.stderr: + print(f'SKIP: Could not read {counter_name} perf counter.') + return False + + return True + + for counter in PERF_COUNTERS_CANDIDATES: + if has_perf_counter_access(counter.get_perf_event_name()): + present_perf_counters.append(counter) + + if len(present_perf_counters) == 0: + print('SKIP: Could not read any perf counter.') + return False + + if len(present_perf_counters) != len(PERF_COUNTERS_CANDIDATES): + print(f'WARN: Could not access all of the counters - some will be left untested') + + return True + +if not check_perf_access(): + exit(0) + +turbostat_counter_source_opts = [''] + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +EXPECTED_COLUMNS_DEBUG_DEFAULT = [b'usec', b'Time_Of_Day_Seconds', b'APIC', b'X2APIC'] + +expected_columns = [b'CPU'] +counters_argv = [] +for counter in present_perf_counters: + if counter.subsys == 'cstate_core': + counter_scope = 'core' + elif counter.subsys == 'cstate_pkg': + counter_scope = 'package' + else: + counter_scope = 'cpu' + + counter_type = 'delta' + column_name = counter.event + + cparams = counter.get_turbostat_perf_id( + counter_scope = counter_scope, + counter_type = counter_type, + column_name = column_name + ) + expected_columns.append(column_name.encode()) + counters_argv.extend(['--add', cparams]) + +expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + expected_columns + +def gen_user_friendly_cmdline(argv_): + argv = argv_[:] + ret = '' + + while len(argv) != 0: + arg = argv.pop(0) + arg_next = '' + + if arg in ('-i', '--show', '--add'): + arg_next = argv.pop(0) if len(argv) > 0 else '' + + ret += f'{arg} {arg_next} \\\n\t' + + # Remove the last separator and return + return ret[:-4] + +# +# Run turbostat for some time and send SIGINT +# +timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s'] +turbostat_argv = [turbostat, '-i', '0.50', '--show', 'CPU'] + counters_argv + +def check_columns_or_fail(expected_columns: list, actual_columns: list): + if len(actual_columns) != len(expected_columns): + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) + + failed = False + for expected_column in expected_columns: + if expected_column not in actual_columns: + print(f'turbostat column check failed: missing column {expected_column.decode()}') + failed = True + + if failed: + exit(1) + +cmdline = gen_user_friendly_cmdline(turbostat_argv) +print(f'Running turbostat with:\n\t{cmdline}\n... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t') +check_columns_or_fail(expected_columns, actual_columns) +print('OK') + +# +# Same, but with --debug +# +# We explicitly specify '--show CPU' to make sure turbostat +# don't show a bunch of default counters instead. +# +turbostat_argv.append('--debug') + +cmdline = gen_user_friendly_cmdline(turbostat_argv) +print(f'Running turbostat (in debug mode) with:\n\t{cmdline}\n... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t') +check_columns_or_fail(expected_columns_debug, actual_columns) +print('OK') diff --git a/tools/testing/selftests/turbostat/smi_aperf_mperf.py b/tools/testing/selftests/turbostat/smi_aperf_mperf.py new file mode 100755 index 000000000000..6289cc47d5f0 --- /dev/null +++ b/tools/testing/selftests/turbostat/smi_aperf_mperf.py @@ -0,0 +1,157 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which +from os import pread + +# CDLL calls dlopen underneath. +# Calling it with None (null), we get handle to the our own image (python interpreter). +# We hope to find sched_getcpu() inside ;] +# This is a bit ugly, but helps shipping working software, so.. +try: + import ctypes + + this_image = ctypes.CDLL(None) + BASE_CPU = this_image.sched_getcpu() +except: + BASE_CPU = 0 # If we fail, set to 0 and pray it's not offline. + +MSR_IA32_MPERF = 0x000000e7 +MSR_IA32_APERF = 0x000000e8 + +def check_perf_access(): + perf = which('perf') + if perf is None: + print('SKIP: Could not find perf binary, thus could not determine perf access.') + return False + + def has_perf_counter_access(counter_name): + proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'], + capture_output = True) + + if proc_perf.returncode != 0: + print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.') + return False + + if b'<not supported>' in proc_perf.stderr: + print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.') + return False + + return True + + if not has_perf_counter_access('msr/mperf/'): + return False + if not has_perf_counter_access('msr/aperf/'): + return False + if not has_perf_counter_access('msr/smi/'): + return False + + return True + +def check_msr_access(): + try: + file_msr = open(f'/dev/cpu/{BASE_CPU}/msr', 'rb') + except: + return False + + if len(pread(file_msr.fileno(), 8, MSR_IA32_MPERF)) != 8: + return False + + if len(pread(file_msr.fileno(), 8, MSR_IA32_APERF)) != 8: + return False + + return True + +has_perf_access = check_perf_access() +has_msr_access = check_msr_access() + +turbostat_counter_source_opts = [''] + +if has_msr_access: + turbostat_counter_source_opts.append('--no-perf') +else: + print('SKIP: doesn\'t have MSR access, skipping run with --no-perf') + +if has_perf_access: + turbostat_counter_source_opts.append('--no-msr') +else: + print('SKIP: doesn\'t have perf access, skipping run with --no-msr') + +if not has_msr_access and not has_perf_access: + print('SKIP: No MSR nor perf access detected. Skipping the tests entirely') + exit(0) + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +EXPECTED_COLUMNS_DEBUG_DEFAULT = b'usec\tTime_Of_Day_Seconds\tAPIC\tX2APIC' + +SMI_APERF_MPERF_DEPENDENT_BICS = [ + 'SMI', + 'Avg_MHz', + 'Busy%', + 'Bzy_MHz', +] +if has_perf_access: + SMI_APERF_MPERF_DEPENDENT_BICS.append('IPC') + +for bic in SMI_APERF_MPERF_DEPENDENT_BICS: + for counter_source_opt in turbostat_counter_source_opts: + + # Ugly special case, but it is what it is.. + if counter_source_opt == '--no-perf' and bic == 'IPC': + continue + + expected_columns = bic.encode() + expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + f'\t{bic}'.encode() + + # + # Run turbostat for some time and send SIGINT + # + timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s'] + turbostat_argv = [turbostat, '-i', '0.50', '--show', bic] + + if counter_source_opt: + turbostat_argv.append(counter_source_opt) + + print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) + proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) + if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + + actual_columns = proc_turbostat.stdout.split(b'\n')[0] + if expected_columns != actual_columns: + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) + print('OK') + + # + # Same, but with --debug + # + turbostat_argv.append('--debug') + + print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) + proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) + if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + + actual_columns = proc_turbostat.stdout.split(b'\n')[0] + if expected_columns_debug != actual_columns: + print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}') + exit(1) + print('OK') diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f594a44df840..2f756628613d 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tool->params = params; @@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; } static int stop_tracing; diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index b14e14cdbfb9..fd6a3010afa8 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM select KVM_PRIVATE_MEM bool -config HAVE_KVM_GMEM_PREPARE +config HAVE_KVM_ARCH_GMEM_PREPARE bool depends on KVM_PRIVATE_MEM -config HAVE_KVM_GMEM_INVALIDATE +config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 229570059a1b..992f9beb3e7d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -97,18 +97,19 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); /* - * synchronize_srcu(&kvm->irq_srcu) already called + * synchronize_srcu_expedited(&kvm->irq_srcu) already called * in kvm_unregister_irq_ack_notifier(). */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); + } else { + synchronize_srcu_expedited(&kvm->irq_srcu); } mutex_unlock(&kvm->irqfds.resampler_lock); @@ -126,7 +127,7 @@ irqfd_shutdown(struct work_struct *work) u64 cnt; /* Make sure irqfd has been initialized in assign path. */ - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); /* * Synchronize with the wait-queue and unhook ourselves to prevent @@ -384,7 +385,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -523,7 +524,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); kvm_arch_post_irq_ack_notifier_list_update(kvm); } @@ -608,7 +609,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Take note of a change in irq routing. - * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. + * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards. */ void kvm_irq_routing_update(struct kvm *kvm) { diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 1c509c351261..8f079a61a56d 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -13,84 +13,93 @@ struct kvm_gmem { struct list_head entry; }; -static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) +/** + * folio_file_pfn - like folio_file_page, but return a pfn. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Return: The pfn for this index. + */ +static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) { -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE - struct list_head *gmem_list = &inode->i_mapping->i_private_list; - struct kvm_gmem *gmem; + return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); +} - list_for_each_entry(gmem, gmem_list, entry) { - struct kvm_memory_slot *slot; - struct kvm *kvm = gmem->kvm; - struct page *page; - kvm_pfn_t pfn; - gfn_t gfn; - int rc; - - if (!kvm_arch_gmem_prepare_needed(kvm)) - continue; - - slot = xa_load(&gmem->bindings, index); - if (!slot) - continue; - - page = folio_file_page(folio, index); - pfn = page_to_pfn(page); - gfn = slot->base_gfn + index - slot->gmem.pgoff; - rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); - if (rc) { - pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", - index, gfn, pfn, rc); - return rc; - } +static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + pgoff_t index, struct folio *folio) +{ +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE + kvm_pfn_t pfn = folio_file_pfn(folio, index); + gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff; + int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio)); + if (rc) { + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", + index, gfn, pfn, rc); + return rc; } - #endif + return 0; } -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) +static inline void kvm_gmem_mark_prepared(struct folio *folio) { - struct folio *folio; + folio_mark_uptodate(folio); +} - /* TODO: Support huge pages. */ - folio = filemap_grab_folio(inode->i_mapping, index); - if (IS_ERR(folio)) - return folio; +/* + * Process @folio, which contains @gfn, so that the guest can use it. + * The folio must be locked and the gfn must be contained in @slot. + * On successful return the guest sees a zero page so as to avoid + * leaking host data and the up-to-date flag is set. + */ +static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, struct folio *folio) +{ + unsigned long nr_pages, i; + pgoff_t index; + int r; + + nr_pages = folio_nr_pages(folio); + for (i = 0; i < nr_pages; i++) + clear_highpage(folio_page(folio, i)); /* - * Use the up-to-date flag to track whether or not the memory has been - * zeroed before being handed off to the guest. There is no backing - * storage for the memory, so the folio will remain up-to-date until - * it's removed. + * Preparing huge folios should always be safe, since it should + * be possible to split them later if needed. * - * TODO: Skip clearing pages when trusted firmware will do it when - * assigning memory to the guest. + * Right now the folio order is always going to be zero, but the + * code is ready for huge folios. The only assumption is that + * the base pgoff of memslots is naturally aligned with the + * requested page order, ensuring that huge folios can also use + * huge page table entries for GPA->HPA mapping. + * + * The order will be passed when creating the guest_memfd, and + * checked when creating memslots. */ - if (!folio_test_uptodate(folio)) { - unsigned long nr_pages = folio_nr_pages(folio); - unsigned long i; - - for (i = 0; i < nr_pages; i++) - clear_highpage(folio_page(folio, i)); - - folio_mark_uptodate(folio); - } + WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); + index = gfn - slot->base_gfn + slot->gmem.pgoff; + index = ALIGN_DOWN(index, 1 << folio_order(folio)); + r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); + if (!r) + kvm_gmem_mark_prepared(folio); - if (prepare) { - int r = kvm_gmem_prepare_folio(inode, index, folio); - if (r < 0) { - folio_unlock(folio); - folio_put(folio); - return ERR_PTR(r); - } - } + return r; +} - /* - * Ignore accessed, referenced, and dirty flags. The memory is - * unevictable and there is no storage to write back to. - */ - return folio; +/* + * Returns a locked folio on success. The caller is responsible for + * setting the up-to-date flag before the memory is mapped into the guest. + * There is no backing storage for the memory, so the folio will remain + * up-to-date until it's removed. + * + * Ignore accessed, referenced, and dirty flags. The memory is + * unevictable and there is no storage to write back to. + */ +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) +{ + /* TODO: Support huge pages. */ + return filemap_grab_folio(inode->i_mapping, index); } static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, @@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index, true); + folio = kvm_gmem_get_folio(inode, index); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol return MF_DELAYED; } -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE static void kvm_gmem_free_folio(struct folio *folio) { struct page *page = folio_page(folio, 0); @@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE .free_folio = kvm_gmem_free_folio, #endif }; @@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) fput(file); } -static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) +/* Returns a locked folio on success. */ +static struct folio * +__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared, + int *max_order) { pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; struct kvm_gmem *gmem = file->private_data; struct folio *folio; - struct page *page; - int r; if (file != slot->gmem.file) { WARN_ON_ONCE(slot->gmem.file); - return -EFAULT; + return ERR_PTR(-EFAULT); } gmem = file->private_data; if (xa_load(&gmem->bindings, index) != slot) { WARN_ON_ONCE(xa_load(&gmem->bindings, index)); - return -EIO; + return ERR_PTR(-EIO); } - folio = kvm_gmem_get_folio(file_inode(file), index, prepare); + folio = kvm_gmem_get_folio(file_inode(file), index); if (IS_ERR(folio)) - return PTR_ERR(folio); + return folio; if (folio_test_hwpoison(folio)) { folio_unlock(folio); folio_put(folio); - return -EHWPOISON; + return ERR_PTR(-EHWPOISON); } - page = folio_file_page(folio, index); - - *pfn = page_to_pfn(page); + *pfn = folio_file_pfn(folio, index); if (max_order) *max_order = 0; - r = 0; - - folio_unlock(folio); - - return r; + *is_prepared = folio_test_uptodate(folio); + return folio; } int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order) { struct file *file = kvm_gmem_get_file(slot); - int r; + struct folio *folio; + bool is_prepared = false; + int r = 0; if (!file) return -EFAULT; - r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); + folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order); + if (IS_ERR(folio)) { + r = PTR_ERR(folio); + goto out; + } + + if (!is_prepared) + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); + + folio_unlock(folio); + if (r < 0) + folio_put(folio); + +out: fput(file); return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { @@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); for (i = 0; i < npages; i += (1 << max_order)) { + struct folio *folio; gfn_t gfn = start_gfn + i; + bool is_prepared = false; kvm_pfn_t pfn; if (signal_pending(current)) { @@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long break; } - ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); - if (ret) + folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; + } - if (!IS_ALIGNED(gfn, (1 << max_order)) || - (npages - i) < (1 << max_order)) - max_order = 0; + if (is_prepared) { + folio_unlock(folio); + folio_put(folio); + ret = -EEXIST; + break; + } + + folio_unlock(folio); + WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || + (npages - i) < (1 << max_order)); + + ret = -EINVAL; + while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), + KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { + if (!max_order) + goto put_folio_and_exit; + max_order--; + } p = src ? src + i * PAGE_SIZE : NULL; ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); + if (!ret) + kvm_gmem_mark_prepared(folio); - put_page(pfn_to_page(pfn)); +put_folio_and_exit: + folio_put(folio); if (ret) break; } @@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long return ret && !i ? ret : i; } EXPORT_SYMBOL_GPL(kvm_gmem_populate); +#endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d0788d0a72cc..cb2b78e92910 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1578,15 +1578,14 @@ static int check_memory_region_flags(struct kvm *kvm, if (mem->flags & KVM_MEM_GUEST_MEMFD) valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES; -#ifdef CONFIG_HAVE_KVM_READONLY_MEM /* * GUEST_MEMFD is incompatible with read-only memslots, as writes to * read-only memslots have emulated MMIO, not page fault, semantics, * and KVM doesn't allow emulated MMIO for private memory. */ - if (!(mem->flags & KVM_MEM_GUEST_MEMFD)) + if (kvm_arch_has_readonly_mem(kvm) && + !(mem->flags & KVM_MEM_GUEST_MEMFD)) valid_flags |= KVM_MEM_READONLY; -#endif if (mem->flags & ~valid_flags) return -EINVAL; @@ -2398,48 +2397,47 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static u64 kvm_supported_mem_attributes(struct kvm *kvm) +{ + if (!kvm || kvm_arch_has_private_mem(kvm)) + return KVM_MEMORY_ATTRIBUTE_PRIVATE; + + return 0; +} + /* * Returns true if _all_ gfns in the range [@start, @end) have attributes - * matching @attrs. + * such that the bits in @mask match @attrs. */ bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs) + unsigned long mask, unsigned long attrs) { XA_STATE(xas, &kvm->mem_attr_array, start); unsigned long index; - bool has_attrs; void *entry; - rcu_read_lock(); + mask &= kvm_supported_mem_attributes(kvm); + if (attrs & ~mask) + return false; - if (!attrs) { - has_attrs = !xas_find(&xas, end - 1); - goto out; - } + if (end == start + 1) + return (kvm_get_memory_attributes(kvm, start) & mask) == attrs; + + guard(rcu)(); + if (!attrs) + return !xas_find(&xas, end - 1); - has_attrs = true; for (index = start; index < end; index++) { do { entry = xas_next(&xas); } while (xas_retry(&xas, entry)); - if (xas.xa_index != index || xa_to_value(entry) != attrs) { - has_attrs = false; - break; - } + if (xas.xa_index != index || + (xa_to_value(entry) & mask) != attrs) + return false; } -out: - rcu_read_unlock(); - return has_attrs; -} - -static u64 kvm_supported_mem_attributes(struct kvm *kvm) -{ - if (!kvm || kvm_arch_has_private_mem(kvm)) - return KVM_MEMORY_ATTRIBUTE_PRIVATE; - - return 0; + return true; } static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, @@ -2534,7 +2532,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, mutex_lock(&kvm->slots_lock); /* Nothing to do if the entire range as the desired attributes. */ - if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) + if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes)) goto out_unlock; /* |