diff options
author | Linus Torvalds | 2022-08-04 11:31:20 -0700 |
---|---|---|
committer | Linus Torvalds | 2022-08-04 11:31:20 -0700 |
commit | cfeafd94668910334a77c9437a18212baf9f5610 (patch) | |
tree | b863f4f9688ac141f65b62a69addd0f9bfcc2126 | |
parent | 228dfe98a313f6b6bff5da8b2c5e650e297ebf1a (diff) | |
parent | 273aaa24369cb8d0f246bb16f7122b91a1ef5188 (diff) |
Merge tag 'driver-core-6.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
Pull driver core / kernfs updates from Greg KH:
"Here is the set of driver core and kernfs changes for 6.0-rc1.
The "biggest" thing in here is some scalability improvements for
kernfs for large systems. Other than that, included in here are:
- arch topology and cache info changes that have been reviewed and
discussed a lot.
- potential error path cleanup fixes
- deferred driver probe cleanups
- firmware loader cleanups and tweaks
- documentation updates
- other small things
All of these have been in the linux-next tree for a while with no
reported problems"
* tag 'driver-core-6.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core: (63 commits)
docs: embargoed-hardware-issues: fix invalid AMD contact email
firmware_loader: Replace kmap() with kmap_local_page()
sysfs docs: ABI: Fix typo in comment
kobject: fix Kconfig.debug "its" grammar
kernfs: Fix typo 'the the' in comment
docs: driver-api: firmware: add driver firmware guidelines. (v3)
arch_topology: Fix cache attributes detection in the CPU hotplug path
ACPI: PPTT: Leave the table mapped for the runtime usage
cacheinfo: Use atomic allocation for percpu cache attributes
drivers/base: fix userspace break from using bin_attributes for cpumap and cpulist
MAINTAINERS: Change mentions of mpm to olivia
docs: ABI: sysfs-devices-soc: Update Lee Jones' email address
docs: ABI: sysfs-class-pwm: Update Lee Jones' email address
Documentation/process: Add embargoed HW contact for LLVM
Revert "kernfs: Change kernfs_notify_list to llist."
ACPI: Remove the unused find_acpi_cpu_cache_topology()
arch_topology: Warn that topology for nested clusters is not supported
arch_topology: Add support for parsing sockets in /cpu-map
arch_topology: Set cluster identifier in each core/thread from /cpu-map
arch_topology: Limit span of cpu_clustergroup_mask()
...
47 files changed, 718 insertions, 374 deletions
diff --git a/Documentation/ABI/stable/sysfs-module b/Documentation/ABI/stable/sysfs-module index 560b4a3278df..41b1f16e8795 100644 --- a/Documentation/ABI/stable/sysfs-module +++ b/Documentation/ABI/stable/sysfs-module @@ -38,7 +38,7 @@ What: /sys/module/<MODULENAME>/srcversion Date: Jun 2005 Description: If the module source has MODULE_VERSION, this file will contain - the checksum of the the source code. + the checksum of the source code. What: /sys/module/<MODULENAME>/version Date: Jun 2005 diff --git a/Documentation/ABI/testing/sysfs-class-pwm b/Documentation/ABI/testing/sysfs-class-pwm index 3d65285bcd5f..0638c94d01ef 100644 --- a/Documentation/ABI/testing/sysfs-class-pwm +++ b/Documentation/ABI/testing/sysfs-class-pwm @@ -81,7 +81,7 @@ Description: What: /sys/class/pwm/pwmchip<N>/pwmX/capture Date: June 2016 KernelVersion: 4.8 -Contact: Lee Jones <lee.jones@linaro.org> +Contact: Lee Jones <lee@kernel.org> Description: Capture information about a PWM signal. The output format is a pair unsigned integers (period and duty cycle), separated by a diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-client b/Documentation/ABI/testing/sysfs-class-rtrs-client index 49a4157c7bf1..fecc59d1b96f 100644 --- a/Documentation/ABI/testing/sysfs-class-rtrs-client +++ b/Documentation/ABI/testing/sysfs-class-rtrs-client @@ -78,7 +78,7 @@ What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_name Date: Feb 2020 KernelVersion: 5.7 Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com> -Description: RO, Contains the the name of HCA the connection established on. +Description: RO, Contains the name of HCA the connection established on. What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_port Date: Feb 2020 diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-server b/Documentation/ABI/testing/sysfs-class-rtrs-server index 3b6d5b067df0..b08601d80409 100644 --- a/Documentation/ABI/testing/sysfs-class-rtrs-server +++ b/Documentation/ABI/testing/sysfs-class-rtrs-server @@ -24,7 +24,7 @@ What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_name Date: Feb 2020 KernelVersion: 5.7 Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com> -Description: RO, Contains the the name of HCA the connection established on. +Description: RO, Contains the name of HCA the connection established on. What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_port Date: Feb 2020 diff --git a/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD b/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD index f7b360a61b21..bc44bc903bc8 100644 --- a/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD +++ b/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD @@ -74,7 +74,7 @@ Description: Reads also cause the AC alarm timer status to be reset. - Another way to reset the the status of the AC alarm timer is to + Another way to reset the status of the AC alarm timer is to write (the number) 0 to this file. If the status return value indicates that the timer has expired, diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 1b2a2d41ff80..54195530e97a 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -303,5 +303,5 @@ Date: Apr 2010 Contact: Dominik Brodowski <linux@dominikbrodowski.net> Description: Reports the runtime PM children usage count of a device, or - 0 if the the children will be ignored. + 0 if the children will be ignored. diff --git a/Documentation/ABI/testing/sysfs-devices-soc b/Documentation/ABI/testing/sysfs-devices-soc index ea999e292f11..5269808ec35f 100644 --- a/Documentation/ABI/testing/sysfs-devices-soc +++ b/Documentation/ABI/testing/sysfs-devices-soc @@ -1,6 +1,6 @@ What: /sys/devices/socX Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: The /sys/devices/ directory contains a sub-directory for each System-on-Chip (SoC) device on a running platform. Information @@ -14,14 +14,14 @@ Description: What: /sys/devices/socX/machine Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: Read-only attribute common to all SoCs. Contains the SoC machine name (e.g. Ux500). What: /sys/devices/socX/family Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: Read-only attribute common to all SoCs. Contains SoC family name (e.g. DB8500). @@ -59,7 +59,7 @@ Description: What: /sys/devices/socX/soc_id Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: Read-only attribute supported by most SoCs. In the case of ST-Ericsson's chips this contains the SoC serial number. @@ -72,21 +72,21 @@ Description: What: /sys/devices/socX/revision Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: Read-only attribute supported by most SoCs. Contains the SoC's manufacturing revision number. What: /sys/devices/socX/process Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: Read-only attribute supported ST-Ericsson's silicon. Contains the the process by which the silicon chip was manufactured. What: /sys/bus/soc Date: January 2012 -contact: Lee Jones <lee.jones@linaro.org> +contact: Lee Jones <lee@kernel.org> Description: The /sys/bus/soc/ directory contains the usual sub-folders expected under most buses. /sys/bus/soc/devices is of particular diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index df79e129d097..5bf61881f012 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -67,8 +67,7 @@ Description: Discover NUMA node a CPU belongs to /sys/devices/system/cpu/cpu42/node2 -> ../../node/node2 -What: /sys/devices/system/cpu/cpuX/topology/core_id - /sys/devices/system/cpu/cpuX/topology/core_siblings +What: /sys/devices/system/cpu/cpuX/topology/core_siblings /sys/devices/system/cpu/cpuX/topology/core_siblings_list /sys/devices/system/cpu/cpuX/topology/physical_package_id /sys/devices/system/cpu/cpuX/topology/thread_siblings @@ -84,10 +83,6 @@ Description: CPU topology files that describe a logical CPU's relationship Briefly, the files above are: - core_id: the CPU core ID of cpuX. Typically it is the - hardware platform's identifier (rather than the kernel's). - The actual value is architecture and platform dependent. - core_siblings: internal kernel map of cpuX's hardware threads within the same physical_package_id. diff --git a/Documentation/driver-api/firmware/core.rst b/Documentation/driver-api/firmware/core.rst index 1d1688cbc078..803cd574bbd7 100644 --- a/Documentation/driver-api/firmware/core.rst +++ b/Documentation/driver-api/firmware/core.rst @@ -13,4 +13,5 @@ documents these features. direct-fs-lookup fallback-mechanisms lookup-order + firmware-usage-guidelines diff --git a/Documentation/driver-api/firmware/firmware-usage-guidelines.rst b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst new file mode 100644 index 000000000000..fdcfce42c6d2 --- /dev/null +++ b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst @@ -0,0 +1,44 @@ +=================== +Firmware Guidelines +=================== + +Users switching to a newer kernel should *not* have to install newer +firmware files to keep their hardware working. At the same time updated +firmware files must not cause any regressions for users of older kernel +releases. + +Drivers that use firmware from linux-firmware should follow the rules in +this guide. (Where there is limited control of the firmware, +i.e. company doesn't support Linux, firmwares sourced from misc places, +then of course these rules will not apply strictly.) + +* Firmware files shall be designed in a way that it allows checking for + firmware ABI version changes. It is recommended that firmware files be + versioned with at least a major/minor version. It is suggested that + the firmware files in linux-firmware be named with some device + specific name, and just the major version. The firmware version should + be stored in the firmware header, or as an exception, as part of the + firmware file name, in order to let the driver detact any non-ABI + fixes/changes. The firmware files in linux-firmware should be + overwritten with the newest compatible major version. Newer major + version firmware shall remain compatible with all kernels that load + that major number. + +* If the kernel support for the hardware is normally inactive, or the + hardware isn't available for public consumption, this can + be ignored, until the first kernel release that enables that hardware. + This means no major version bumps without the kernel retaining + backwards compatibility for the older major versions. Minor version + bumps should not introduce new features that newer kernels depend on + non-optionally. + +* If a security fix needs lockstep firmware and kernel fixes in order to + be successful, then all supported major versions in the linux-firmware + repo that are required by currently supported stable/LTS kernels, + should be updated with the security fix. The kernel patches should + detect if the firmware is new enough to declare if the security issue + is fixed. All communications around security fixes should point at + both the firmware and kernel fixes. If a security fix requires + deprecating old major versions, then this should only be done as a + last option, and be stated clearly in all communications. + diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 95999302d279..b6b4481e2474 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -244,7 +244,7 @@ disclosure of a particular issue, unless requested by a response team or by an involved disclosed party. The current ambassadors list: ============= ======================================================== - AMD Tom Lendacky <tom.lendacky@amd.com> + AMD Tom Lendacky <thomas.lendacky@amd.com> Ampere Darren Hart <darren@os.amperecomputing.com> ARM Catalin Marinas <catalin.marinas@arm.com> IBM Power Anton Blanchard <anton@linux.ibm.com> @@ -264,6 +264,9 @@ an involved disclosed party. The current ambassadors list: Amazon Google Kees Cook <keescook@chromium.org> + + GCC + LLVM Nick Desaulniers <ndesaulniers@google.com> ============= ======================================================== If you want your organization to be added to the ambassadors list, please diff --git a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst index 88273ebe7823..cf5f1fca3d92 100644 --- a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst +++ b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst @@ -174,7 +174,7 @@ CVE分配 ============= ======================================================== ARM - AMD Tom Lendacky <tom.lendacky@amd.com> + AMD Tom Lendacky <thomas.lendacky@amd.com> IBM Intel Tony Luck <tony.luck@intel.com> Qualcomm Trilok Soni <tsoni@codeaurora.org> diff --git a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst index 6c76fc96131a..fbde3e26eda5 100644 --- a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst +++ b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst @@ -177,7 +177,7 @@ CVE分配 ============= ======================================================== ARM - AMD Tom Lendacky <tom.lendacky@amd.com> + AMD Tom Lendacky <thomas.lendacky@amd.com> IBM Intel Tony Luck <tony.luck@intel.com> Qualcomm Trilok Soni <tsoni@codeaurora.org> diff --git a/MAINTAINERS b/MAINTAINERS index b779a0b435e3..84ad954cee26 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7494,7 +7494,7 @@ F: Documentation/admin-guide/media/em28xx* F: drivers/media/usb/em28xx/ EMBEDDED LINUX -M: Matt Mackall <mpm@selenic.com> +M: Olivia Mackall <olivia@selenic.com> M: David Woodhouse <dwmw2@infradead.org> L: linux-embedded@vger.kernel.org S: Maintained @@ -8902,7 +8902,7 @@ F: include/trace/events/hwmon*.h K: (devm_)?hwmon_device_(un)?register(|_with_groups|_with_info) HARDWARE RANDOM NUMBER GENERATOR CORE -M: Matt Mackall <mpm@selenic.com> +M: Olivia Mackall <olivia@selenic.com> M: Herbert Xu <herbert@gondor.apana.org.au> L: linux-crypto@vger.kernel.org S: Odd fixes diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 9ab78ad826e2..869ffc4d4484 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -89,8 +89,6 @@ int __init parse_acpi_topology(void) return 0; for_each_possible_cpu(cpu) { - int i, cache_id; - topology_id = find_acpi_cpu_topology(cpu, 0); if (topology_id < 0) return topology_id; @@ -107,18 +105,6 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].cluster_id = topology_id; topology_id = find_acpi_cpu_topology_package(cpu); cpu_topology[cpu].package_id = topology_id; - - i = acpi_find_last_cache_level(cpu); - - if (i > 0) { - /* - * this is the only part of cpu_topology that has - * a direct relationship with the cache topology - */ - cache_id = find_acpi_cpu_cache_topology(cpu, i); - if (cache_id > 0) - cpu_topology[cpu].llc_id = cache_id; - } } return 0; diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 701f61c01359..c91342dcbcd6 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -437,7 +437,8 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table, pr_debug("found = %p %p\n", found_cache, cpu_node); if (found_cache) update_cache_properties(this_leaf, found_cache, - cpu_node, table->revision); + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)), + table->revision); index++; } @@ -532,21 +533,37 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, return -ENOENT; } + +static struct acpi_table_header *acpi_get_pptt(void) +{ + static struct acpi_table_header *pptt; + acpi_status status; + + /* + * PPTT will be used at runtime on every CPU hotplug in path, so we + * don't need to call acpi_put_table() to release the table mapping. + */ + if (!pptt) { + status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt); + if (ACPI_FAILURE(status)) + acpi_pptt_warn_missing(); + } + + return pptt; +} + static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag) { struct acpi_table_header *table; - acpi_status status; int retval; - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); + table = acpi_get_pptt(); + if (!table) return -ENOENT; - } + retval = topology_get_acpi_cpu_tag(table, cpu, level, flag); pr_debug("Topology Setup ACPI CPU %d, level %d ret = %d\n", cpu, level, retval); - acpi_put_table(table); return retval; } @@ -567,16 +584,13 @@ static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag) static int check_acpi_cpu_flag(unsigned int cpu, int rev, u32 flag) { struct acpi_table_header *table; - acpi_status status; u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); struct acpi_pptt_processor *cpu_node = NULL; int ret = -ENOENT; - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); - return ret; - } + table = acpi_get_pptt(); + if (!table) + return -ENOENT; if (table->revision >= rev) cpu_node = acpi_find_processor_node(table, acpi_cpu_id); @@ -584,8 +598,6 @@ static int check_acpi_cpu_flag(unsigned int cpu, int rev, u32 flag) if (cpu_node) ret = (cpu_node->flags & flag) != 0; - acpi_put_table(table); - return ret; } @@ -604,18 +616,15 @@ int acpi_find_last_cache_level(unsigned int cpu) u32 acpi_cpu_id; struct acpi_table_header *table; int number_of_levels = 0; - acpi_status status; + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; pr_debug("Cache Setup find last level CPU=%d\n", cpu); acpi_cpu_id = get_acpi_id_for_cpu(cpu); - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); - } else { - number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id); - acpi_put_table(table); - } + number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id); pr_debug("Cache Setup find last level level=%d\n", number_of_levels); return number_of_levels; @@ -637,20 +646,16 @@ int acpi_find_last_cache_level(unsigned int cpu) int cache_setup_acpi(unsigned int cpu) { struct acpi_table_header *table; - acpi_status status; - pr_debug("Cache Setup ACPI CPU %d\n", cpu); - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); + table = acpi_get_pptt(); + if (!table) return -ENOENT; - } + + pr_debug("Cache Setup ACPI CPU %d\n", cpu); cache_setup_acpi_cpu(table, cpu); - acpi_put_table(table); - return status; + return 0; } /** @@ -691,43 +696,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level) } /** - * find_acpi_cpu_cache_topology() - Determine a unique cache topology value - * @cpu: Kernel logical CPU number - * @level: The cache level for which we would like a unique ID - * - * Determine a unique ID for each unified cache in the system - * - * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found. - * Otherwise returns a value which represents a unique topological feature. - */ -int find_acpi_cpu_cache_topology(unsigned int cpu, int level) -{ - struct acpi_table_header *table; - struct acpi_pptt_cache *found_cache; - acpi_status status; - u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); - struct acpi_pptt_processor *cpu_node = NULL; - int ret = -1; - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); - return -ENOENT; - } - - found_cache = acpi_find_cache_node(table, acpi_cpu_id, - CACHE_TYPE_UNIFIED, - level, - &cpu_node); - if (found_cache) - ret = ACPI_PTR_DIFF(cpu_node, table); - - acpi_put_table(table); - - return ret; -} - -/** * find_acpi_cpu_topology_package() - Determine a unique CPU package value * @cpu: Kernel logical CPU number * @@ -766,50 +734,38 @@ int find_acpi_cpu_topology_package(unsigned int cpu) int find_acpi_cpu_topology_cluster(unsigned int cpu) { struct acpi_table_header *table; - acpi_status status; struct acpi_pptt_processor *cpu_node, *cluster_node; u32 acpi_cpu_id; int retval; int is_thread; - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); + table = acpi_get_pptt(); + if (!table) return -ENOENT; - } acpi_cpu_id = get_acpi_id_for_cpu(cpu); cpu_node = acpi_find_processor_node(table, acpi_cpu_id); - if (cpu_node == NULL || !cpu_node->parent) { - retval = -ENOENT; - goto put_table; - } + if (!cpu_node || !cpu_node->parent) + return -ENOENT; is_thread = cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD; cluster_node = fetch_pptt_node(table, cpu_node->parent); - if (cluster_node == NULL) { - retval = -ENOENT; - goto put_table; - } + if (!cluster_node) + return -ENOENT; + if (is_thread) { - if (!cluster_node->parent) { - retval = -ENOENT; - goto put_table; - } + if (!cluster_node->parent) + return -ENOENT; + cluster_node = fetch_pptt_node(table, cluster_node->parent); - if (cluster_node == NULL) { - retval = -ENOENT; - goto put_table; - } + if (!cluster_node) + return -ENOENT; } if (cluster_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID) retval = cluster_node->acpi_processor_id; else retval = ACPI_PTR_DIFF(cluster_node, table); -put_table: - acpi_put_table(table); - return retval; } diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 579c851a2bd7..0424b59b695e 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -7,6 +7,7 @@ */ #include <linux/acpi.h> +#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/device.h> @@ -496,7 +497,7 @@ static int __init get_cpu_for_node(struct device_node *node) } static int __init parse_core(struct device_node *core, int package_id, - int core_id) + int cluster_id, int core_id) { char name[20]; bool leaf = true; @@ -512,6 +513,7 @@ static int __init parse_core(struct device_node *core, int package_id, cpu = get_cpu_for_node(t); if (cpu >= 0) { cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else if (cpu != -ENODEV) { @@ -533,6 +535,7 @@ static int __init parse_core(struct device_node *core, int package_id, } cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; } else if (leaf && cpu != -ENODEV) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -542,13 +545,13 @@ static int __init parse_core(struct device_node *core, int package_id, return 0; } -static int __init parse_cluster(struct device_node *cluster, int depth) +static int __init parse_cluster(struct device_node *cluster, int package_id, + int cluster_id, int depth) { char name[20]; bool leaf = true; bool has_cores = false; struct device_node *c; - static int package_id __initdata; int core_id = 0; int i, ret; @@ -563,7 +566,9 @@ static int __init parse_cluster(struct device_node *cluster, int depth) c = of_get_child_by_name(cluster, name); if (c) { leaf = false; - ret = parse_cluster(c, depth + 1); + ret = parse_cluster(c, package_id, i, depth + 1); + if (depth > 0) + pr_warn("Topology for clusters of clusters not yet supported\n"); of_node_put(c); if (ret != 0) return ret; @@ -587,7 +592,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, package_id, core_id++); + ret = parse_core(c, package_id, cluster_id, + core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -604,10 +610,33 @@ static int __init parse_cluster(struct device_node *cluster, int depth) if (leaf && !has_cores) pr_warn("%pOF: empty cluster\n", cluster); - if (leaf) + return 0; +} + +static int __init parse_socket(struct device_node *socket) +{ + char name[20]; + struct device_node *c; + bool has_socket = false; + int package_id = 0, ret; + + do { + snprintf(name, sizeof(name), "socket%d", package_id); + c = of_get_child_by_name(socket, name); + if (c) { + has_socket = true; + ret = parse_cluster(c, package_id, -1, 0); + of_node_put(c); + if (ret != 0) + return ret; + } package_id++; + } while (c); - return 0; + if (!has_socket) + ret = parse_cluster(socket, 0, -1, 0); + + return ret; } static int __init parse_dt_topology(void) @@ -630,7 +659,7 @@ static int __init parse_dt_topology(void) if (!map) goto out; - ret = parse_cluster(map, 0); + ret = parse_socket(map); if (ret != 0) goto out_map; @@ -641,8 +670,10 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. */ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].package_id == -1) + if (cpu_topology[cpu].package_id < 0) { ret = -EINVAL; + break; + } out_map: of_node_put(map); @@ -667,7 +698,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) /* not numa in package, lets use the package siblings */ core_mask = &cpu_topology[cpu].core_sibling; } - if (cpu_topology[cpu].llc_id != -1) { + + if (last_level_cache_is_valid(cpu)) { if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) core_mask = &cpu_topology[cpu].llc_sibling; } @@ -686,19 +718,31 @@ const struct cpumask *cpu_coregroup_mask(int cpu) const struct cpumask *cpu_clustergroup_mask(int cpu) { + /* + * Forbid cpu_clustergroup_mask() to span more or the same CPUs as + * cpu_coregroup_mask(). + */ + if (cpumask_subset(cpu_coregroup_mask(cpu), + &cpu_topology[cpu].cluster_sibling)) + return get_cpu_mask(cpu); + return &cpu_topology[cpu].cluster_sibling; } void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; - int cpu; + int cpu, ret; + + ret = detect_cache_attributes(cpuid); + if (ret) + pr_info("Early cacheinfo failed, ret = %d\n", ret); /* update core and thread sibling masks */ for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) { + if (last_level_cache_is_shared(cpu, cpuid)) { cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } @@ -706,15 +750,17 @@ void update_siblings_masks(unsigned int cpuid) if (cpuid_topo->package_id != cpu_topo->package_id) continue; - if (cpuid_topo->cluster_id == cpu_topo->cluster_id && - cpuid_topo->cluster_id != -1) { + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + if (cpuid_topo->cluster_id >= 0) { cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling); cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling); } - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); - if (cpuid_topo->core_id != cpu_topo->core_id) continue; @@ -750,7 +796,6 @@ void __init reset_cpu_topology(void) cpu_topo->core_id = -1; cpu_topo->cluster_id = -1; cpu_topo->package_id = -1; - cpu_topo->llc_id = -1; clear_cpu_topology(cpu); } @@ -780,15 +825,20 @@ __weak int __init parse_acpi_topology(void) #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) void __init init_cpu_topology(void) { + int ret; + reset_cpu_topology(); + ret = parse_acpi_topology(); + if (!ret) + ret = of_have_populated_dt() && parse_dt_topology(); - /* - * Discard anything that was parsed if we hit an error so we - * don't use partial information. - */ - if (parse_acpi_topology()) - reset_cpu_topology(); - else if (of_have_populated_dt() && parse_dt_topology()) + if (ret) { + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ reset_cpu_topology(); + return; + } } #endif diff --git a/drivers/base/base.h b/drivers/base/base.h index ab71403d102f..b3a43a164dcd 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -160,6 +160,7 @@ extern int devres_release_all(struct device *dev); extern void device_block_probing(void); extern void device_unblock_probing(void); extern void deferred_probe_extend_timeout(void); +extern void driver_deferred_probe_trigger(void); /* /sys/devices directory */ extern struct kset *devices_kset; diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index dad296229161..4b5cd08c5a65 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -14,7 +14,7 @@ #include <linux/cpu.h> #include <linux/device.h> #include <linux/init.h> -#include <linux/of.h> +#include <linux/of_device.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/smp.h> @@ -25,19 +25,60 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo); #define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu)) #define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves) #define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list) +#define per_cpu_cacheinfo_idx(cpu, idx) \ + (per_cpu_cacheinfo(cpu) + (idx)) struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) { return ci_cacheinfo(cpu); } -#ifdef CONFIG_OF static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { + /* + * For non DT/ACPI systems, assume unique level 1 caches, + * system-wide shared caches for all other levels. This will be used + * only if arch specific code has not populated shared_cpu_map + */ + if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI))) + return !(this_leaf->level == 1); + + if ((sib_leaf->attributes & CACHE_ID) && + (this_leaf->attributes & CACHE_ID)) + return sib_leaf->id == this_leaf->id; + return sib_leaf->fw_token == this_leaf->fw_token; } +bool last_level_cache_is_valid(unsigned int cpu) +{ + struct cacheinfo *llc; + + if (!cache_leaves(cpu)) + return false; + + llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); + + return (llc->attributes & CACHE_ID) || !!llc->fw_token; + +} + +bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y) +{ + struct cacheinfo *llc_x, *llc_y; + + if (!last_level_cache_is_valid(cpu_x) || + !last_level_cache_is_valid(cpu_y)) + return false; + + llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1); + llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1); + + return cache_leaves_are_shared(llc_x, llc_y); +} + +#ifdef CONFIG_OF /* OF properties to query for a given cache type */ struct cache_type_info { const char *size_prop; @@ -157,27 +198,16 @@ static int cache_setup_of_node(unsigned int cpu) { struct device_node *np; struct cacheinfo *this_leaf; - struct device *cpu_dev = get_cpu_device(cpu); - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; - /* skip if fw_token is already populated */ - if (this_cpu_ci->info_list->fw_token) { - return 0; - } - - if (!cpu_dev) { - pr_err("No cpu device for CPU %d\n", cpu); - return -ENODEV; - } - np = cpu_dev->of_node; + np = of_cpu_device_node_get(cpu); if (!np) { pr_err("Failed to find cpu%d device node\n", cpu); return -ENOENT; } while (index < cache_leaves(cpu)) { - this_leaf = this_cpu_ci->info_list + index; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); if (this_leaf->level != 1) np = of_find_next_cache_node(np); else @@ -196,16 +226,6 @@ static int cache_setup_of_node(unsigned int cpu) } #else static inline int cache_setup_of_node(unsigned int cpu) { return 0; } -static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, - struct cacheinfo *sib_leaf) -{ - /* - * For non-DT/ACPI systems, assume unique level 1 caches, system-wide - * shared caches for all other levels. This will be used only if - * arch specific code has not populated shared_cpu_map - */ - return !(this_leaf->level == 1); -} #endif int __weak cache_setup_acpi(unsigned int cpu) @@ -215,6 +235,18 @@ int __weak cache_setup_acpi(unsigned int cpu) unsigned int coherency_max_size; +static int cache_setup_properties(unsigned int cpu) +{ + int ret = 0; + + if (of_have_populated_dt()) + ret = cache_setup_of_node(cpu); + else if (!acpi_disabled) + ret = cache_setup_acpi(cpu); + + return ret; +} + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -225,21 +257,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (this_cpu_ci->cpu_map_populated) return 0; - if (of_have_populated_dt()) - ret = cache_setup_of_node(cpu); - else if (!acpi_disabled) - ret = cache_setup_acpi(cpu); - - if (ret) - return ret; + /* + * skip setting up cache properties if LLC is valid, just need + * to update the shared cpu_map if the cache attributes were + * populated early before all the cpus are brought online + */ + if (!last_level_cache_is_valid(cpu)) { + ret = cache_setup_properties(cpu); + if (ret) + return ret; + } for (index = 0; index < cache_leaves(cpu); index++) { unsigned int i; - this_leaf = this_cpu_ci->info_list + index; - /* skip if shared_cpu_map is already populated */ - if (!cpumask_empty(&this_leaf->shared_cpu_map)) - continue; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); for_each_online_cpu(i) { @@ -247,7 +279,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (i == cpu || !sib_cpu_ci->info_list) continue;/* skip if itself or no cacheinfo */ - sib_leaf = sib_cpu_ci->info_list + index; + + sib_leaf = per_cpu_cacheinfo_idx(i, index); if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_set_cpu(i, &this_leaf->shared_cpu_map); @@ -263,23 +296,19 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) static void cache_shared_cpu_map_remove(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; unsigned int sibling, index; for (index = 0; index < cache_leaves(cpu); index++) { - this_leaf = this_cpu_ci->info_list + index; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); for_each_cpu(sibling, &this_leaf->shared_cpu_map) { - struct cpu_cacheinfo *sib_cpu_ci; - - if (sibling == cpu) /* skip itself */ - continue; + struct cpu_cacheinfo *sib_cpu_ci = + get_cpu_cacheinfo(sibling); - sib_cpu_ci = get_cpu_cacheinfo(sibling); - if (!sib_cpu_ci->info_list) - continue; + if (sibling == cpu || !sib_cpu_ci->info_list) + continue;/* skip if itself or no cacheinfo */ - sib_leaf = sib_cpu_ci->info_list + index; + sib_leaf = per_cpu_cacheinfo_idx(sibling, index); cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } @@ -310,17 +339,28 @@ int __weak populate_cache_leaves(unsigned int cpu) return -ENOENT; } -static int detect_cache_attributes(unsigned int cpu) +int detect_cache_attributes(unsigned int cpu) { int ret; + /* Since early detection of the cacheinfo is allowed via this + * function and this also gets called as CPU hotplug callbacks via + * cacheinfo_cpu_online, the initialisation can be skipped and only + * CPU maps can be updated as the CPU online status would be update + * if called via cacheinfo_cpu_online path. + */ + if (per_cpu_cacheinfo(cpu)) + goto update_cpu_map; + if (init_cache_level(cpu) || !cache_leaves(cpu)) return -ENOENT; per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), - sizeof(struct cacheinfo), GFP_KERNEL); - if (per_cpu_cacheinfo(cpu) == NULL) + sizeof(struct cacheinfo), GFP_ATOMIC); + if (per_cpu_cacheinfo(cpu) == NULL) { + cache_leaves(cpu) = 0; return -ENOMEM; + } /* * populate_cache_leaves() may completely setup the cache leaves and @@ -329,6 +369,8 @@ static int detect_cache_attributes(unsigned int cpu) ret = populate_cache_leaves(cpu); if (ret) goto free_ci; + +update_cpu_map: /* * For systems using DT for cache hierarchy, fw_token * and shared_cpu_map will be set up here only if they are @@ -614,7 +656,6 @@ static int cache_add_dev(unsigned int cpu) int rc; struct device *ci_dev, *parent; struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); const struct attribute_group **cache_groups; rc = cpu_cache_sysfs_init(cpu); @@ -623,7 +664,7 @@ static int cache_add_dev(unsigned int cpu) parent = per_cpu_cache_dev(cpu); for (i = 0; i < cache_leaves(cpu); i++) { - this_leaf = this_cpu_ci->info_list + i; + this_leaf = per_cpu_cacheinfo_idx(cpu, i); if (this_leaf->disable_sysfs) continue; if (this_leaf->type == CACHE_TYPE_NOCACHE) diff --git a/drivers/base/core.c b/drivers/base/core.c index 460d6f163e41..753e7cca0f40 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -54,6 +54,7 @@ static unsigned int defer_sync_state_count = 1; static DEFINE_MUTEX(fwnode_link_lock); static bool fw_devlink_is_permissive(void); static bool fw_devlink_drv_reg_done; +static bool fw_devlink_best_effort; /** * fwnode_link_add - Create a link between two fwnode_handles. @@ -976,6 +977,12 @@ static void device_links_missing_supplier(struct device *dev) } } +static bool dev_is_best_effort(struct device *dev) +{ + return (fw_devlink_best_effort && dev->can_match) || + (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); +} + /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. @@ -995,7 +1002,7 @@ static void device_links_missing_supplier(struct device *dev) int device_links_check_suppliers(struct device *dev) { struct device_link *link; - int ret = 0; + int ret = 0, fwnode_ret = 0; struct fwnode_handle *sup_fw; /* @@ -1008,12 +1015,17 @@ int device_links_check_suppliers(struct device *dev) sup_fw = list_first_entry(&dev->fwnode->suppliers, struct fwnode_link, c_hook)->supplier; - dev_err_probe(dev, -EPROBE_DEFER, "wait for supplier %pfwP\n", - sup_fw); - mutex_unlock(&fwnode_link_lock); - return -EPROBE_DEFER; + if (!dev_is_best_effort(dev)) { + fwnode_ret = -EPROBE_DEFER; + dev_err_probe(dev, -EPROBE_DEFER, + "wait for supplier %pfwP\n", sup_fw); + } else { + fwnode_ret = -EAGAIN; + } } mutex_unlock(&fwnode_link_lock); + if (fwnode_ret == -EPROBE_DEFER) + return fwnode_ret; device_links_write_lock(); @@ -1023,6 +1035,14 @@ int device_links_check_suppliers(struct device *dev) if (link->status != DL_STATE_AVAILABLE && !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { + + if (dev_is_best_effort(dev) && + link->flags & DL_FLAG_INFERRED && + !link->supplier->can_match) { + ret = -EAGAIN; + continue; + } + device_links_missing_supplier(dev); dev_err_probe(dev, -EPROBE_DEFER, "supplier %s not ready\n", @@ -1035,7 +1055,8 @@ int device_links_check_suppliers(struct device *dev) dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); - return ret; + + return ret ? ret : fwnode_ret; } /** @@ -1300,6 +1321,18 @@ void device_links_driver_bound(struct device *dev) * save to drop the managed link completely. */ device_link_drop_managed(link); + } else if (dev_is_best_effort(dev) && + link->flags & DL_FLAG_INFERRED && + link->status != DL_STATE_CONSUMER_PROBE && + !link->supplier->can_match) { + /* + * When dev_is_best_effort() is true, we ignore device + * links to suppliers that don't have a driver. If the + * consumer device still managed to probe, there's no + * point in maintaining a device link in a weird state + * (consumer probed before supplier). So delete it. + */ + device_link_drop_managed(link); } else { WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); WRITE_ONCE(link->status, DL_STATE_ACTIVE); @@ -1592,7 +1625,7 @@ static int __init fw_devlink_setup(char *arg) } early_param("fw_devlink", fw_devlink_setup); -static bool fw_devlink_strict; +static bool fw_devlink_strict = true; static int __init fw_devlink_strict_setup(char *arg) { return strtobool(arg, &fw_devlink_strict); @@ -1666,6 +1699,62 @@ void fw_devlink_drivers_done(void) device_links_write_unlock(); } +/** + * wait_for_init_devices_probe - Try to probe any device needed for init + * + * Some devices might need to be probed and bound successfully before the kernel + * boot sequence can finish and move on to init/userspace. For example, a + * network interface might need to be bound to be able to mount a NFS rootfs. + * + * With fw_devlink=on by default, some of these devices might be blocked from + * probing because they are waiting on a optional supplier that doesn't have a + * driver. While fw_devlink will eventually identify such devices and unblock + * the probing automatically, it might be too late by the time it unblocks the + * probing of devices. For example, the IP4 autoconfig might timeout before + * fw_devlink unblocks probing of the network interface. + * + * This function is available to temporarily try and probe all devices that have + * a driver even if some of their suppliers haven't been added or don't have + * drivers. + * + * The drivers can then decide which of the suppliers are optional vs mandatory + * and probe the device if possible. By the time this function returns, all such + * "best effort" probes are guaranteed to be completed. If a device successfully + * probes in this mode, we delete all fw_devlink discovered dependencies of that + * device where the supplier hasn't yet probed successfully because they have to + * be optional dependencies. + * + * Any devices that didn't successfully probe go back to being treated as if + * this function was never called. + * + * This also means that some devices that aren't needed for init and could have + * waited for their optional supplier to probe (when the supplier's module is + * loaded later on) would end up probing prematurely with limited functionality. + * So call this function only when boot would fail without it. + */ +void __init wait_for_init_devices_probe(void) +{ + if (!fw_devlink_flags || fw_devlink_is_permissive()) + return; + + /* + * Wait for all ongoing probes to finish so that the "best effort" is + * only applied to devices that can't probe otherwise. + */ + wait_for_device_probe(); + + pr_info("Trying to probe devices needed for running init ...\n"); + fw_devlink_best_effort = true; + driver_deferred_probe_trigger(); + + /* + * Wait for all "best effort" probes to finish before going back to + * normal enforcement. + */ + wait_for_device_probe(); + fw_devlink_best_effort = false; +} + static void fw_devlink_unblock_consumers(struct device *dev) { struct device_link *link; @@ -3843,6 +3932,26 @@ struct device *device_find_child_by_name(struct device *parent, } EXPORT_SYMBOL_GPL(device_find_child_by_name); +static int match_any(struct device *dev, void *unused) +{ + return 1; +} + +/** + * device_find_any_child - device iterator for locating a child device, if any. + * @parent: parent struct device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a child device, if any. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +struct device *device_find_any_child(struct device *parent) +{ + return device_find_child(parent, NULL, match_any); +} +EXPORT_SYMBOL_GPL(device_find_any_child); + int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 11b0fb6414d3..70f79fc71539 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -172,7 +172,7 @@ static bool driver_deferred_probe_enable; * changes in the midst of a probe, then deferred processing should be triggered * again. */ -static void driver_deferred_probe_trigger(void) +void driver_deferred_probe_trigger(void) { if (!driver_deferred_probe_enable) return; @@ -256,7 +256,12 @@ static int deferred_devs_show(struct seq_file *s, void *data) } DEFINE_SHOW_ATTRIBUTE(deferred_devs); +#ifdef CONFIG_MODULES +int driver_deferred_probe_timeout = 10; +#else int driver_deferred_probe_timeout; +#endif + EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout); static int __init deferred_probe_timeout_setup(char *str) @@ -269,42 +274,12 @@ static int __init deferred_probe_timeout_setup(char *str) } __setup("deferred_probe_timeout=", deferred_probe_timeout_setup); -/** - * driver_deferred_probe_check_state() - Check deferred probe state - * @dev: device to check - * - * Return: - * * -ENODEV if initcalls have completed and modules are disabled. - * * -ETIMEDOUT if the deferred probe timeout was set and has expired - * and modules are enabled. - * * -EPROBE_DEFER in other cases. - * - * Drivers or subsystems can opt-in to calling this function instead of directly - * returning -EPROBE_DEFER. - */ -int driver_deferred_probe_check_state(struct device *dev) -{ - if (!IS_ENABLED(CONFIG_MODULES) && initcalls_done) { - dev_warn(dev, "ignoring dependency for device, assuming no driver\n"); - return -ENODEV; - } - - if (!driver_deferred_probe_timeout && initcalls_done) { - dev_warn(dev, "deferred probe timeout, ignoring dependency\n"); - return -ETIMEDOUT; - } - - return -EPROBE_DEFER; -} -EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); - static void deferred_probe_timeout_work_func(struct work_struct *work) { struct device_private *p; fw_devlink_drivers_done(); - driver_deferred_probe_timeout = 0; driver_deferred_probe_trigger(); flush_work(&deferred_probe_work); @@ -580,7 +555,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) { bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) && !drv->suppress_bind_attrs; - int ret; + int ret, link_ret; if (defer_all_probes) { /* @@ -592,9 +567,9 @@ static int really_probe(struct device *dev, struct device_driver *drv) return -EPROBE_DEFER; } - ret = device_links_check_suppliers(dev); - if (ret) - return ret; + link_ret = device_links_check_suppliers(dev); + if (link_ret == -EPROBE_DEFER) + return link_ret; pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); @@ -634,6 +609,15 @@ re_probe: ret = call_driver_probe(dev, drv); if (ret) { /* + * If fw_devlink_best_effort is active (denoted by -EAGAIN), the + * device might actually probe properly once some of its missing + * suppliers have probed. So, treat this as if the driver + * returned -EPROBE_DEFER. + */ + if (link_ret == -EAGAIN) + ret = -EPROBE_DEFER; + + /* * Return probe errors as positive values so that the callers * can distinguish them from other errors. */ @@ -1115,6 +1099,7 @@ static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) static int __driver_attach(struct device *dev, void *data) { struct device_driver *drv = data; + bool async = false; int ret; /* @@ -1153,9 +1138,11 @@ static int __driver_attach(struct device *dev, void *data) if (!dev->driver && !dev->p->async_driver) { get_device(dev); dev->p->async_driver = drv; - async_schedule_dev(__driver_attach_async_helper, dev); + async = true; } device_unlock(dev); + if (async) + async_schedule_dev(__driver_attach_async_helper, dev); return 0; } diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 8a3ddbae3b70..e4bffeabf344 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -482,6 +482,7 @@ int __init devtmpfs_init(void) if (err) { printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err); unregister_filesystem(&dev_fs_type); + thread = NULL; return err; } diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index ac3f34e80194..7c3590fd97c2 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -435,11 +435,11 @@ static int fw_decompress_xz_pages(struct device *dev, struct fw_priv *fw_priv, /* decompress onto the new allocated page */ page = fw_priv->pages[fw_priv->nr_pages - 1]; - xz_buf.out = kmap(page); + xz_buf.out = kmap_local_page(page); xz_buf.out_pos = 0; xz_buf.out_size = PAGE_SIZE; xz_ret = xz_dec_run(xz_dec, &xz_buf); - kunmap(page); + kunmap_local(xz_buf.out); fw_priv->size += xz_buf.out_pos; /* partial decompression means either end or error */ if (xz_buf.out_pos != PAGE_SIZE) diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c index 5b0b85b70b6f..77bad32c481a 100644 --- a/drivers/base/firmware_loader/sysfs.c +++ b/drivers/base/firmware_loader/sysfs.c @@ -242,19 +242,17 @@ static void firmware_rw(struct fw_priv *fw_priv, char *buffer, loff_t offset, size_t count, bool read) { while (count) { - void *page_data; int page_nr = offset >> PAGE_SHIFT; int page_ofs = offset & (PAGE_SIZE - 1); int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); - page_data = kmap(fw_priv->pages[page_nr]); - if (read) - memcpy(buffer, page_data + page_ofs, page_cnt); + memcpy_from_page(buffer, fw_priv->pages[page_nr], + page_ofs, page_cnt); else - memcpy(page_data + page_ofs, buffer, page_cnt); + memcpy_to_page(fw_priv->pages[page_nr], page_ofs, + buffer, page_cnt); - kunmap(fw_priv->pages[page_nr]); buffer += page_cnt; offset += page_cnt; count -= page_cnt; diff --git a/drivers/base/node.c b/drivers/base/node.c index 0ac6376ef7a1..eb0f43784c2b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, return n; } -static BIN_ATTR_RO(cpumap, 0); +static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, struct bin_attribute *attr, char *buf, @@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, return n; } -static BIN_ATTR_RO(cpulist, 0); +static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); /** * struct node_access_nodes - Access class device to hold user visible diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 55a10e6d4e2a..5a2e0232862e 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2733,7 +2733,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, mutex_unlock(&gpd_list_lock); dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); - return driver_deferred_probe_check_state(base_dev); + return -ENODEV; } dev_dbg(dev, "adding to PM domain %s\n", pd->name); diff --git a/drivers/base/topology.c b/drivers/base/topology.c index ac6ad9ab67f9..89f98be5c5b9 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -62,47 +62,47 @@ define_id_show_func(ppin, "0x%llx"); static DEVICE_ATTR_ADMIN_RO(ppin); define_siblings_read_func(thread_siblings, sibling_cpumask); -static BIN_ATTR_RO(thread_siblings, 0); -static BIN_ATTR_RO(thread_siblings_list, 0); +static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES); define_siblings_read_func(core_cpus, sibling_cpumask); -static BIN_ATTR_RO(core_cpus, 0); -static BIN_ATTR_RO(core_cpus_list, 0); +static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES); define_siblings_read_func(core_siblings, core_cpumask); -static BIN_ATTR_RO(core_siblings, 0); -static BIN_ATTR_RO(core_siblings_list, 0); +static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES); #ifdef TOPOLOGY_CLUSTER_SYSFS define_siblings_read_func(cluster_cpus, cluster_cpumask); -static BIN_ATTR_RO(cluster_cpus, 0); -static BIN_ATTR_RO(cluster_cpus_list, 0); +static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES); #endif #ifdef TOPOLOGY_DIE_SYSFS define_siblings_read_func(die_cpus, die_cpumask); -static BIN_ATTR_RO(die_cpus, 0); -static BIN_ATTR_RO(die_cpus_list, 0); +static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES); #endif define_siblings_read_func(package_cpus, core_cpumask); -static BIN_ATTR_RO(package_cpus, 0); -static BIN_ATTR_RO(package_cpus_list, 0); +static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES); #ifdef TOPOLOGY_BOOK_SYSFS define_id_show_func(book_id, "%d"); static DEVICE_ATTR_RO(book_id); define_siblings_read_func(book_siblings, book_cpumask); -static BIN_ATTR_RO(book_siblings, 0); -static BIN_ATTR_RO(book_siblings_list, 0); +static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES); #endif #ifdef TOPOLOGY_DRAWER_SYSFS define_id_show_func(drawer_id, "%d"); static DEVICE_ATTR_RO(drawer_id); define_siblings_read_func(drawer_siblings, drawer_cpumask); -static BIN_ATTR_RO(drawer_siblings, 0); -static BIN_ATTR_RO(drawer_siblings_list, 0); +static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES); #endif static struct bin_attribute *bin_attrs[] = { diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 5696314ae69e..41f4eb005219 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -40,7 +40,7 @@ static int of_iommu_xlate(struct device *dev, * a proper probe-ordering dependency mechanism in future. */ if (!ops) - return driver_deferred_probe_check_state(dev); + return -ENODEV; if (!try_module_get(ops->owner)) return -ENODEV; diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1c1584fca632..3e79c2c51929 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -47,9 +47,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, * just fall back to poll mode */ if (rc == -EPROBE_DEFER) - rc = driver_deferred_probe_check_state(&phy->mdio.dev); - if (rc == -EPROBE_DEFER) - return rc; + rc = -ENODEV; if (rc > 0) { phy->irq = rc; diff --git a/drivers/of/base.c b/drivers/of/base.c index d4f98c8469ed..a19cd0c73644 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1919,6 +1919,8 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) of_property_read_string(of_aliases, "stdout", &name); if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); + if (of_stdout) + of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT; } if (!of_aliases) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index 3fb238714718..ef898ee8ca6b 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -129,7 +129,7 @@ static int dt_to_map_one_config(struct pinctrl *p, np_pctldev = of_get_next_parent(np_pctldev); if (!np_pctldev || of_node_is_root(np_pctldev)) { of_node_put(np_pctldev); - ret = driver_deferred_probe_check_state(p->dev); + ret = -ENODEV; /* keep deferring if modules are enabled */ if (IS_ENABLED(CONFIG_MODULES) && !allow_default && ret < 0) ret = -EPROBE_DEFER; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 1c14d682ffed..8f97a3eacdea 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2687,11 +2687,6 @@ int spi_slave_abort(struct spi_device *spi) } EXPORT_SYMBOL_GPL(spi_slave_abort); -static int match_true(struct device *dev, void *data) -{ - return 1; -} - static ssize_t slave_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -2699,7 +2694,7 @@ static ssize_t slave_show(struct device *dev, struct device_attribute *attr, dev); struct device *child; - child = device_find_child(&ctlr->dev, NULL, match_true); + child = device_find_any_child(&ctlr->dev); return sprintf(buf, "%s\n", child ? to_spi_device(child)->modalias : NULL); } @@ -2718,7 +2713,7 @@ static ssize_t slave_store(struct device *dev, struct device_attribute *attr, if (rc != 1 || !name[0]) return -EINVAL; - child = device_find_child(&ctlr->dev, NULL, match_true); + child = device_find_any_child(&ctlr->dev); if (child) { /* Remove registered slave */ device_unregister(child); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 6eca72cfa1f2..1cc88ba6de90 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1343,14 +1343,17 @@ static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; + /* Short-circuit if non-root @kn has already finished removal. */ + if (!kn) + return; + lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* - * Short-circuit if non-root @kn has already finished removal. * This is for kernfs_remove_self() which plays with active ref * after removal. */ - if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) + if (kn->parent && RB_EMPTY_NODE(&kn->rb)) return; pr_debug("kernfs %s: removing\n", kn->name); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e3abfa843879..b3ec34386b43 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -18,21 +18,8 @@ #include "kernfs-internal.h" -/* - * There's one kernfs_open_file for each open file and one kernfs_open_node - * for each kernfs_node with one or more open files. - * - * kernfs_node->attr.open points to kernfs_open_node. attr.open is - * protected by kernfs_open_node_lock. - * - * filp->private_data points to seq_file whose ->private points to - * kernfs_open_file. kernfs_open_files are chained at - * kernfs_open_node->files, which is protected by kernfs_open_file_mutex. - */ -static DEFINE_SPINLOCK(kernfs_open_node_lock); -static DEFINE_MUTEX(kernfs_open_file_mutex); - struct kernfs_open_node { + struct rcu_head rcu_head; atomic_t event; wait_queue_head_t poll; struct list_head files; /* goes through kernfs_open_file.list */ @@ -51,6 +38,70 @@ struct kernfs_open_node { static DEFINE_SPINLOCK(kernfs_notify_lock); static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; +static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) +{ + int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); + + return &kernfs_locks->open_file_mutex[idx]; +} + +static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) +{ + struct mutex *lock; + + lock = kernfs_open_file_mutex_ptr(kn); + + mutex_lock(lock); + + return lock; +} + +/** + * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn. + * + * @of: associated kernfs_open_file instance. + * @kn: target kernfs_node. + * + * Fetch and return ->attr.open of @kn if @of->list is non empty. + * If @of->list is not empty we can safely assume that @of is on + * @kn->attr.open->files list and this guarantees that @kn->attr.open + * will not vanish i.e. dereferencing outside RCU read-side critical + * section is safe here. + * + * The caller needs to make sure that @of->list is not empty. + */ +static struct kernfs_open_node * +kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn) +{ + struct kernfs_open_node *on; + + on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list)); + + return on; +} + +/** + * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn + * + * @kn: target kernfs_node. + * + * Fetch and return ->attr.open of @kn when caller holds the + * kernfs_open_file_mutex_ptr(kn). + * + * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when + * the caller guarantees that this mutex is being held, other updaters can't + * change ->attr.open and this means that we can safely deref ->attr.open + * outside RCU read-side critical section. + * + * The caller needs to make sure that kernfs_open_file_mutex is held. + */ +static struct kernfs_open_node * +kernfs_deref_open_node_protected(struct kernfs_node *kn) +{ + return rcu_dereference_protected(kn->attr.open, + lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); +} + static struct kernfs_open_file *kernfs_of(struct file *file) { return ((struct seq_file *)file->private_data)->private; @@ -156,8 +207,12 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v) static int kernfs_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; + struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn); - of->event = atomic_read(&of->kn->attr.open->event); + if (!on) + return -EINVAL; + + of->event = atomic_read(&on->event); return of->kn->attr.ops->seq_show(sf, v); } @@ -180,6 +235,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); const struct kernfs_ops *ops; + struct kernfs_open_node *on; char *buf; buf = of->prealloc_buf; @@ -201,7 +257,15 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) goto out_free; } - of->event = atomic_read(&of->kn->attr.open->event); + on = kernfs_deref_open_node(of, of->kn); + if (!on) { + len = -EINVAL; + mutex_unlock(&of->mutex); + goto out_free; + } + + of->event = atomic_read(&on->event); + ops = kernfs_ops(of->kn); if (ops->read) len = ops->read(of, buf, len, iocb->ki_pos); @@ -243,7 +307,7 @@ static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) * There is no easy way for us to know if userspace is only doing a partial * write, so we don't support them. We expect the entire buffer to come on * the first write. Hint: if you're writing a value, first read the file, - * modify only the the value you're changing, then write entire buffer + * modify only the value you're changing, then write entire buffer * back. */ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) @@ -484,7 +548,6 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) * It is not possible to successfully wrap close. * So error if someone is trying to use close. */ - rc = -EINVAL; if (vma->vm_ops && vma->vm_ops->close) goto out_put; @@ -518,37 +581,31 @@ static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) { struct kernfs_open_node *on, *new_on = NULL; + struct mutex *mutex = NULL; - retry: - mutex_lock(&kernfs_open_file_mutex); - spin_lock_irq(&kernfs_open_node_lock); - - if (!kn->attr.open && new_on) { - kn->attr.open = new_on; - new_on = NULL; - } - - on = kn->attr.open; - if (on) - list_add_tail(&of->list, &on->files); - - spin_unlock_irq(&kernfs_open_node_lock); - mutex_unlock(&kernfs_open_file_mutex); + mutex = kernfs_open_file_mutex_lock(kn); + on = kernfs_deref_open_node_protected(kn); if (on) { - kfree(new_on); + list_add_tail(&of->list, &on->files); + mutex_unlock(mutex); return 0; + } else { + /* not there, initialize a new one */ + new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); + if (!new_on) { + mutex_unlock(mutex); + return -ENOMEM; + } + atomic_set(&new_on->event, 1); + init_waitqueue_head(&new_on->poll); + INIT_LIST_HEAD(&new_on->files); + list_add_tail(&of->list, &new_on->files); + rcu_assign_pointer(kn->attr.open, new_on); } + mutex_unlock(mutex); - /* not there, initialize a new one and retry */ - new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); - if (!new_on) - return -ENOMEM; - - atomic_set(&new_on->event, 1); - init_waitqueue_head(&new_on->poll); - INIT_LIST_HEAD(&new_on->files); - goto retry; + return 0; } /** @@ -567,24 +624,26 @@ static int kernfs_get_open_node(struct kernfs_node *kn, static void kernfs_unlink_open_file(struct kernfs_node *kn, struct kernfs_open_file *of) { - struct kernfs_open_node *on = kn->attr.open; - unsigned long flags; + struct kernfs_open_node *on; + struct mutex *mutex = NULL; - mutex_lock(&kernfs_open_file_mutex); - spin_lock_irqsave(&kernfs_open_node_lock, flags); + mutex = kernfs_open_file_mutex_lock(kn); + + on = kernfs_deref_open_node_protected(kn); + if (!on) { + mutex_unlock(mutex); + return; + } if (of) list_del(&of->list); - if (list_empty(&on->files)) - kn->attr.open = NULL; - else - on = NULL; - - spin_unlock_irqrestore(&kernfs_open_node_lock, flags); - mutex_unlock(&kernfs_open_file_mutex); + if (list_empty(&on->files)) { + rcu_assign_pointer(kn->attr.open, NULL); + kfree_rcu(on, rcu_head); + } - kfree(on); + mutex_unlock(mutex); } static int kernfs_fop_open(struct inode *inode, struct file *file) @@ -722,11 +781,11 @@ static void kernfs_release_file(struct kernfs_node *kn, /* * @of is guaranteed to have no other file operations in flight and * we just want to synchronize release and drain paths. - * @kernfs_open_file_mutex is enough. @of->mutex can't be used + * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used * here because drain path may be called from places which can * cause circular dependency. */ - lockdep_assert_held(&kernfs_open_file_mutex); + lockdep_assert_held(kernfs_open_file_mutex_ptr(kn)); if (!of->released) { /* @@ -743,11 +802,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp) { struct kernfs_node *kn = inode->i_private; struct kernfs_open_file *of = kernfs_of(filp); + struct mutex *mutex = NULL; if (kn->flags & KERNFS_HAS_RELEASE) { - mutex_lock(&kernfs_open_file_mutex); + mutex = kernfs_open_file_mutex_lock(kn); kernfs_release_file(kn, of); - mutex_unlock(&kernfs_open_file_mutex); + mutex_unlock(mutex); } kernfs_unlink_open_file(kn, of); @@ -762,6 +822,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; struct kernfs_open_file *of; + struct mutex *mutex = NULL; if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) return; @@ -771,20 +832,19 @@ void kernfs_drain_open_files(struct kernfs_node *kn) * ->attr.open at this point of time. This check allows early bail out * if ->attr.open is already NULL. kernfs_unlink_open_file makes * ->attr.open NULL only while holding kernfs_open_file_mutex so below - * check under kernfs_open_file_mutex will ensure bailing out if + * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if * ->attr.open became NULL while waiting for the mutex. */ - if (!kn->attr.open) + if (!rcu_access_pointer(kn->attr.open)) return; - mutex_lock(&kernfs_open_file_mutex); - if (!kn->attr.open) { - mutex_unlock(&kernfs_open_file_mutex); + mutex = kernfs_open_file_mutex_lock(kn); + on = kernfs_deref_open_node_protected(kn); + if (!on) { + mutex_unlock(mutex); return; } - on = kn->attr.open; - list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); @@ -795,7 +855,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn) kernfs_release_file(kn, of); } - mutex_unlock(&kernfs_open_file_mutex); + mutex_unlock(mutex); } /* @@ -815,7 +875,10 @@ void kernfs_drain_open_files(struct kernfs_node *kn) __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) { struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry); - struct kernfs_open_node *on = kn->attr.open; + struct kernfs_open_node *on = kernfs_deref_open_node(of, kn); + + if (!on) + return EPOLLERR; poll_wait(of->file, &on->poll, wait); @@ -922,13 +985,13 @@ void kernfs_notify(struct kernfs_node *kn) return; /* kick poll immediately */ - spin_lock_irqsave(&kernfs_open_node_lock, flags); - on = kn->attr.open; + rcu_read_lock(); + on = rcu_dereference(kn->attr.open); if (on) { atomic_inc(&on->event); wake_up_interruptible(&on->poll); } - spin_unlock_irqrestore(&kernfs_open_node_lock, flags); + rcu_read_unlock(); /* schedule work to kick fsnotify */ spin_lock_irqsave(&kernfs_notify_lock, flags); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index eeaa779b929c..3ae214d02d44 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -164,4 +164,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn); */ extern const struct inode_operations kernfs_symlink_iops; +/* + * kernfs locks + */ +extern struct kernfs_global_locks *kernfs_locks; #endif /* __KERNFS_INTERNAL_H */ diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index cfa79715fc1a..d0859f72d2d6 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -20,6 +20,7 @@ #include "kernfs-internal.h" struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; +struct kernfs_global_locks *kernfs_locks; static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { @@ -387,6 +388,22 @@ void kernfs_kill_sb(struct super_block *sb) kfree(info); } +static void __init kernfs_mutex_init(void) +{ + int count; + + for (count = 0; count < NR_KERNFS_LOCKS; count++) + mutex_init(&kernfs_locks->open_file_mutex[count]); +} + +static void __init kernfs_lock_init(void) +{ + kernfs_locks = kmalloc(sizeof(struct kernfs_global_locks), GFP_KERNEL); + WARN_ON(!kernfs_locks); + + kernfs_mutex_init(); +} + void __init kernfs_init(void) { kernfs_node_cache = kmem_cache_create("kernfs_node_cache", @@ -397,4 +414,6 @@ void __init kernfs_init(void) kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache", sizeof(struct kernfs_iattrs), 0, SLAB_PANIC, NULL); + + kernfs_lock_init(); } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7e7a33b6c8d7..f6d4539c3895 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1431,7 +1431,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); -int find_acpi_cpu_cache_topology(unsigned int cpu, int level); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1453,10 +1452,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } -static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) -{ - return -EINVAL; -} #endif #ifdef CONFIG_ACPI_PCC diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 58cbe18d825c..a07b510e7dc5 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -68,7 +68,6 @@ struct cpu_topology { int core_id; int cluster_id; int package_id; - int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 4ff37cb763ae..00b7a6ae8617 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -82,6 +82,9 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); +bool last_level_cache_is_valid(unsigned int cpu); +bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); +int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* * acpi_find_last_cache_level is only called on ACPI enabled diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe29ac7cc469..4592d0845941 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, [0] = 1UL \ } } +/* + * Provide a valid theoretical max size for cpumap and cpulist sysfs files + * to avoid breaking userspace which may allocate a buffer based on the size + * reported by e.g. fstat. + * + * for cpumap NR_CPUS * 9/32 - 1 should be an exact length. + * + * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up + * to 2 orders of magnitude larger than 8192. And then we divide by 2 to + * cover a worst-case of every other cpu being on one of two nodes for a + * very large NR_CPUS. + * + * Use PAGE_SIZE as a minimum for smaller configurations. + */ +#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \ + ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) +#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) + #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/device.h b/include/linux/device.h index dc941997795c..424b55df0272 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -905,6 +905,8 @@ struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); struct device *device_find_child_by_name(struct device *parent, const char *name); +struct device *device_find_any_child(struct device *parent); + int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 700453017e1c..7acaabde5396 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -129,6 +129,7 @@ extern struct device_driver *driver_find(const char *name, struct bus_type *bus); extern int driver_probe_done(void); extern void wait_for_device_probe(void); +void __init wait_for_init_devices_probe(void); /* sysfs interface for exporting driver attributes */ @@ -241,7 +242,6 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); -int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); /** diff --git a/include/linux/firmware/trusted_foundations.h b/include/linux/firmware/trusted_foundations.h index be5984bda592..931b6c5c72df 100644 --- a/include/linux/firmware/trusted_foundations.h +++ b/include/linux/firmware/trusted_foundations.h @@ -71,12 +71,16 @@ static inline void register_trusted_foundations( static inline void of_register_trusted_foundations(void) { + struct device_node *np = of_find_compatible_node(NULL, NULL, "tlm,trusted-foundations"); + + if (!np) + return; + of_node_put(np); /* * If we find the target should enable TF but does not support it, * fail as the system won't be able to do much anyway */ - if (of_find_compatible_node(NULL, NULL, "tlm,trusted-foundations")) - register_trusted_foundations(NULL); + register_trusted_foundations(NULL); } static inline bool trusted_foundations_registered(void) diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9a81c4410b9f..89b9bdfca925 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -27,11 +27,15 @@ struct device; * driver needs its child devices to be bound with * their respective drivers as soon as they are * added. + * BEST_EFFORT: The fwnode/device needs to probe early and might be missing some + * suppliers. Only enforce ordering with suppliers that have + * drivers. */ #define FWNODE_FLAG_LINKS_ADDED BIT(0) #define FWNODE_FLAG_NOT_DEVICE BIT(1) #define FWNODE_FLAG_INITIALIZED BIT(2) #define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) +#define FWNODE_FLAG_BEST_EFFORT BIT(4) struct fwnode_handle { struct fwnode_handle *secondary; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index e2ae15a6225e..367044d7708c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -18,6 +18,7 @@ #include <linux/uidgid.h> #include <linux/wait.h> #include <linux/rwsem.h> +#include <linux/cache.h> struct file; struct dentry; @@ -34,6 +35,62 @@ struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; +/* + * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash + * table of locks. + * Having a small hash table would impact scalability, since + * more and more kernfs_node objects will end up using same lock + * and having a very large hash table would waste memory. + * + * At the moment size of hash table of locks is being set based on + * the number of CPUs as follows: + * + * NR_CPU NR_KERNFS_LOCK_BITS NR_KERNFS_LOCKS + * 1 1 2 + * 2-3 2 4 + * 4-7 4 16 + * 8-15 6 64 + * 16-31 8 256 + * 32 and more 10 1024 + * + * The above relation between NR_CPU and number of locks is based + * on some internal experimentation which involved booting qemu + * with different values of smp, performing some sysfs operations + * on all CPUs and observing how increase in number of locks impacts + * completion time of these sysfs operations on each CPU. + */ +#ifdef CONFIG_SMP +#define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32))) +#else +#define NR_KERNFS_LOCK_BITS 1 +#endif + +#define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS) + +/* + * There's one kernfs_open_file for each open file and one kernfs_open_node + * for each kernfs_node with one or more open files. + * + * filp->private_data points to seq_file whose ->private points to + * kernfs_open_file. + * + * kernfs_open_files are chained at kernfs_open_node->files, which is + * protected by kernfs_global_locks.open_file_mutex[i]. + * + * To reduce possible contention in sysfs access, arising due to single + * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node + * object address as hash keys to get the index of these locks. + * + * Hashed mutexes are safe to use here because operations using these don't + * rely on global exclusion. + * + * In future we intend to replace other global locks with hashed ones as well. + * kernfs_global_locks acts as a holder for all such hash tables. + */ +struct kernfs_global_locks { + struct mutex open_file_mutex[NR_KERNFS_LOCKS]; +}; + enum kernfs_node_type { KERNFS_DIR = 0x0001, KERNFS_FILE = 0x0002, @@ -114,7 +171,7 @@ struct kernfs_elem_symlink { struct kernfs_elem_attr { const struct kernfs_ops *ops; - struct kernfs_open_node *open; + struct kernfs_open_node __rcu *open; loff_t size; struct kernfs_node *notify_next; /* for kernfs_notify() */ }; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 79a71eb96111..35cd8287642a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1560,7 +1560,7 @@ config DEBUG_KOBJECT_RELEASE help kobjects are reference counted objects. This means that their last reference count put is not predictable, and the kobject can - live on past the point at which a driver decides to drop it's + live on past the point at which a driver decides to drop its initial reference to the kobject gained on allocation. An example of this would be a struct device which has just been unregistered. diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f53a0f2453af..e90bc0aa85c7 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1434,6 +1434,7 @@ __be32 __init root_nfs_parse_addr(char *name) static int __init wait_for_devices(void) { int i; + bool try_init_devs = true; for (i = 0; i < DEVICE_WAIT_MAX; i++) { struct net_device *dev; @@ -1452,6 +1453,11 @@ static int __init wait_for_devices(void) rtnl_unlock(); if (found) return 0; + if (try_init_devs && + (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) { + try_init_devs = false; + wait_for_init_devices_probe(); + } ssleep(1); } return -ENODEV; |