aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/kernel/hw_breakpoint.c13
-rw-r--r--arch/arm/kernel/patch.c2
-rw-r--r--arch/arm/probes/kprobes/core.c3
-rw-r--r--arch/arm64/include/asm/insn.h1
-rw-r--r--arch/arm64/kernel/insn.c5
-rw-r--r--arch/mips/kernel/jump_label.c2
-rw-r--r--arch/powerpc/include/asm/topology.h6
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c14
-rw-r--r--arch/powerpc/mm/numa.c22
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c7
-rw-r--r--arch/s390/kernel/jump_label.c2
-rw-r--r--arch/s390/kernel/kprobes.c4
-rw-r--r--arch/s390/kernel/time.c6
-rw-r--r--arch/sparc/kernel/jump_label.c2
-rw-r--r--arch/tile/kernel/jump_label.c2
-rw-r--r--arch/x86/events/core.c1
-rw-r--r--arch/x86/events/intel/core.c11
-rw-r--r--arch/x86/events/intel/cqm.c16
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--drivers/acpi/processor_driver.c4
-rw-r--r--drivers/acpi/processor_throttling.c16
-rw-r--r--drivers/cpufreq/cpufreq.c21
-rw-r--r--drivers/hwtracing/coresight/coresight-etm3x.c20
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x.c20
-rw-r--r--drivers/pci/pci-driver.c47
-rw-r--r--include/linux/cpu.h36
-rw-r--r--include/linux/cpuhotplug.h38
-rw-r--r--include/linux/padata.h3
-rw-r--r--include/linux/pci.h1
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/stop_machine.h26
-rw-r--r--kernel/cpu.c241
-rw-r--r--kernel/events/core.c106
-rw-r--r--kernel/jump_label.c20
-rw-r--r--kernel/kprobes.c59
-rw-r--r--kernel/padata.c43
-rw-r--r--kernel/stop_machine.c11
40 files changed, 474 insertions, 375 deletions
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index be3b3fbd382f..af2a7f1e3103 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1090,7 +1090,7 @@ static int __init arch_hw_breakpoint_init(void)
* driven low on this core and there isn't an architected way to
* determine that.
*/
- get_online_cpus();
+ cpus_read_lock();
register_undef_hook(&debug_reg_hook);
/*
@@ -1098,15 +1098,16 @@ static int __init arch_hw_breakpoint_init(void)
* assume that a halting debugger will leave the world in a nice state
* for us.
*/
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
- dbg_reset_online, NULL);
+ ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/hw_breakpoint:online",
+ dbg_reset_online, NULL);
unregister_undef_hook(&debug_reg_hook);
if (WARN_ON(ret < 0) || !cpumask_empty(&debug_err_mask)) {
core_num_brps = 0;
core_num_wrps = 0;
if (ret > 0)
- cpuhp_remove_state_nocalls(ret);
- put_online_cpus();
+ cpuhp_remove_state_nocalls_cpuslocked(ret);
+ cpus_read_unlock();
return 0;
}
@@ -1124,7 +1125,7 @@ static int __init arch_hw_breakpoint_init(void)
TRAP_HWBKPT, "watchpoint debug exception");
hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
TRAP_HWBKPT, "breakpoint debug exception");
- put_online_cpus();
+ cpus_read_unlock();
/* Register PM notifiers. */
pm_init();
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
index 020560b2dcb7..a1a34722c655 100644
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -124,5 +124,5 @@ void __kprobes patch_text(void *addr, unsigned int insn)
.insn = insn,
};
- stop_machine(patch_text_stop_machine, &patch, NULL);
+ stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
}
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index ad1f4e6a9e33..52d1cd14fda4 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -182,7 +182,8 @@ void __kprobes kprobes_remove_breakpoint(void *addr, unsigned int insn)
.addr = addr,
.insn = insn,
};
- stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask);
+ stop_machine_cpuslocked(__kprobes_remove_breakpoint, &p,
+ cpu_online_mask);
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 29cb2ca756f6..4214c38d016b 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -433,7 +433,6 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset);
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
s32 aarch64_insn_adrp_get_offset(u32 insn);
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index b884a926a632..cd872133e88e 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -255,6 +255,7 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
return ret;
}
+static
int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
{
struct aarch64_insn_patch patch = {
@@ -267,8 +268,8 @@ int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
if (cnt <= 0)
return -EINVAL;
- return stop_machine(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
+ return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
}
int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index 3e586daa3a32..32e3168316cd 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -58,7 +58,6 @@ void arch_jump_label_transform(struct jump_entry *e,
insn.word = 0; /* nop */
}
- get_online_cpus();
mutex_lock(&text_mutex);
if (IS_ENABLED(CONFIG_CPU_MICROMIPS)) {
insn_p->halfword[0] = insn.word >> 16;
@@ -70,7 +69,6 @@ void arch_jump_label_transform(struct jump_entry *e,
(unsigned long)insn_p + sizeof(*insn_p));
mutex_unlock(&text_mutex);
- put_online_cpus();
}
#endif /* HAVE_JUMP_LABEL */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 329771559cbb..dc4e15937ccf 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -43,6 +43,7 @@ extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
+extern int numa_update_cpu_topology(bool cpus_locked);
static inline int early_cpu_to_node(int cpu)
{
@@ -71,6 +72,11 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
int nid)
{
}
+
+static inline int numa_update_cpu_topology(bool cpus_locked)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 3650732639ed..0f0b1b2f3b60 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -283,7 +283,7 @@ static void prrn_work_fn(struct work_struct *work)
* the RTAS event.
*/
pseries_devicetree_update(-prrn_update_scope);
- arch_update_cpu_topology();
+ numa_update_cpu_topology(false);
}
static DECLARE_WORK(prrn_work, prrn_work_fn);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8d1a365b8edc..773b35d16a0b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3368,7 +3368,7 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
- get_online_cpus();
+ cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
@@ -3390,17 +3390,17 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
- put_online_cpus();
+ cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
- cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
- "ppc/kvm_book3s:prepare",
- kvmppc_set_host_core,
- kvmppc_clear_host_core);
- put_online_cpus();
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 371792e4418f..b95c584ce19d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1311,8 +1311,10 @@ static int update_lookup_table(void *data)
/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
+ *
+ * cpus_locked says whether we already hold cpu_hotplug_lock.
*/
-int arch_update_cpu_topology(void)
+int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
@@ -1400,15 +1402,23 @@ int arch_update_cpu_topology(void)
if (!cpumask_weight(&updated_cpus))
goto out;
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_cpu_topology, &updates[0],
+ &updated_cpus);
+ else
+ stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
/*
* Update the numa-cpu lookup table with the new mappings, even for
* offline CPUs. It is best to perform this update from the stop-
* machine context.
*/
- stop_machine(update_lookup_table, &updates[0],
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_lookup_table, &updates[0],
cpumask_of(raw_smp_processor_id()));
+ else
+ stop_machine(update_lookup_table, &updates[0],
+ cpumask_of(raw_smp_processor_id()));
for (ud = &updates[0]; ud; ud = ud->next) {
unregister_cpu_under_node(ud->cpu, ud->old_nid);
@@ -1426,6 +1436,12 @@ out:
return changed;
}
+int arch_update_cpu_topology(void)
+{
+ lockdep_assert_cpus_held();
+ return numa_update_cpu_topology(true);
+}
+
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 8c6119280c13..309876d699e9 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -348,7 +348,7 @@ static int set_subcores_per_core(int new_mode)
state->master = 0;
}
- get_online_cpus();
+ cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
@@ -356,9 +356,10 @@ static int set_subcores_per_core(int new_mode)
/* Ensure state is consistent before we call the other cpus */
mb();
- stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 6aa630a8d24f..262506cee4c3 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -93,7 +93,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
args.entry = entry;
args.type = type;
- stop_machine(__sm_arch_jump_label_transform, &args, NULL);
+ stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3d6a99746454..6842e4501e2e 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -196,7 +196,7 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -204,7 +204,7 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index c3a52f9a69a0..192efdfac918 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -636,10 +636,10 @@ static void stp_work_fn(struct work_struct *work)
goto out_unlock;
memset(&stp_sync, 0, sizeof(stp_sync));
- get_online_cpus();
+ cpus_read_lock();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
- put_online_cpus();
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
if (!check_sync_clock())
/*
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 07933b9e9ce0..93adde1ac166 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -41,12 +41,10 @@ void arch_jump_label_transform(struct jump_entry *entry,
val = 0x01000000;
}
- get_online_cpus();
mutex_lock(&text_mutex);
*insn = val;
flushi(insn);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
#endif
diff --git a/arch/tile/kernel/jump_label.c b/arch/tile/kernel/jump_label.c
index 07802d586988..93931a46625b 100644
--- a/arch/tile/kernel/jump_label.c
+++ b/arch/tile/kernel/jump_label.c
@@ -45,14 +45,12 @@ static void __jump_label_transform(struct jump_entry *e,
void arch_jump_label_transform(struct jump_entry *e,
enum jump_label_type type)
{
- get_online_cpus();
mutex_lock(&text_mutex);
__jump_label_transform(e, type);
flush_icache_range(e->code, e->code + sizeof(tilegx_bundle_bits));
mutex_unlock(&text_mutex);
- put_online_cpus();
}
__init_or_module void arch_jump_label_transform_static(struct jump_entry *e,
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 2de0dd73830a..ff1ea2fb9705 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2233,7 +2233,6 @@ void perf_check_microcode(void)
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 31acf2a98394..aa62437d1aa1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3425,12 +3425,10 @@ static void intel_snb_check_microcode(void)
int pebs_broken = 0;
int cpu;
- get_online_cpus();
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
- put_online_cpus();
if (pebs_broken == x86_pmu.pebs_broken)
return;
@@ -3503,7 +3501,9 @@ static bool check_msr(unsigned long msr, u64 mask)
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
+ cpus_read_lock();
intel_snb_check_microcode();
+ cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -4175,13 +4175,12 @@ static __init int fixup_ht_bug(void)
lockup_detector_resume();
- get_online_cpus();
+ cpus_read_lock();
- for_each_online_cpu(c) {
+ for_each_online_cpu(c)
free_excl_cntrs(c);
- }
- put_online_cpus();
+ cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 8c00dc09a5d2..2521f771f2f5 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -1682,7 +1682,7 @@ static int __init intel_cqm_init(void)
*
* Also, check that the scales match on all cpus.
*/
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1746,14 +1746,14 @@ static int __init intel_cqm_init(void)
* Setup the hot cpu notifier once we are sure cqm
* is enabled to avoid notifier leak.
*/
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
- "perf/x86/cqm:starting",
- intel_cqm_cpu_starting, NULL);
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "perf/x86/cqm:online",
- NULL, intel_cqm_cpu_exit);
-
+ cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_STARTING,
+ "perf/x86/cqm:starting",
+ intel_cqm_cpu_starting, NULL);
+ cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_ONLINE,
+ "perf/x86/cqm:online",
+ NULL, intel_cqm_cpu_exit);
out:
- put_online_cpus();
+ cpus_read_unlock();
if (ret) {
kfree(str);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 2bce84d91c2b..c5bb63be4ba1 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -807,10 +807,8 @@ void mtrr_save_state(void)
if (!mtrr_enabled())
return;
- get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
- put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index c37bd0f39c70..ab4f491da2a9 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -105,11 +105,9 @@ static void __jump_label_transform(struct jump_entry *entry,
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- get_online_cpus();
mutex_lock(&text_mutex);
__jump_label_transform(entry, type, NULL, 0);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
static enum {
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 8697a82bd465..591d1dd3f04e 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -268,9 +268,9 @@ static int acpi_processor_start(struct device *dev)
return -ENODEV;
/* Protect against concurrent CPU hotplug operations */
- get_online_cpus();
+ cpu_hotplug_disable();
ret = __acpi_processor_start(device);
- put_online_cpus();
+ cpu_hotplug_enable();
return ret;
}
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index 3de34633f7f9..7f9aff4b8d62 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -909,6 +909,13 @@ static long __acpi_processor_get_throttling(void *data)
return pr->throttling.acpi_processor_get_throttling(pr);
}
+static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
+{
+ if (direct || (is_percpu_thread() && cpu == smp_processor_id()))
+ return fn(arg);
+ return work_on_cpu(cpu, fn, arg);
+}
+
static int acpi_processor_get_throttling(struct acpi_processor *pr)
{
if (!pr)
@@ -926,7 +933,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
if (!cpu_online(pr->id))
return -ENODEV;
- return work_on_cpu(pr->id, __acpi_processor_get_throttling, pr);
+ return call_on_cpu(pr->id, __acpi_processor_get_throttling, pr, false);
}
static int acpi_processor_get_fadt_info(struct acpi_processor *pr)
@@ -1076,13 +1083,6 @@ static long acpi_processor_throttling_fn(void *data)
arg->target_state, arg->force);
}
-static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
-{
- if (direct)
- return fn(arg);
- return work_on_cpu(cpu, fn, arg);
-}
-
static int __acpi_processor_set_throttling(struct acpi_processor *pr,
int state, bool force, bool direct)
{
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 26b643d57847..29c5b0cbad96 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -887,7 +887,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
@@ -895,7 +895,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
up_write(&policy->rwsem);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -2441,7 +2441,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
pr_debug("trying to register driver %s\n", driver_data->name);
/* Protect against concurrent CPU online/offline. */
- get_online_cpus();
+ cpus_read_lock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
@@ -2474,9 +2474,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
goto err_if_unreg;
}
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
- cpuhp_cpufreq_online,
- cpuhp_cpufreq_offline);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "cpufreq:online",
+ cpuhp_cpufreq_online,
+ cpuhp_cpufreq_offline);
if (ret < 0)
goto err_if_unreg;
hp_online = ret;
@@ -2494,7 +2495,7 @@ err_null_driver:
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
out:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
@@ -2517,17 +2518,17 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
pr_debug("unregistering driver %s\n", driver->name);
/* Protect against concurrent cpu hotplug */
- get_online_cpus();
+ cpus_read_lock();
subsys_interface_unregister(&cpufreq_interface);
remove_boost_sysfs_file();
- cpuhp_remove_state_nocalls(hp_online);
+ cpuhp_remove_state_nocalls_cpuslocked(hp_online);
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index a51b6b64ecdf..93ee8fc539be 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -587,7 +587,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev)
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
- get_online_cpus();
+ cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
@@ -597,7 +597,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev)
smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
- put_online_cpus();
+ cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
@@ -795,7 +795,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
drvdata->cpu = pdata ? pdata->cpu : 0;
- get_online_cpus();
+ cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
@@ -803,17 +803,17 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
dev_err(dev, "ETM arch init failed\n");
if (!etm_count++) {
- cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight:starting",
- etm_starting_cpu, etm_dying_cpu);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "arm/coresight:online",
- etm_online_cpu, NULL);
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight:starting",
+ etm_starting_cpu, etm_dying_cpu);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight:online",
+ etm_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
- put_online_cpus();
+ cpus_read_unlock();
if (etm_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index d1340fb4e457..532adc9dd32a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -371,7 +371,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev)
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
- get_online_cpus();
+ cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
@@ -381,7 +381,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev)
smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
- put_online_cpus();
+ cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
@@ -982,7 +982,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
drvdata->cpu = pdata ? pdata->cpu : 0;
- get_online_cpus();
+ cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
@@ -990,18 +990,18 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
dev_err(dev, "ETM arch init failed\n");
if (!etm4_count++) {
- cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight4:starting",
- etm4_starting_cpu, etm4_dying_cpu);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "arm/coresight4:online",
- etm4_online_cpu, NULL);
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight4:starting",
+ etm4_starting_cpu, etm4_dying_cpu);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight4:online",
+ etm4_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
- put_online_cpus();
+ cpus_read_unlock();
if (etm4_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 192e7b681b96..fe6be6382505 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -320,10 +320,19 @@ static long local_pci_probe(void *_ddi)
return 0;
}
+static bool pci_physfn_is_probed(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_IOV
+ return dev->is_virtfn && dev->physfn->is_probed;
+#else
+ return false;
+#endif
+}
+
static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
const struct pci_device_id *id)
{
- int error, node;
+ int error, node, cpu;
struct drv_dev_and_id ddi = { drv, dev, id };
/*
@@ -332,33 +341,27 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
* on the right node.
*/
node = dev_to_node(&dev->dev);
+ dev->is_probed = 1;
+
+ cpu_hotplug_disable();
/*
- * On NUMA systems, we are likely to call a PF probe function using
- * work_on_cpu(). If that probe calls pci_enable_sriov() (which
- * adds the VF devices via pci_bus_add_device()), we may re-enter
- * this function to call the VF probe function. Calling
- * work_on_cpu() again will cause a lockdep warning. Since VFs are
- * always on the same node as the PF, we can work around this by
- * avoiding work_on_cpu() when we're already on the correct node.
- *
- * Preemption is enabled, so it's theoretically unsafe to use
- * numa_node_id(), but even if we run the probe function on the
- * wrong node, it should be functionally correct.
+ * Prevent nesting work_on_cpu() for the case where a Virtual Function
+ * device is probed from work_on_cpu() of the Physical device.
*/
- if (node >= 0 && node != numa_node_id()) {
- int cpu;
-
- get_online_cpus();
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
+ pci_physfn_is_probed(dev))
+ cpu = nr_cpu_ids;
+ else
cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
- if (cpu < nr_cpu_ids)
- error = work_on_cpu(cpu, local_pci_probe, &ddi);
- else
- error = local_pci_probe(&ddi);
- put_online_cpus();
- } else
+
+ if (cpu < nr_cpu_ids)
+ error = work_on_cpu(cpu, local_pci_probe, &ddi);
+ else
error = local_pci_probe(&ddi);
+ dev->is_probed = 0;
+ cpu_hotplug_enable();
return error;
}
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f92081234afd..ca73bc1563f4 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -99,26 +99,32 @@ static inline void cpu_maps_update_done(void)
extern struct bus_type cpu_subsys;
#ifdef CONFIG_HOTPLUG_CPU
-/* Stop CPUs going up and down. */
-
-extern void cpu_hotplug_begin(void);
-extern void cpu_hotplug_done(void);
-extern void get_online_cpus(void);
-extern void put_online_cpus(void);
+extern void cpus_write_lock(void);
+extern void cpus_write_unlock(void);
+extern void cpus_read_lock(void);
+extern void cpus_read_unlock(void);
+extern void lockdep_assert_cpus_held(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
void clear_tasks_mm_cpumask(int cpu);
int cpu_down(unsigned int cpu);
-#else /* CONFIG_HOTPLUG_CPU */
-
-static inline void cpu_hotplug_begin(void) {}
-static inline void cpu_hotplug_done(void) {}
-#define get_online_cpus() do { } while (0)
-#define put_online_cpus() do { } while (0)
-#define cpu_hotplug_disable() do { } while (0)
-#define cpu_hotplug_enable() do { } while (0)
-#endif /* CONFIG_HOTPLUG_CPU */
+#else /* CONFIG_HOTPLUG_CPU */
+
+static inline void cpus_write_lock(void) { }
+static inline void cpus_write_unlock(void) { }
+static inline void cpus_read_lock(void) { }
+static inline void cpus_read_unlock(void) { }
+static inline void lockdep_assert_cpus_held(void) { }
+static inline void cpu_hotplug_disable(void) { }
+static inline void cpu_hotplug_enable(void) { }
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/* Wrappers which go away once all code is converted */
+static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
+static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
+static inline void get_online_cpus(void) { cpus_read_lock(); }
+static inline void put_online_cpus(void) { cpus_read_unlock(); }
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7f815d915977..b56573bf440d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -153,6 +153,11 @@ int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu), bool multi_instance);
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
+ bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance);
/**
* cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
* @state: The state for which the calls are installed
@@ -171,6 +176,15 @@ static inline int cpuhp_setup_state(enum cpuhp_state state,
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
+static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state_cpuslocked(state, name, true, startup,
+ teardown, false);
+}
+
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
* callbacks
@@ -191,6 +205,15 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
false);
}
+static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state_cpuslocked(state, name, false, startup,
+ teardown, false);
+}
+
/**
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
@@ -217,6 +240,8 @@ static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
bool invoke);
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node, bool invoke);
/**
* cpuhp_state_add_instance - Add an instance for a state and invoke startup
@@ -249,7 +274,15 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
return __cpuhp_state_add_instance(state, node, false);
}
+static inline int
+cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_add_instance_cpuslocked(state, node, false);
+}
+
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
/**
* cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
@@ -273,6 +306,11 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
__cpuhp_remove_state(state, false);
}
+static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
+{
+ __cpuhp_remove_state_cpuslocked(state, false);
+}
+
/**
* cpuhp_remove_multi_state - Remove hotplug multi state callback
* @state: The state for which the calls are removed
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 0f9e567d5e15..2f9c1f93b1ce 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -166,9 +166,6 @@ struct padata_instance {
extern struct padata_instance *padata_alloc_possible(
struct workqueue_struct *wq);
-extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask);
extern void padata_free(struct padata_instance *pinst);
extern int padata_do_parallel(struct padata_instance *pinst,
struct padata_priv *padata, int cb_cpu);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8039f9f0ca05..58f1ab06c4e8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -376,6 +376,7 @@ struct pci_dev {
unsigned int irq_managed:1;
unsigned int has_secondary_link:1;
unsigned int non_compliant_bars:1; /* broken BARs; ignore them */
+ unsigned int is_probed:1; /* device probing in progress */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..7d6aa29094b2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -801,6 +801,8 @@ struct perf_cpu_context {
struct list_head sched_cb_entry;
int sched_cb_usage;
+
+ int online;
};
struct perf_output_handle {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1f0f427e0292..9c4ca7433d9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1281,6 +1281,16 @@ extern struct pid *cad_pid;
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
+static inline bool is_percpu_thread(void)
+{
+#ifdef CONFIG_SMP
+ return (current->flags & PF_NO_SETAFFINITY) &&
+ (current->nr_cpus_allowed == 1);
+#else
+ return true;
+#endif
+}
+
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 3cc9632dcc2a..3d60275e3ba9 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -116,15 +116,29 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
* @fn() runs.
*
* This can be thought of as a very heavy write lock, equivalent to
- * grabbing every spinlock in the kernel. */
+ * grabbing every spinlock in the kernel.
+ *
+ * Protects against CPU hotplug.
+ */
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+/**
+ * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Same as above. Must be called from with in a cpus_read_lock() protected
+ * region. Avoids nested calls to cpus_read_lock().
+ */
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
-static inline int stop_machine(cpu_stop_fn_t fn, void *data,
- const struct cpumask *cpus)
+static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
unsigned long flags;
int ret;
@@ -134,6 +148,12 @@ static inline int stop_machine(cpu_stop_fn_t fn, void *data,
return ret;
}
+static inline int stop_machine(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
+{
+ return stop_machine_cpuslocked(fn, data, cpus);
+}
+
static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b86b32ebb3b2..b03a32595cfe 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -27,6 +27,7 @@
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -65,6 +66,12 @@ struct cpuhp_cpu_state {
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
+static struct lock_class_key cpuhp_state_key;
+static struct lockdep_map cpuhp_state_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+#endif
+
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
@@ -196,121 +203,41 @@ void cpu_maps_update_done(void)
mutex_unlock(&cpu_add_remove_lock);
}
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
-static struct {
- struct task_struct *active_writer;
- /* wait queue to wake up the active_writer */
- wait_queue_head_t wq;
- /* verifies that no writer will get active while readers are active */
- struct mutex lock;
- /*
- * Also blocks the new readers during
- * an ongoing cpu hotplug operation.
- */
- atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
- .active_writer = NULL,
- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
- .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
- lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
+DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
-
-void get_online_cpus(void)
+void cpus_read_lock(void)
{
- might_sleep();
- if (cpu_hotplug.active_writer == current)
- return;
- cpuhp_lock_acquire_read();
- mutex_lock(&cpu_hotplug.lock);
- atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ percpu_down_read(&cpu_hotplug_lock);
}
-EXPORT_SYMBOL_GPL(get_online_cpus);
+EXPORT_SYMBOL_GPL(cpus_read_lock);
-void put_online_cpus(void)
+void cpus_read_unlock(void)
{
- int refcount;
-
- if (cpu_hotplug.active_writer == current)
- return;
-
- refcount = atomic_dec_return(&cpu_hotplug.refcount);
- if (WARN_ON(refcount < 0)) /* try to fix things up */
- atomic_inc(&cpu_hotplug.refcount);
-
- if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
- wake_up(&cpu_hotplug.wq);
-
- cpuhp_lock_release();
-
+ percpu_up_read(&cpu_hotplug_lock);
}
-EXPORT_SYMBOL_GPL(put_online_cpus);
+EXPORT_SYMBOL_GPL(cpus_read_unlock);
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- * writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- * non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
-void cpu_hotplug_begin(void)
+void cpus_write_lock(void)
{
- DEFINE_WAIT(wait);
-
- cpu_hotplug.active_writer = current;
- cpuhp_lock_acquire();
+ percpu_down_write(&cpu_hotplug_lock);
+}
- for (;;) {
- mutex_lock(&cpu_hotplug.lock);
- prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
- if (likely(!atomic_read(&cpu_hotplug.refcount)))
- break;
- mutex_unlock(&cpu_hotplug.lock);
- schedule();
- }
- finish_wait(&cpu_hotplug.wq, &wait);
+void cpus_write_unlock(void)
+{
+ percpu_up_write(&cpu_hotplug_lock);
}
-void cpu_hotplug_done(void)
+void lockdep_assert_cpus_held(void)
{
- cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
- cpuhp_lock_release();
+ percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
/*
@@ -344,8 +271,6 @@ void cpu_hotplug_enable(void)
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
-/* Notifier wrappers for transitioning to state machine */
-
static int bringup_wait_for_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -484,6 +409,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
st->should_run = false;
+ lock_map_acquire(&cpuhp_state_lock_map);
/* Single callback invocation for [un]install ? */
if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
@@ -510,6 +436,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
else if (st->state > st->target)
ret = cpuhp_ap_offline(cpu, st);
}
+ lock_map_release(&cpuhp_state_lock_map);
st->result = ret;
complete(&st->done);
}
@@ -524,6 +451,9 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
if (!cpu_online(cpu))
return 0;
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
+
/*
* If we are up and running, use the hotplug thread. For early calls
* we invoke the thread function directly.
@@ -567,6 +497,8 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
enum cpuhp_state state = st->state;
trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
__cpuhp_kick_ap_work(st);
wait_for_completion(&st->done);
trace_cpuhp_exit(cpu, st->state, state, st->result);
@@ -630,30 +562,6 @@ void clear_tasks_mm_cpumask(int cpu)
rcu_read_unlock();
}
-static inline void check_for_tasks(int dead_cpu)
-{
- struct task_struct *g, *p;
-
- read_lock(&tasklist_lock);
- for_each_process_thread(g, p) {
- if (!p->on_rq)
- continue;
- /*
- * We do the check with unlocked task_rq(p)->lock.
- * Order the reading to do not warn about a task,
- * which was running on this cpu in the past, and
- * it's just been woken on another cpu.
- */
- rmb();
- if (task_cpu(p) != dead_cpu)
- continue;
-
- pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
- p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
- }
- read_unlock(&tasklist_lock);
-}
-
/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
@@ -701,7 +609,7 @@ static int takedown_cpu(unsigned int cpu)
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
+ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
/* CPU refused to die */
irq_unlock_sparse();
@@ -773,7 +681,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
if (!cpu_present(cpu))
return -EINVAL;
- cpu_hotplug_begin();
+ cpus_write_lock();
cpuhp_tasks_frozen = tasks_frozen;
@@ -811,7 +719,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
}
out:
- cpu_hotplug_done();
+ cpus_write_unlock();
return ret;
}
@@ -893,7 +801,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
struct task_struct *idle;
int ret = 0;
- cpu_hotplug_begin();
+ cpus_write_lock();
if (!cpu_present(cpu)) {
ret = -EINVAL;
@@ -941,7 +849,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
target = min((int)target, CPUHP_BRINGUP_CPU);
ret = cpuhp_up_callbacks(cpu, st, target);
out:
- cpu_hotplug_done();
+ cpus_write_unlock();
return ret;
}
@@ -1418,18 +1326,20 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
}
}
-int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
- bool invoke)
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node,
+ bool invoke)
{
struct cpuhp_step *sp;
int cpu;
int ret;
+ lockdep_assert_cpus_held();
+
sp = cpuhp_get_step(state);
if (sp->multi_instance == false)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
@@ -1458,13 +1368,23 @@ add_node:
hlist_add_head(node, &sp->list);
unlock:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ return ret;
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ bool invoke)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
- * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
* @state: The state to setup
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
@@ -1473,25 +1393,27 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* @multi_instance: State is set up for multiple instances which get
* added afterwards.
*
+ * The caller needs to hold cpus read locked while calling this function.
* Returns:
* On success:
* Positive state number if @state is CPUHP_AP_ONLINE_DYN
* 0 for all other states
* On failure: proper (negative) error code
*/
-int __cpuhp_setup_state(enum cpuhp_state state,
- const char *name, bool invoke,
- int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu),
- bool multi_instance)
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
{
int cpu, ret = 0;
bool dynstate;
+ lockdep_assert_cpus_held();
+
if (cpuhp_cb_check(state) || !name)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
@@ -1527,7 +1449,6 @@ int __cpuhp_setup_state(enum cpuhp_state state,
}
out:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
/*
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
* dynamically allocated state in case of success.
@@ -1536,6 +1457,22 @@ out:
return state;
return ret;
}
+EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
+
+int __cpuhp_setup_state(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
+ teardown, multi_instance);
+ cpus_read_unlock();
+ return ret;
+}
EXPORT_SYMBOL(__cpuhp_setup_state);
int __cpuhp_state_remove_instance(enum cpuhp_state state,
@@ -1549,7 +1486,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
if (!sp->multi_instance)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !cpuhp_get_teardown_cb(state))
@@ -1570,29 +1507,30 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
remove:
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
/**
- * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
* @state: The state to remove
* @invoke: If true, the teardown function is invoked for cpus where
* cpu state >= @state
*
+ * The caller needs to hold cpus read locked while calling this function.
* The teardown callback is currently not allowed to fail. Think
* about module removal!
*/
-void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
struct cpuhp_step *sp = cpuhp_get_step(state);
int cpu;
BUG_ON(cpuhp_cb_check(state));
- get_online_cpus();
+ lockdep_assert_cpus_held();
mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
@@ -1620,7 +1558,14 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+ cpus_read_lock();
+ __cpuhp_remove_state_cpuslocked(state, invoke);
+ cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
@@ -1689,7 +1634,7 @@ static struct attribute *cpuhp_cpu_attrs[] = {
NULL
};
-static struct attribute_group cpuhp_cpu_attr_group = {
+static const struct attribute_group cpuhp_cpu_attr_group = {
.attrs = cpuhp_cpu_attrs,
.name = "hotplug",
NULL
@@ -1721,7 +1666,7 @@ static struct attribute *cpuhp_cpu_root_attrs[] = {
NULL
};
-static struct attribute_group cpuhp_cpu_root_attr_group = {
+static const struct attribute_group cpuhp_cpu_root_attr_group = {
.attrs = cpuhp_cpu_root_attrs,
.name = "hotplug",
NULL
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bc63f8db1b0d..4d2c32f98482 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
/*
* perf event paranoia level:
@@ -3807,14 +3808,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
- /*
- * We could be clever and allow to attach a event to an
- * offline CPU and activate it when the CPU comes up, but
- * that's for later.
- */
- if (!cpu_online(cpu))
- return ERR_PTR(-ENODEV);
-
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
@@ -7723,7 +7716,8 @@ static int swevent_hlist_get_cpu(int cpu)
int err = 0;
mutex_lock(&swhash->hlist_mutex);
- if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+ if (!swevent_hlist_deref(swhash) &&
+ cpumask_test_cpu(cpu, perf_online_mask)) {
struct swevent_hlist *hlist;
hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -7744,7 +7738,7 @@ static int swevent_hlist_get(void)
{
int err, cpu, failed_cpu;
- get_online_cpus();
+ mutex_lock(&pmus_lock);
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(cpu);
if (err) {
@@ -7752,8 +7746,7 @@ static int swevent_hlist_get(void)
goto fail;
}
}
- put_online_cpus();
-
+ mutex_unlock(&pmus_lock);
return 0;
fail:
for_each_possible_cpu(cpu) {
@@ -7761,8 +7754,7 @@ fail:
break;
swevent_hlist_put_cpu(cpu);
}
-
- put_online_cpus();
+ mutex_unlock(&pmus_lock);
return err;
}
@@ -8940,7 +8932,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
pmu->hrtimer_interval_ms = timer;
/* update all cpuctx for this PMU */
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8949,7 +8941,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
cpu_function_call(cpu,
(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
- put_online_cpus();
+ cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
return count;
@@ -9079,6 +9071,7 @@ skip_type:
lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
cpuctx->ctx.pmu = pmu;
+ cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
__perf_mux_hrtimer_init(cpuctx, cpu);
}
@@ -9903,12 +9896,10 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}
- get_online_cpus();
-
if (task) {
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
if (err)
- goto err_cpus;
+ goto err_task;
/*
* Reuse ptrace permission checks for now.
@@ -10094,6 +10085,23 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_locked;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_locked;
+ }
+ }
+
+
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
@@ -10183,8 +10191,6 @@ SYSCALL_DEFINE5(perf_event_open,
put_task_struct(task);
}
- put_online_cpus();
-
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
@@ -10218,8 +10224,6 @@ err_alloc:
err_cred:
if (task)
mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
- put_online_cpus();
err_task:
if (task)
put_task_struct(task);
@@ -10274,6 +10278,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err_unlock;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+ }
+
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
goto err_unlock;
@@ -10941,6 +10960,8 @@ static void __init perf_event_init_all_cpus(void)
struct swevent_htable *swhash;
int cpu;
+ zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
@@ -10956,7 +10977,7 @@ static void __init perf_event_init_all_cpus(void)
}
}
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
@@ -10969,7 +10990,6 @@ int perf_event_init_cpu(unsigned int cpu)
rcu_assign_pointer(swhash->swevent_hlist, hlist);
}
mutex_unlock(&swhash->hlist_mutex);
- return 0;
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10987,19 +11007,22 @@ static void __perf_event_exit_context(void *__info)
static void perf_event_exit_cpu_context(int cpu)
{
+ struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
- int idx;
- idx = srcu_read_lock(&pmus_srcu);
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+ mutex_lock(&pmus_lock);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+ cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
}
- srcu_read_unlock(&pmus_srcu, idx);
+ cpumask_clear_cpu(cpu, perf_online_mask);
+ mutex_unlock(&pmus_lock);
}
#else
@@ -11007,6 +11030,29 @@ static void perf_event_exit_cpu_context(int cpu) { }
#endif
+int perf_event_init_cpu(unsigned int cpu)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+
+ perf_swevent_init_cpu(cpu);
+
+ mutex_lock(&pmus_lock);
+ cpumask_set_cpu(cpu, perf_online_mask);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+
+ mutex_lock(&ctx->mutex);
+ cpuctx->online = 1;
+ mutex_unlock(&ctx->mutex);
+ }
+ mutex_unlock(&pmus_lock);
+
+ return 0;
+}
+
int perf_event_exit_cpu(unsigned int cpu)
{
perf_event_exit_cpu_context(cpu);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 6c9cb208ac48..d11c506a6ac3 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -15,6 +15,7 @@
#include <linux/static_key.h>
#include <linux/jump_label_ratelimit.h>
#include <linux/bug.h>
+#include <linux/cpu.h>
#ifdef HAVE_JUMP_LABEL
@@ -124,6 +125,7 @@ void static_key_slow_inc(struct static_key *key)
return;
}
+ cpus_read_lock();
jump_label_lock();
if (atomic_read(&key->enabled) == 0) {
atomic_set(&key->enabled, -1);
@@ -133,12 +135,14 @@ void static_key_slow_inc(struct static_key *key)
atomic_inc(&key->enabled);
}
jump_label_unlock();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(static_key_slow_inc);
static void __static_key_slow_dec(struct static_key *key,
unsigned long rate_limit, struct delayed_work *work)
{
+ cpus_read_lock();
/*
* The negative count check is valid even when a negative
* key->enabled is in use by static_key_slow_inc(); a
@@ -149,6 +153,7 @@ static void __static_key_slow_dec(struct static_key *key,
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
WARN(atomic_read(&key->enabled) < 0,
"jump label: negative count!\n");
+ cpus_read_unlock();
return;
}
@@ -159,6 +164,7 @@ static void __static_key_slow_dec(struct static_key *key,
jump_label_update(key);
}
jump_label_unlock();
+ cpus_read_unlock();
}
static void jump_label_update_timeout(struct work_struct *work)
@@ -334,6 +340,7 @@ void __init jump_label_init(void)
if (static_key_initialized)
return;
+ cpus_read_lock();
jump_label_lock();
jump_label_sort_entries(iter_start, iter_stop);
@@ -353,6 +360,7 @@ void __init jump_label_init(void)
}
static_key_initialized = true;
jump_label_unlock();
+ cpus_read_unlock();
}
#ifdef CONFIG_MODULES
@@ -590,28 +598,28 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
struct module *mod = data;
int ret = 0;
+ cpus_read_lock();
+ jump_label_lock();
+
switch (val) {
case MODULE_STATE_COMING:
- jump_label_lock();
ret = jump_label_add_module(mod);
if (ret) {
WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n");
jump_label_del_module(mod);
}
- jump_label_unlock();
break;
case MODULE_STATE_GOING:
- jump_label_lock();
jump_label_del_module(mod);
- jump_label_unlock();
break;
case MODULE_STATE_LIVE:
- jump_label_lock();
jump_label_invalidate_module_init(mod);
- jump_label_unlock();
break;
}
+ jump_label_unlock();
+ cpus_read_unlock();
+
return notifier_from_errno(ret);
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index adfe3b4cfe05..6756d750b31b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
*/
static void do_optimize_kprobes(void)
{
- /* Optimization never be done when disarmed */
- if (kprobes_all_disarmed || !kprobes_allow_optimization ||
- list_empty(&optimizing_list))
- return;
-
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -495,14 +490,19 @@ static void do_optimize_kprobes(void)
* This combination can cause a deadlock (cpu-hotplug try to lock
* text_mutex but stop_machine can not be done because online_cpus
* has been changed)
- * To avoid this deadlock, we need to call get_online_cpus()
+ * To avoid this deadlock, caller must have locked cpu hotplug
* for preventing cpu-hotplug outside of text_mutex locking.
*/
- get_online_cpus();
+ lockdep_assert_cpus_held();
+
+ /* Optimization never be done when disarmed */
+ if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+ list_empty(&optimizing_list))
+ return;
+
mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/*
@@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
+ /* See comment in do_optimize_kprobes() */
+ lockdep_assert_cpus_held();
+
/* Unoptimization must be done anytime */
if (list_empty(&unoptimizing_list))
return;
- /* Ditto to do_optimize_kprobes */
- get_online_cpus();
mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
@@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void)
list_del_init(&op->list);
}
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/* Reclaim all kprobes on the free_list */
@@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void)
static void kprobe_optimizer(struct work_struct *work)
{
mutex_lock(&kprobe_mutex);
+ cpus_read_lock();
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
@@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work_struct *work)
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
@@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprobe *p)
/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
- get_online_cpus();
+ lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
- put_online_cpus();
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}
@@ -791,6 +792,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
return;
/* For preparing optimization, jump_label_text_reserved() is called */
+ cpus_read_lock();
jump_label_lock();
mutex_lock(&text_mutex);
@@ -812,6 +814,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
out:
mutex_unlock(&text_mutex);
jump_label_unlock();
+ cpus_read_unlock();
}
#ifdef CONFIG_SYSCTL
@@ -826,6 +829,7 @@ static void optimize_all_kprobes(void)
if (kprobes_allow_optimization)
goto out;
+ cpus_read_lock();
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
@@ -833,6 +837,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
+ cpus_read_unlock();
printk(KERN_INFO "Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
@@ -851,6 +856,7 @@ static void unoptimize_all_kprobes(void)
return;
}
+ cpus_read_lock();
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
@@ -859,6 +865,7 @@ static void unoptimize_all_kprobes(void)
unoptimize_kprobe(p, false);
}
}
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Wait for unoptimizing completion */
@@ -1010,14 +1017,11 @@ static void arm_kprobe(struct kprobe *kp)
arm_kprobe_ftrace(kp);
return;
}
- /*
- * Here, since __arm_kprobe() doesn't use stop_machine(),
- * this doesn't cause deadlock on text_mutex. So, we don't
- * need get_online_cpus().
- */
+ cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/* Disarm a kprobe with text_mutex */
@@ -1027,10 +1031,12 @@ static void disarm_kprobe(struct kprobe *kp, bool reopt)
disarm_kprobe_ftrace(kp);
return;
}
- /* Ditto */
+
+ cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/*
@@ -1298,13 +1304,10 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
int ret = 0;
struct kprobe *ap = orig_p;
+ cpus_read_lock();
+
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
- /*
- * Get online CPUs to avoid text_mutex deadlock.with stop machine,
- * which is invoked by unoptimize_kprobe() in add_new_kprobe()
- */
- get_online_cpus();
mutex_lock(&text_mutex);
if (!kprobe_aggrprobe(orig_p)) {
@@ -1352,8 +1355,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
out:
mutex_unlock(&text_mutex);
- put_online_cpus();
jump_label_unlock();
+ cpus_read_unlock();
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
@@ -1555,9 +1558,12 @@ int register_kprobe(struct kprobe *p)
goto out;
}
- mutex_lock(&text_mutex); /* Avoiding text modification */
+ cpus_read_lock();
+ /* Prevent text modification */
+ mutex_lock(&text_mutex);
ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
if (ret)
goto out;
@@ -1570,7 +1576,6 @@ int register_kprobe(struct kprobe *p)
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
-
out:
mutex_unlock(&kprobe_mutex);
diff --git a/kernel/padata.c b/kernel/padata.c
index ac8f1e524836..868f947166d7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -934,29 +934,18 @@ static struct kobj_type padata_attr_type = {
};
/**
- * padata_alloc_possible - Allocate and initialize padata instance.
- * Use the cpu_possible_mask for serial and
- * parallel workers.
- *
- * @wq: workqueue to use for the allocated padata instance
- */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
-{
- return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
-
-/**
* padata_alloc - allocate and initialize a padata instance and specify
* cpumasks for serial and parallel workers.
*
* @wq: workqueue to use for the allocated padata instance
* @pcpumask: cpumask that will be used for padata parallelization
* @cbcpumask: cpumask that will be used for padata serialization
+ *
+ * Must be called from a cpus_read_lock() protected region
*/
-struct padata_instance *padata_alloc(struct workqueue_struct *wq,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask)
+static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
{
struct padata_instance *pinst;
struct parallel_data *pd = NULL;
@@ -965,7 +954,6 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
if (!pinst)
goto err;
- get_online_cpus();
if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
goto err_free_inst;
if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
@@ -989,14 +977,12 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
pinst->flags = 0;
- put_online_cpus();
-
BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
kobject_init(&pinst->kobj, &padata_attr_type);
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
#endif
return pinst;
@@ -1005,12 +991,27 @@ err_free_masks:
free_cpumask_var(pinst->cpumask.cbcpu);
err_free_inst:
kfree(pinst);
- put_online_cpus();
err:
return NULL;
}
/**
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ * Use the cpu_possible_mask for serial and
+ * parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ *
+ * Must be called from a cpus_read_lock() protected region
+ */
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+{
+ lockdep_assert_cpus_held();
+ return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc_possible);
+
+/**
* padata_free - free a padata instance
*
* @padata_inst: padata instance to free
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 1eb82661ecdb..b7591261652d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -552,7 +552,8 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
-static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
struct multi_stop_data msdata = {
.fn = fn,
@@ -561,6 +562,8 @@ static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cp
.active_cpus = cpus,
};
+ lockdep_assert_cpus_held();
+
if (!stop_machine_initialized) {
/*
* Handle the case where stop_machine() is called
@@ -590,9 +593,9 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
int ret;
/* No CPUs can come up or down during this. */
- get_online_cpus();
- ret = __stop_machine(fn, data, cpus);
- put_online_cpus();
+ cpus_read_lock();
+ ret = stop_machine_cpuslocked(fn, data, cpus);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);