author    | Paul E. McKenney | 2022-07-21 17:46:18 -0700
committer | Paul E. McKenney | 2022-07-21 17:46:18 -0700
commit    | 34bc7b454dc31f75a0be7ee8ab378135523d7c51 (patch)
tree      | a1891ee085290746b6835e5e33e3c31cec2489fa /kernel/rcu
parent    | d38c8fe48354af9c7120291938574e1ebb221d52 (diff)
parent    | 1dcaa3b462265f688613163a1562a65ee53a3311 (diff)
Merge branch 'ctxt.2022.07.05a' into HEAD
ctxt.2022.07.05a: Context-tracking development branch.
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/Kconfig       |   2
-rw-r--r-- | kernel/rcu/rcu.h         |   4
-rw-r--r-- | kernel/rcu/tree.c        | 476
-rw-r--r-- | kernel/rcu/tree.h        |   8
-rw-r--r-- | kernel/rcu/tree_exp.h    |   2
-rw-r--r-- | kernel/rcu/tree_plugin.h |  38
-rw-r--r-- | kernel/rcu/tree_stall.h  |  55
-rw-r--r-- | kernel/rcu/update.c      |   2
8 files changed, 57 insertions, 530 deletions
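Taken together, the kernel/rcu hunks below remove RCU's private dynticks state from struct rcu_data (the ->dynticks atomic plus the dynticks_nesting and dynticks_nmi_nesting counters) along with the idle/user/irq/NMI entry-exit functions built on it, and instead read the equivalent state maintained by the context-tracking subsystem through accessors such as ct_dynticks(), ct_dynticks_cpu_acquire(), ct_dynticks_nesting(), ct_dynticks_nmi_nesting(), ct_state_inc() and the RCU_DYNTICKS_IDX bit. As a rough mental model of that counter convention only (not the kernel implementation: the helper names below are invented, and RCU_DYNTICKS_IDX is treated as bit 0 here even though in the kernel it sits above the context-state bits):

```c
/*
 * User-space sketch of the EQS counter convention used by the diff below.
 * The RCU_DYNTICKS_IDX bit is set while RCU is "watching" the CPU; every
 * EQS entry or exit adds RCU_DYNTICKS_IDX, so a changed snapshot proves
 * the CPU passed through an extended quiescent state.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RCU_DYNTICKS_IDX 0x1	/* simplification: bit 0 for illustration */

static atomic_int ct_state = RCU_DYNTICKS_IDX;	/* boot: RCU watching */

static int dynticks_snap(void)
{
	/* Acquire-ordered read, standing in for rcu_dynticks_snap(). */
	return atomic_load_explicit(&ct_state, memory_order_acquire);
}

static bool in_eqs(int snap)
{
	return !(snap & RCU_DYNTICKS_IDX);	/* bit clear: extended QS */
}

static void eqs_enter(void)	/* e.g. idle or nohz_full-user entry */
{
	atomic_fetch_add(&ct_state, RCU_DYNTICKS_IDX);	/* bit now clear */
}

static void eqs_exit(void)	/* e.g. idle or nohz_full-user exit */
{
	atomic_fetch_add(&ct_state, RCU_DYNTICKS_IDX);	/* bit set again */
}

int main(void)
{
	int snap = dynticks_snap();

	printf("in EQS now?         %d\n", in_eqs(snap));		/* 0 */
	eqs_enter();
	printf("in EQS after enter? %d\n", in_eqs(dynticks_snap()));	/* 1 */
	eqs_exit();
	/*
	 * A changed counter tells the grace-period machinery that this
	 * CPU passed through a quiescent state since the snapshot.
	 */
	printf("EQS since snapshot? %d\n", snap != dynticks_snap());	/* 1 */
	return 0;
}
```

The same convention explains rcu_momentary_dyntick_idle() in the tree.c hunk below: adding 2 * RCU_DYNTICKS_IDX changes the counter, so a concurrent snapshot comparison observes a quiescent state, without ever clearing the "watching" bit.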
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index c05ca52cdf64..d471d22a5e21 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -8,6 +8,8 @@ menu "RCU Subsystem" config TREE_RCU bool default y if SMP + # Dynticks-idle tracking + select CONTEXT_TRACKING_IDLE help This option selects the RCU implementation that is designed for very large SMP system with hundreds or diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 32291f4eefde..be5979da07f5 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -12,10 +12,6 @@ #include <trace/events/rcu.h> -/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ -#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) - - /* * Grace-period counter management. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 62e514713f7a..3dc968006ad0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -62,6 +62,7 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/kasan.h> +#include <linux/context_tracking.h> #include "../time/tick-internal.h" #include "tree.h" @@ -75,9 +76,6 @@ /* Data structures. */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { - .dynticks_nesting = 1, - .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, - .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_RCU_CORE, #endif @@ -267,56 +265,6 @@ void rcu_softirq_qs(void) } /* - * Increment the current CPU's rcu_data structure's ->dynticks field - * with ordering. Return the new value. - */ -static noinline noinstr unsigned long rcu_dynticks_inc(int incby) -{ - return arch_atomic_add_return(incby, this_cpu_ptr(&rcu_data.dynticks)); -} - -/* - * Record entry into an extended quiescent state. This is only to be - * called when not already in an extended quiescent state, that is, - * RCU is watching prior to the call to this function and is no longer - * watching upon return. - */ -static noinstr void rcu_dynticks_eqs_enter(void) -{ - int seq; - - /* - * CPUs seeing atomic_add_return() must see prior RCU read-side - * critical sections, and we also must force ordering with the - * next idle sojourn. - */ - rcu_dynticks_task_trace_enter(); // Before ->dynticks update! - seq = rcu_dynticks_inc(1); - // RCU is no longer watching. Better be in extended quiescent state! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1)); -} - -/* - * Record exit from an extended quiescent state. This is only to be - * called from an extended quiescent state, that is, RCU is not watching - * prior to the call to this function and is watching upon return. - */ -static noinstr void rcu_dynticks_eqs_exit(void) -{ - int seq; - - /* - * CPUs seeing atomic_add_return() must see prior idle sojourns, - * and we also must force ordering with the next RCU read-side - * critical section. - */ - seq = rcu_dynticks_inc(1); - // RCU is now watching. Better not be in an extended quiescent state! - rcu_dynticks_task_trace_exit(); // After ->dynticks update! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1)); -} - -/* * Reset the current CPU's ->dynticks counter to indicate that the * newly onlined CPU is no longer in an extended quiescent state. 
* This will either leave the counter unchanged, or increment it @@ -328,31 +276,19 @@ static noinstr void rcu_dynticks_eqs_exit(void) */ static void rcu_dynticks_eqs_online(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - if (atomic_read(&rdp->dynticks) & 0x1) + if (ct_dynticks() & RCU_DYNTICKS_IDX) return; - rcu_dynticks_inc(1); -} - -/* - * Is the current CPU in an extended quiescent state? - * - * No ordering, as we are sampling CPU-local information. - */ -static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) -{ - return !(arch_atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1); + ct_state_inc(RCU_DYNTICKS_IDX); } /* * Snapshot the ->dynticks counter with full ordering so as to allow * stable comparison of this counter with past and future snapshots. */ -static int rcu_dynticks_snap(struct rcu_data *rdp) +static int rcu_dynticks_snap(int cpu) { smp_mb(); // Fundamental RCU ordering guarantee. - return atomic_read_acquire(&rdp->dynticks); + return ct_dynticks_cpu_acquire(cpu); } /* @@ -361,15 +297,13 @@ static int rcu_dynticks_snap(struct rcu_data *rdp) */ static bool rcu_dynticks_in_eqs(int snap) { - return !(snap & 0x1); + return !(snap & RCU_DYNTICKS_IDX); } /* Return true if the specified CPU is currently idle from an RCU viewpoint. */ bool rcu_is_idle_cpu(int cpu) { - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - - return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)); + return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)); } /* @@ -379,7 +313,7 @@ bool rcu_is_idle_cpu(int cpu) */ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap) { - return snap != rcu_dynticks_snap(rdp); + return snap != rcu_dynticks_snap(rdp->cpu); } /* @@ -388,19 +322,17 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap) */ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) { - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); int snap; // If not quiescent, force back to earlier extended quiescent state. - snap = atomic_read(&rdp->dynticks) & ~0x1; - + snap = ct_dynticks_cpu(cpu) & ~RCU_DYNTICKS_IDX; smp_rmb(); // Order ->dynticks and *vp reads. if (READ_ONCE(*vp)) return false; // Non-zero, so report failure; smp_rmb(); // Order *vp read and ->dynticks re-read. // If still in the same extended quiescent state, we are good! - return snap == atomic_read(&rdp->dynticks); + return snap == ct_dynticks_cpu(cpu); } /* @@ -419,9 +351,9 @@ notrace void rcu_momentary_dyntick_idle(void) int seq; raw_cpu_write(rcu_data.rcu_need_heavy_qs, false); - seq = rcu_dynticks_inc(2); + seq = ct_state_inc(2 * RCU_DYNTICKS_IDX); /* It is illegal to call this from idle state. */ - WARN_ON_ONCE(!(seq & 0x1)); + WARN_ON_ONCE(!(seq & RCU_DYNTICKS_IDX)); rcu_preempt_deferred_qs(current); } EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); @@ -446,13 +378,13 @@ static int rcu_is_cpu_rrupt_from_idle(void) lockdep_assert_irqs_disabled(); /* Check for counter underflows */ - RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0, + RCU_LOCKDEP_WARN(ct_dynticks_nesting() < 0, "RCU dynticks_nesting counter underflow!"); - RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0, + RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() <= 0, "RCU dynticks_nmi_nesting counter underflow/zero!"); /* Are we at first interrupt nesting level? 
*/ - nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting); + nesting = ct_dynticks_nmi_nesting(); if (nesting > 1) return false; @@ -462,7 +394,7 @@ static int rcu_is_cpu_rrupt_from_idle(void) WARN_ON_ONCE(!nesting && !is_idle_task(current)); /* Does CPU appear to be idle from an RCU standpoint? */ - return __this_cpu_read(rcu_data.dynticks_nesting) == 0; + return ct_dynticks_nesting() == 0; } #define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10) @@ -613,66 +545,7 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, } EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); -/* - * Enter an RCU extended quiescent state, which can be either the - * idle loop or adaptive-tickless usermode execution. - * - * We crowbar the ->dynticks_nmi_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - */ -static noinstr void rcu_eqs_enter(bool user) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE); - WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdp->dynticks_nesting == 0); - if (rdp->dynticks_nesting != 1) { - // RCU will still be watching, so just do accounting and leave. - rdp->dynticks_nesting--; - return; - } - - lockdep_assert_irqs_disabled(); - instrumentation_begin(); - trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); - rcu_preempt_deferred_qs(current); - - // instrumentation for the noinstr rcu_dynticks_eqs_enter() - instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); - - instrumentation_end(); - WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ - // RCU is watching here ... - rcu_dynticks_eqs_enter(); - // ... but is no longer watching here. - rcu_dynticks_task_enter(); -} - -/** - * rcu_idle_enter - inform RCU that current CPU is entering idle - * - * Enter idle mode, in other words, -leave- the mode in which RCU - * read-side critical sections can occur. (Though RCU read-side - * critical sections can occur in irq handlers in idle, a possibility - * handled by irq_enter() and irq_exit().) - * - * If you add or remove a call to rcu_idle_enter(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_idle_enter(void) -{ - lockdep_assert_irqs_disabled(); - rcu_eqs_enter(false); -} -EXPORT_SYMBOL_GPL(rcu_idle_enter); - -#ifdef CONFIG_NO_HZ_FULL - -#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) /* * An empty function that will trigger a reschedule on * IRQ tail once IRQs get re-enabled on userspace/guest resume. @@ -694,7 +567,7 @@ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = * last resort is to fire a local irq_work that will trigger a reschedule once IRQs * get re-enabled again. */ -noinstr static void rcu_irq_work_resched(void) +noinstr void rcu_irq_work_resched(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -710,114 +583,7 @@ noinstr static void rcu_irq_work_resched(void) } instrumentation_end(); } - -#else -static inline void rcu_irq_work_resched(void) { } -#endif - -/** - * rcu_user_enter - inform RCU that we are resuming userspace. - * - * Enter RCU idle mode right before resuming userspace. 
No use of RCU - * is permitted between this call and rcu_user_exit(). This way the - * CPU doesn't need to maintain the tick for RCU maintenance purposes - * when the CPU runs in userspace. - * - * If you add or remove a call to rcu_user_enter(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -noinstr void rcu_user_enter(void) -{ - lockdep_assert_irqs_disabled(); - - /* - * Other than generic entry implementation, we may be past the last - * rescheduling opportunity in the entry code. Trigger a self IPI - * that will fire and reschedule once we resume in user/guest mode. - */ - rcu_irq_work_resched(); - rcu_eqs_enter(true); -} - -#endif /* CONFIG_NO_HZ_FULL */ - -/** - * rcu_nmi_exit - inform RCU of exit from NMI context - * - * If we are returning from the outermost NMI handler that interrupted an - * RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting - * to let the RCU grace-period handling know that the CPU is back to - * being RCU-idle. - * - * If you add or remove a call to rcu_nmi_exit(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -noinstr void rcu_nmi_exit(void) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - instrumentation_begin(); - /* - * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. - * (We are exiting an NMI handler, so RCU better be paying attention - * to us!) - */ - WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0); - WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); - - /* - * If the nesting level is not 1, the CPU wasn't RCU-idle, so - * leave it in non-RCU-idle state. - */ - if (rdp->dynticks_nmi_nesting != 1) { - trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, - atomic_read(&rdp->dynticks)); - WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ - rdp->dynticks_nmi_nesting - 2); - instrumentation_end(); - return; - } - - /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); - WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ - - // instrumentation for the noinstr rcu_dynticks_eqs_enter() - instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); - instrumentation_end(); - - // RCU is watching here ... - rcu_dynticks_eqs_enter(); - // ... but is no longer watching here. - - if (!in_nmi()) - rcu_dynticks_task_enter(); -} - -/** - * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle - * - * Exit from an interrupt handler, which might possibly result in entering - * idle mode, in other words, leaving the mode in which read-side critical - * sections can occur. The caller must have disabled interrupts. - * - * This code assumes that the idle loop never does anything that might - * result in unbalanced calls to irq_enter() and irq_exit(). If your - * architecture's idle loop violates this assumption, RCU will give you what - * you deserve, good and hard. But very infrequently and irreproducibly. - * - * Use things like work queues to work around this limitation. - * - * You have been warned. - * - * If you add or remove a call to rcu_irq_exit(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. 
- */ -void noinstr rcu_irq_exit(void) -{ - lockdep_assert_irqs_disabled(); - rcu_nmi_exit(); -} +#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */ #ifdef CONFIG_PROVE_RCU /** @@ -827,9 +593,9 @@ void rcu_irq_exit_check_preempt(void) { lockdep_assert_irqs_disabled(); - RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0, + RCU_LOCKDEP_WARN(ct_dynticks_nesting() <= 0, "RCU dynticks_nesting counter underflow/zero!"); - RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) != + RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE, "Bad RCU dynticks_nmi_nesting counter\n"); RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(), @@ -837,95 +603,8 @@ void rcu_irq_exit_check_preempt(void) } #endif /* #ifdef CONFIG_PROVE_RCU */ -/* - * Wrapper for rcu_irq_exit() where interrupts are enabled. - * - * If you add or remove a call to rcu_irq_exit_irqson(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_irq_exit_irqson(void) -{ - unsigned long flags; - - local_irq_save(flags); - rcu_irq_exit(); - local_irq_restore(flags); -} - -/* - * Exit an RCU extended quiescent state, which can be either the - * idle loop or adaptive-tickless usermode execution. - * - * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to - * allow for the possibility of usermode upcalls messing up our count of - * interrupt nesting level during the busy period that is just now starting. - */ -static void noinstr rcu_eqs_exit(bool user) -{ - struct rcu_data *rdp; - long oldval; - - lockdep_assert_irqs_disabled(); - rdp = this_cpu_ptr(&rcu_data); - oldval = rdp->dynticks_nesting; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); - if (oldval) { - // RCU was already watching, so just do accounting and leave. - rdp->dynticks_nesting++; - return; - } - rcu_dynticks_task_exit(); - // RCU is not watching here ... - rcu_dynticks_eqs_exit(); - // ... but is watching here. - instrumentation_begin(); - - // instrumentation for the noinstr rcu_dynticks_eqs_exit() - instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); - - trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); - WRITE_ONCE(rdp->dynticks_nesting, 1); - WARN_ON_ONCE(rdp->dynticks_nmi_nesting); - WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); - instrumentation_end(); -} - -/** - * rcu_idle_exit - inform RCU that current CPU is leaving idle - * - * Exit idle mode, in other words, -enter- the mode in which RCU - * read-side critical sections can occur. - * - * If you add or remove a call to rcu_idle_exit(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_idle_exit(void) -{ - unsigned long flags; - - local_irq_save(flags); - rcu_eqs_exit(false); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(rcu_idle_exit); - #ifdef CONFIG_NO_HZ_FULL /** - * rcu_user_exit - inform RCU that we are exiting userspace. - * - * Exit RCU idle mode while entering the kernel because it can - * run a RCU read side critical section anytime. - * - * If you add or remove a call to rcu_user_exit(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -void noinstr rcu_user_exit(void) -{ - rcu_eqs_exit(true); -} - -/** * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it. 
* * The scheduler tick is not normally enabled when CPUs enter the kernel @@ -987,109 +666,6 @@ void __rcu_irq_enter_check_tick(void) } #endif /* CONFIG_NO_HZ_FULL */ -/** - * rcu_nmi_enter - inform RCU of entry to NMI context - * - * If the CPU was idle from RCU's viewpoint, update rdp->dynticks and - * rdp->dynticks_nmi_nesting to let the RCU grace-period handling know - * that the CPU is active. This implementation permits nested NMIs, as - * long as the nesting level does not overflow an int. (You will probably - * run out of stack space first.) - * - * If you add or remove a call to rcu_nmi_enter(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -noinstr void rcu_nmi_enter(void) -{ - long incby = 2; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - /* Complain about underflow. */ - WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0); - - /* - * If idle from RCU viewpoint, atomically increment ->dynticks - * to mark non-idle and increment ->dynticks_nmi_nesting by one. - * Otherwise, increment ->dynticks_nmi_nesting by two. This means - * if ->dynticks_nmi_nesting is equal to one, we are guaranteed - * to be in the outermost NMI handler that interrupted an RCU-idle - * period (observation due to Andy Lutomirski). - */ - if (rcu_dynticks_curr_cpu_in_eqs()) { - - if (!in_nmi()) - rcu_dynticks_task_exit(); - - // RCU is not watching here ... - rcu_dynticks_eqs_exit(); - // ... but is watching here. - - instrumentation_begin(); - // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs() - instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks)); - // instrumentation for the noinstr rcu_dynticks_eqs_exit() - instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); - - incby = 1; - } else if (!in_nmi()) { - instrumentation_begin(); - rcu_irq_enter_check_tick(); - } else { - instrumentation_begin(); - } - - trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), - rdp->dynticks_nmi_nesting, - rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks)); - instrumentation_end(); - WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */ - rdp->dynticks_nmi_nesting + incby); - barrier(); -} - -/** - * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle - * - * Enter an interrupt handler, which might possibly result in exiting - * idle mode, in other words, entering the mode in which read-side critical - * sections can occur. The caller must have disabled interrupts. - * - * Note that the Linux kernel is fully capable of entering an interrupt - * handler that it never exits, for example when doing upcalls to user mode! - * This code assumes that the idle loop never does upcalls to user mode. - * If your architecture's idle loop does do upcalls to user mode (or does - * anything else that results in unbalanced calls to the irq_enter() and - * irq_exit() functions), RCU will give you what you deserve, good and hard. - * But very infrequently and irreproducibly. - * - * Use things like work queues to work around this limitation. - * - * You have been warned. - * - * If you add or remove a call to rcu_irq_enter(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -noinstr void rcu_irq_enter(void) -{ - lockdep_assert_irqs_disabled(); - rcu_nmi_enter(); -} - -/* - * Wrapper for rcu_irq_enter() where interrupts are enabled. - * - * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. 
- */ -void rcu_irq_enter_irqson(void) -{ - unsigned long flags; - - local_irq_save(flags); - rcu_irq_enter(); - local_irq_restore(flags); -} - /* * Check to see if any future non-offloaded RCU-related work will need * to be done by the current CPU, even if none need be done immediately, @@ -1227,7 +803,7 @@ static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp) */ static int dyntick_save_progress_counter(struct rcu_data *rdp) { - rdp->dynticks_snap = rcu_dynticks_snap(rdp); + rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu); if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); rcu_gpnum_ovf(rdp->mynode, rdp); @@ -4328,13 +3904,14 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) static void __init rcu_boot_init_percpu_data(int cpu) { + struct context_tracking *ct = this_cpu_ptr(&context_tracking); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); /* Set up local state, ensuring consistent view of global state. */ rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); INIT_WORK(&rdp->strict_work, strict_work_handler); - WARN_ON_ONCE(rdp->dynticks_nesting != 1); - WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp))); + WARN_ON_ONCE(ct->dynticks_nesting != 1); + WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu))); rdp->barrier_seq_snap = rcu_state.barrier_sequence; rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; @@ -4358,6 +3935,7 @@ rcu_boot_init_percpu_data(int cpu) int rcutree_prepare_cpu(unsigned int cpu) { unsigned long flags; + struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_node *rnp = rcu_get_root(); @@ -4366,7 +3944,7 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->blimit = blimit; - rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */ + ct->dynticks_nesting = 1; /* CPU not up, no tearing. */ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ /* diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 3cdc18997a38..d4a97e40ea9c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -191,9 +191,6 @@ struct rcu_data { /* 3) dynticks interface. */ int dynticks_snap; /* Per-GP tracking for dynticks. */ - long dynticks_nesting; /* Track process nesting level. */ - long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ - atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */ bool rcu_urgent_qs; /* GP old need light quiescent state. */ bool rcu_forced_tick; /* Forced tick to provide QS. 
*/ @@ -438,7 +435,6 @@ static void rcu_cpu_kthread_setup(unsigned int cpu); static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); -static void rcu_preempt_deferred_qs(struct task_struct *t); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); @@ -478,10 +474,6 @@ do { \ static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); -static void rcu_dynticks_task_enter(void); -static void rcu_dynticks_task_exit(void); -static void rcu_dynticks_task_trace_enter(void); -static void rcu_dynticks_task_trace_exit(void); /* Forward declarations for tree_stall.h */ static void record_gp_stall_check_time(void); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f092c7f18a5f..be667583a554 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -358,7 +358,7 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp) !(rnp->qsmaskinitnext & mask)) { mask_ofl_test |= mask; } else { - snap = rcu_dynticks_snap(rdp); + snap = rcu_dynticks_snap(cpu); if (rcu_dynticks_in_eqs(snap)) mask_ofl_test |= mask; else diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7ae1551479a2..438ecae6bd7e 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -595,7 +595,7 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t) * evaluate safety in terms of interrupt, softirq, and preemption * disabling. */ -static notrace void rcu_preempt_deferred_qs(struct task_struct *t) +notrace void rcu_preempt_deferred_qs(struct task_struct *t) { unsigned long flags; @@ -935,7 +935,7 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t) // period for a quiescent state from this CPU. Note that requests from // tasks are handled when removing the task from the blocked-tasks list // below. -static notrace void rcu_preempt_deferred_qs(struct task_struct *t) +notrace void rcu_preempt_deferred_qs(struct task_struct *t) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -1296,37 +1296,3 @@ static void rcu_bind_gp_kthread(void) return; housekeeping_affine(current, HK_TYPE_RCU); } - -/* Record the current task on dyntick-idle entry. */ -static __always_inline void rcu_dynticks_task_enter(void) -{ -#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) - WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); -#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ -} - -/* Record no current task on dyntick-idle exit. */ -static __always_inline void rcu_dynticks_task_exit(void) -{ -#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) - WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); -#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ -} - -/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */ -static __always_inline void rcu_dynticks_task_trace_enter(void) -{ -#ifdef CONFIG_TASKS_TRACE_RCU - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) - current->trc_reader_special.b.need_mb = true; -#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ -} - -/* Turn off heavyweight RCU tasks trace readers on idle/user exit. 
*/ -static __always_inline void rcu_dynticks_task_trace_exit(void) -{ -#ifdef CONFIG_TASKS_TRACE_RCU - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) - current->trc_reader_special.b.need_mb = false; -#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ -} diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 4995c078cff9..195cad14742d 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -409,7 +409,19 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp) static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp) { - unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity); + int cpu; + struct task_struct *rcuc; + unsigned long j; + + rcuc = rdp->rcu_cpu_kthread_task; + if (!rcuc) + return false; + + cpu = task_cpu(rcuc); + if (cpu_is_offline(cpu) || idle_cpu(cpu)) + return false; + + j = jiffies - READ_ONCE(rdp->rcuc_activity); if (jp) *jp = j; @@ -434,6 +446,9 @@ static void print_cpu_stall_info(int cpu) struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); char *ticks_title; unsigned long ticks_value; + bool rcuc_starved; + unsigned long j; + char buf[32]; /* * We could be printing a lot while holding a spinlock. Avoid @@ -450,8 +465,11 @@ static void print_cpu_stall_info(int cpu) } delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); falsepositive = rcu_is_gp_kthread_starving(NULL) && - rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)); - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", + rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)); + rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j); + if (rcuc_starved) + sprintf(buf, " rcuc=%ld jiffies(starved)", j); + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%04x/%ld/%#lx softirq=%u/%u fqs=%ld%s%s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], @@ -460,36 +478,14 @@ static void print_cpu_stall_info(int cpu) rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : "!."[!delta], ticks_value, ticks_title, - rcu_dynticks_snap(rdp) & 0xfff, - rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, + rcu_dynticks_snap(cpu) & 0xffff, + ct_dynticks_nesting_cpu(cpu), ct_dynticks_nmi_nesting_cpu(cpu), rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, + rcuc_starved ? buf : "", falsepositive ? " (false positive?)" : ""); } -static void rcuc_kthread_dump(struct rcu_data *rdp) -{ - int cpu; - unsigned long j; - struct task_struct *rcuc; - - rcuc = rdp->rcu_cpu_kthread_task; - if (!rcuc) - return; - - cpu = task_cpu(rcuc); - if (cpu_is_offline(cpu) || idle_cpu(cpu)) - return; - - if (!rcu_is_rcuc_kthread_starving(rdp, &j)) - return; - - pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); - sched_show_task(rcuc); - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); -} - /* Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) { @@ -662,9 +658,6 @@ static void print_cpu_stall(unsigned long gps) rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); - if (!use_softirq) - rcuc_kthread_dump(rdp); - rcu_dump_cpu_stacks(); raw_spin_lock_irqsave_rcu_node(rnp, flags); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 2e93acad1e31..738842c4886b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -85,7 +85,7 @@ module_param(rcu_normal_after_boot, int, 0444); * and while lockdep is disabled. 
* * Note that if the CPU is in the idle loop from an RCU point of view (ie: - * that we are in the section between rcu_idle_enter() and rcu_idle_exit()) + * that we are in the section between ct_idle_enter() and ct_idle_exit()) * then rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an * rcu_read_lock(). The reason for this is that RCU ignores CPUs that are * in such a section, considering these as in extended quiescent state,
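The diff above also shows RCU's interrupt-nesting bookkeeping moving behind ct_dynticks_nesting() and ct_dynticks_nmi_nesting(). The scheme itself is unchanged: the outermost handler that interrupts an RCU-idle CPU leaves the irq/NMI counter at exactly 1, nested handlers add 2, and rcu_is_cpu_rrupt_from_idle() reports an interrupt-from-idle only at that first level with a zero task-level nesting count. A single-threaded sketch of just that bookkeeping (names invented, WARN/lockdep checks omitted, not kernel code):

```c
/*
 * Sketch of the two nesting counters relocated from struct rcu_data into
 * the context-tracking state.  dynticks_nesting stays 0 here because the
 * modeled CPU is idle at the task level throughout.
 */
#include <stdbool.h>
#include <stdio.h>

static long dynticks_nesting;		/* task level: 0 in idle/user   */
static long dynticks_nmi_nesting;	/* irq/NMI level                */
static bool rcu_watching;		/* EQS counter bit, simplified  */

static void irq_enter(void)
{
	if (!rcu_watching) {
		rcu_watching = true;
		dynticks_nmi_nesting += 1;	/* outermost handler: exactly 1 */
	} else {
		dynticks_nmi_nesting += 2;	/* nested handler: stays > 1    */
	}
}

static void irq_exit(void)
{
	if (dynticks_nmi_nesting == 1) {	/* leaving the outermost handler */
		dynticks_nmi_nesting = 0;
		rcu_watching = false;		/* back to the interrupted EQS   */
	} else {
		dynticks_nmi_nesting -= 2;
	}
}

/* Rough analogue of rcu_is_cpu_rrupt_from_idle(). */
static bool irq_from_idle(void)
{
	return dynticks_nmi_nesting <= 1 && dynticks_nesting == 0;
}

int main(void)
{
	irq_enter();					/* irq taken from idle */
	printf("interrupted idle? %d\n", irq_from_idle());	/* 1 */
	irq_enter();					/* nested irq/NMI      */
	printf("interrupted idle? %d\n", irq_from_idle());	/* 0 */
	irq_exit();
	irq_exit();
	return 0;
}
```

Separately, the tree_stall.h hunk above replaces the stand-alone rcuc_kthread_dump() report with an optional " rcuc=<N> jiffies(starved)" field appended to each per-CPU stall line. A minimal stand-in for that formatting pattern, with printf() in place of pr_err() and invented values:

```c
/*
 * Stand-in for the print_cpu_stall_info() change: build the optional
 * rcuc-starvation suffix into a small buffer and append it to the line.
 */
#include <stdbool.h>
#include <stdio.h>

/* Pretend check: has the rcuc kthread stopped making progress? */
static bool rcuc_kthread_starving(unsigned long *jp)
{
	*jp = 21043;	/* invented "jiffies since last rcuc activity" */
	return true;
}

int main(void)
{
	char buf[32] = "";
	unsigned long j;
	bool falsepositive = false;

	if (rcuc_kthread_starving(&j))
		snprintf(buf, sizeof(buf), " rcuc=%lu jiffies(starved)", j);

	/* One stall line per CPU; the suffix appears only when starved. */
	printf("\t%d-...: (...) fqs=%ld%s%s\n", 3, 42L, buf,
	       falsepositive ? " (false positive?)" : "");
	return 0;
}
```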