From 874f670e6088d3bff3972ecd44c1cb00610f9183 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2021 18:54:35 +0200 Subject: sched: Clean up the might_sleep() underscore zoo __might_sleep() vs. ___might_sleep() is hard to distinguish. Aside of that the three underscore variant is exposed to provide a checkpoint for rescheduling points which are distinct from blocking points. They are semantically a preemption point which means that scheduling is state preserving. A real blocking operation, e.g. mutex_lock(), wait*(), which cannot preserve a task state which is not equal to RUNNING. While technically blocking on a "sleeping" spinlock in RT enabled kernels falls into the voluntary scheduling category because it has to wait until the contended spin/rw lock becomes available, the RT lock substitution code can semantically be mapped to a voluntary preemption because the RT lock substitution code and the scheduler are providing mechanisms to preserve the task state and to take regular non-lock related wakeups into account. Rename ___might_sleep() to __might_resched() to make the distinction of these functions clear. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210923165357.928693482@linutronix.de --- kernel/sched/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bba4128a3e6..c3943aa2f60c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9489,11 +9489,11 @@ void __might_sleep(const char *file, int line, int preempt_offset) (void *)current->task_state_change, (void *)current->task_state_change); - ___might_sleep(file, line, preempt_offset); + __might_resched(file, line, preempt_offset); } EXPORT_SYMBOL(__might_sleep); -void ___might_sleep(const char *file, int line, int preempt_offset) +void __might_resched(const char *file, int line, int preempt_offset) { /* Ratelimiting timestamp: */ static unsigned long prev_jiffy; @@ -9538,7 +9538,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } -EXPORT_SYMBOL(___might_sleep); +EXPORT_SYMBOL(__might_resched); void __cant_sleep(const char *file, int line, int preempt_offset) { -- cgit v1.2.3 From 42a387566c567603bafa1ec0c5b71c35cba83e86 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2021 18:54:38 +0200 Subject: sched: Remove preempt_offset argument from __might_sleep() All callers hand in 0 and never will hand in anything else. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210923165358.054321586@linutronix.de --- include/linux/kernel.h | 7 +++---- kernel/sched/core.c | 4 ++-- mm/memory.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/sched') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5e4ae54da73e..f95ee786e4ef 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -112,7 +112,7 @@ static __always_inline void might_resched(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP extern void __might_resched(const char *file, int line, int preempt_offset); -extern void __might_sleep(const char *file, int line, int preempt_offset); +extern void __might_sleep(const char *file, int line); extern void __cant_sleep(const char *file, int line, int preempt_offset); extern void __cant_migrate(const char *file, int line); @@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line); * supposed to. */ # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) + do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) /** * cant_sleep - annotation for functions that cannot sleep * @@ -170,8 +170,7 @@ extern void __cant_migrate(const char *file, int line); #else static inline void __might_resched(const char *file, int line, int preempt_offset) { } - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } +static inline void __might_sleep(const char *file, int line) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) # define cant_migrate() do { } while (0) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c3943aa2f60c..2d790df62ec9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9475,7 +9475,7 @@ static inline int preempt_count_equals(int preempt_offset) return (nested == preempt_offset); } -void __might_sleep(const char *file, int line, int preempt_offset) +void __might_sleep(const char *file, int line) { unsigned int state = get_current_state(); /* @@ -9489,7 +9489,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) (void *)current->task_state_change, (void *)current->task_state_change); - __might_resched(file, line, preempt_offset); + __might_resched(file, line, 0); } EXPORT_SYMBOL(__might_sleep); diff --git a/mm/memory.c b/mm/memory.c index 25fc46e87214..1cd1792c00f2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5255,7 +5255,7 @@ void __might_fault(const char *file, int line) return; if (pagefault_disabled()) return; - __might_sleep(file, line, 0); + __might_sleep(file, line); #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) if (current->mm) might_lock_read(¤t->mm->mmap_lock); -- cgit v1.2.3 From a45ed302b6e6fe5b03166321c08b4f2ad4a92a35 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2021 18:54:40 +0200 Subject: sched: Cleanup might_sleep() printks Convert them to pr_*(). No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210923165358.117496067@linutronix.de --- kernel/sched/core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d790df62ec9..a7c6069ddf9d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9516,16 +9516,14 @@ void __might_resched(const char *file, int line, int preempt_offset) /* Save this before calling printk(), since that will clobber it: */ preempt_disable_ip = get_preempt_disable_ip(current); - printk(KERN_ERR - "BUG: sleeping function called from invalid context at %s:%d\n", - file, line); - printk(KERN_ERR - "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), current->non_block_count, - current->pid, current->comm); + pr_err("BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), current->non_block_count, + current->pid, current->comm); if (task_stack_end_corrupted(current)) - printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); + pr_emerg("Thread overran stack, or stack corrupted\n"); debug_show_held_locks(current); if (irqs_disabled()) -- cgit v1.2.3 From 8d713b699e84aade6b64e241a35f22e166fc8174 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2021 18:54:41 +0200 Subject: sched: Make might_sleep() output less confusing might_sleep() output is pretty informative, but can be confusing at times especially with PREEMPT_RCU when the check triggers due to a voluntary sleep inside a RCU read side critical section: BUG: sleeping function called from invalid context at kernel/test.c:110 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 Preemption disabled at: migrate_disable+0x33/0xa0 in_atomic() is 0, but it still tells that preemption was disabled at migrate_disable(), which is completely useless because preemption is not disabled. But the interesting information to decode the above, i.e. the RCU nesting depth, is not printed. That becomes even more confusing when might_sleep() is invoked from cond_resched_lock() within a RCU read side critical section. Here the expected preemption count is 1 and not 0. BUG: sleeping function called from invalid context at kernel/test.c:131 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 Preemption disabled at: test_cond_lock+0xf3/0x1c0 So in_atomic() is set, which is expected as the caller holds a spinlock, but it's unclear why this is broken and the preempt disable IP is just pointing at the correct place, i.e. spin_lock(), which is obviously not helpful either. Make that more useful in general: - Print preempt_count() and the expected value and for the CONFIG_PREEMPT_RCU case: - Print the RCU read side critical section nesting depth - Print the preempt disable IP only when preempt count does not have the expected value. So the might_sleep() dump from a within a preemptible RCU read side critical section becomes: BUG: sleeping function called from invalid context at kernel/test.c:110 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 and the cond_resched_lock() case becomes: BUG: sleeping function called from invalid context at kernel/test.c:141 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 preempt_count: 1, expected: 1 RCU nest depth: 1, expected: 0 which makes is pretty obvious what's going on. For all other cases the preempt disable IP is still printed as before: BUG: sleeping function called from invalid context at kernel/test.c: 156 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 Preemption disabled at: [] test_might_sleep+0xbe/0xf8 BUG: sleeping function called from invalid context at kernel/test.c: 163 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 preempt_count: 1, expected: 0 RCU nest depth: 1, expected: 0 Preemption disabled at: [] test_might_sleep+0x1e4/0x280 This also prepares to provide a better debugging output for RT enabled kernels and their spinlock substitutions. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210923165358.181022656@linutronix.de --- kernel/sched/core.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a7c6069ddf9d..0a27cb8f72a9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9493,6 +9493,18 @@ void __might_sleep(const char *file, int line) } EXPORT_SYMBOL(__might_sleep); +static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) +{ + if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) + return; + + if (preempt_count() == preempt_offset) + return; + + pr_err("Preemption disabled at:"); + print_ip_sym(KERN_ERR, ip); +} + void __might_resched(const char *file, int line, int preempt_offset) { /* Ratelimiting timestamp: */ @@ -9521,6 +9533,13 @@ void __might_resched(const char *file, int line, int preempt_offset) pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", in_atomic(), irqs_disabled(), current->non_block_count, current->pid, current->comm); + pr_err("preempt_count: %x, expected: %x\n", preempt_count(), + preempt_offset); + + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { + pr_err("RCU nest depth: %d, expected: 0\n", + rcu_preempt_depth()); + } if (task_stack_end_corrupted(current)) pr_emerg("Thread overran stack, or stack corrupted\n"); @@ -9528,11 +9547,9 @@ void __might_resched(const char *file, int line, int preempt_offset) debug_show_held_locks(current); if (irqs_disabled()) print_irqtrace_events(current); - if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) - && !preempt_count_equals(preempt_offset)) { - pr_err("Preemption disabled at:"); - print_ip_sym(KERN_ERR, preempt_disable_ip); - } + + print_preempt_disable_ip(preempt_offset, preempt_disable_ip); + dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } -- cgit v1.2.3 From 50e081b96e35e43b65591f40f7376204decd1cb5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2021 18:54:43 +0200 Subject: sched: Make RCU nest depth distinct in __might_resched() For !RT kernels RCU nest depth in __might_resched() is always expected to be 0, but on RT kernels it can be non zero while the preempt count is expected to be always 0. Instead of playing magic games in interpreting the 'preempt_offset' argument, rename it to 'offsets' and use the lower 8 bits for the expected preempt count, allow to hand in the expected RCU nest depth in the upper bits and adopt the __might_resched() code and related checks and printks. The affected call sites are updated in subsequent steps. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210923165358.243232823@linutronix.de --- include/linux/kernel.h | 4 ++-- include/linux/sched.h | 3 +++ kernel/sched/core.c | 28 ++++++++++++++++------------ 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'kernel/sched') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f95ee786e4ef..e8696e4a45aa 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -111,7 +111,7 @@ static __always_inline void might_resched(void) #endif /* CONFIG_PREEMPT_* */ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -extern void __might_resched(const char *file, int line, int preempt_offset); +extern void __might_resched(const char *file, int line, unsigned int offsets); extern void __might_sleep(const char *file, int line); extern void __cant_sleep(const char *file, int line, int preempt_offset); extern void __cant_migrate(const char *file, int line); @@ -169,7 +169,7 @@ extern void __cant_migrate(const char *file, int line); # define non_block_end() WARN_ON(current->non_block_count-- == 0) #else static inline void __might_resched(const char *file, int line, - int preempt_offset) { } + unsigned int offsets) { } static inline void __might_sleep(const char *file, int line) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7a989f2487f8..b448c7460577 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2046,6 +2046,9 @@ extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_rwlock_read(rwlock_t *lock); extern int __cond_resched_rwlock_write(rwlock_t *lock); +#define MIGHT_RESCHED_RCU_SHIFT 8 +#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) + #define cond_resched_lock(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ __cond_resched_lock(lock); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0a27cb8f72a9..8d3fa0768e5b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9468,12 +9468,6 @@ void __init sched_init(void) } #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -static inline int preempt_count_equals(int preempt_offset) -{ - int nested = preempt_count() + rcu_preempt_depth(); - - return (nested == preempt_offset); -} void __might_sleep(const char *file, int line) { @@ -9505,7 +9499,16 @@ static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) print_ip_sym(KERN_ERR, ip); } -void __might_resched(const char *file, int line, int preempt_offset) +static inline bool resched_offsets_ok(unsigned int offsets) +{ + unsigned int nested = preempt_count(); + + nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; + + return nested == offsets; +} + +void __might_resched(const char *file, int line, unsigned int offsets) { /* Ratelimiting timestamp: */ static unsigned long prev_jiffy; @@ -9515,7 +9518,7 @@ void __might_resched(const char *file, int line, int preempt_offset) /* WARN_ON_ONCE() by default, no rate limit required: */ rcu_sleep_check(); - if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && + if ((resched_offsets_ok(offsets) && !irqs_disabled() && !is_idle_task(current) && !current->non_block_count) || system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || oops_in_progress) @@ -9534,11 +9537,11 @@ void __might_resched(const char *file, int line, int preempt_offset) in_atomic(), irqs_disabled(), current->non_block_count, current->pid, current->comm); pr_err("preempt_count: %x, expected: %x\n", preempt_count(), - preempt_offset); + offsets & MIGHT_RESCHED_PREEMPT_MASK); if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { - pr_err("RCU nest depth: %d, expected: 0\n", - rcu_preempt_depth()); + pr_err("RCU nest depth: %d, expected: %u\n", + rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); } if (task_stack_end_corrupted(current)) @@ -9548,7 +9551,8 @@ void __might_resched(const char *file, int line, int preempt_offset) if (irqs_disabled()) print_irqtrace_events(current); - print_preempt_disable_ip(preempt_offset, preempt_disable_ip); + print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, + preempt_disable_ip); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -- cgit v1.2.3