| author | Linus Torvalds | 2021-06-28 11:45:29 -0700 |
|---|---|---|
| committer | Linus Torvalds | 2021-06-28 11:45:29 -0700 |
| commit | a15286c63d113d4296c58867994cd266a28f5d6d (patch) | |
| tree | aaebbc86918c0c1943c26115d5bb1dd6c2fc2d9b /kernel | |
| parent | b89c07dea16137696d0f2d479ef665ef7c1022ab (diff) | |
| parent | 0e8a89d49d45197770f2e57fb15f1bc9ded96eb0 (diff) | |
Merge tag 'locking-core-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
- Core locking & atomics:
- Convert all architectures to ARCH_ATOMIC: move every architecture
to ARCH_ATOMIC, then get rid of ARCH_ATOMIC and all the
transitory facilities and #ifdefs (a toy model of the resulting
wrapper layering is sketched after this list).
Much reduction in complexity from that series:
63 files changed, 756 insertions(+), 4094 deletions(-)
- Self-test enhancements
- Futexes:
- Add the new FUTEX_LOCK_PI2 ABI, a variant that doesn't set
FLAGS_CLOCKRT (i.e. it uses CLOCK_MONOTONIC); a userspace usage
sketch follows this list.
[ The temptation to repurpose FUTEX_LOCK_PI's implicit setting of
FLAGS_CLOCKRT & invert the flag's meaning to avoid having to
introduce a new variant was resisted successfully. ]
- Enhance futex self-tests
- Lockdep:
- Fix dependency path printouts
- Optimize trace saving
- Broaden & fix wait-context checks
- Misc cleanups and fixes.
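To make the ARCH_ATOMIC end state concrete, here is a toy userspace model, not kernel code: the names mirror the kernel's, but every body is a stand-in. The layering it sketches is the real one, though: each architecture supplies only the bare arch_atomic_*() operations, and a single generic layer (generated as atomic-instrumented.h in the actual tree) wraps them once with instrumentation hooks such as KASAN/KCSAN.

```c
#include <stdio.h>

typedef struct { volatile int counter; } atomic_t;

/* "arch" layer: the bare operation an architecture would provide */
static inline void arch_atomic_inc(atomic_t *v)
{
	__atomic_fetch_add(&v->counter, 1, __ATOMIC_RELAXED);
}

/* generic layer: one instrumented wrapper shared by every arch;
 * the kernel hook is KASAN/KCSAN, this stand-in just traces */
static inline void instrument_atomic_read_write(const volatile void *v,
						size_t size)
{
	printf("instrumented access of %zu bytes\n", size);
	(void)v;
}

static inline void atomic_inc(atomic_t *v)
{
	instrument_atomic_read_write(v, sizeof(*v));
	arch_atomic_inc(v);
}

int main(void)
{
	atomic_t a = { 0 };

	atomic_inc(&a);
	printf("counter = %d\n", a.counter);
	return 0;
}
```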
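To make the new futex op concrete, here is a minimal userspace sketch (not part of the series): it acquires a PI futex with an absolute CLOCK_MONOTONIC deadline via FUTEX_LOCK_PI2, something plain FUTEX_LOCK_PI cannot express because it always measures its timeout on CLOCK_REALTIME. The local #define is only a fallback for uapi headers that predate the op; error handling is trimmed to the essentials.

```c
#include <errno.h>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef FUTEX_LOCK_PI2
#define FUTEX_LOCK_PI2 13	/* fallback for pre-5.14 uapi headers */
#endif

static uint32_t futex_word;	/* 0 = unlocked; the kernel stores the owner TID */

int main(void)
{
	struct timespec deadline;

	/* Absolute deadline, 2 seconds from now on CLOCK_MONOTONIC.
	 * FUTEX_LOCK_PI interprets its timeout on CLOCK_REALTIME only;
	 * FUTEX_LOCK_PI2 without FUTEX_CLOCK_REALTIME uses the
	 * monotonic clock. */
	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_sec += 2;

	if (syscall(SYS_futex, &futex_word, FUTEX_LOCK_PI2, 0,
		    &deadline, NULL, 0) == -1) {
		if (errno == ENOSYS)
			fprintf(stderr, "kernel lacks FUTEX_LOCK_PI2\n");
		else
			perror("FUTEX_LOCK_PI2");
		return 1;
	}

	/* ... critical section: we now own the PI futex ... */

	syscall(SYS_futex, &futex_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
	return 0;
}
```

Or'ing FUTEX_CLOCK_REALTIME into FUTEX_LOCK_PI2 restores realtime-clock behaviour, so the new op subsumes the old one; per the diff below, do_futex() simply forces FLAGS_CLOCKRT for FUTEX_LOCK_PI and lets FUTEX_LOCK_PI2 choose.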
* tag 'locking-core-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits)
locking/lockdep: Correct the description error for check_redundant()
futex: Provide FUTEX_LOCK_PI2 to support clock selection
futex: Prepare futex_lock_pi() for runtime clock selection
lockdep/selftest: Remove wait-type RCU_CALLBACK tests
lockdep/selftests: Fix selftests vs PROVE_RAW_LOCK_NESTING
lockdep: Fix wait-type for empty stack
locking/selftests: Add a selftest for check_irq_usage()
lockding/lockdep: Avoid to find wrong lock dep path in check_irq_usage()
locking/lockdep: Remove the unnecessary trace saving
locking/lockdep: Fix the dep path printing for backwards BFS
selftests: futex: Add futex compare requeue test
selftests: futex: Add futex wait test
seqlock: Remove trailing semicolon in macros
locking/lockdep: Reduce LOCKDEP dependency list
locking/lockdep,doc: Improve readability of the block matrix
locking/atomics: atomic-instrumented: simplify ifdeffery
locking/atomic: delete !ARCH_ATOMIC remnants
locking/atomic: xtensa: move to ARCH_ATOMIC
locking/atomic: sparc: move to ARCH_ATOMIC
locking/atomic: sh: move to ARCH_ATOMIC
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/futex.c | 29 |
| -rw-r--r-- | kernel/locking/lockdep.c | 127 |

2 files changed, 135 insertions, 21 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 408cad5e8968..2ecb07575055 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1727,12 +1727,9 @@ retry_private:
 		return ret;
 	}
 
-	if (!(flags & FLAGS_SHARED)) {
-		cond_resched();
-		goto retry_private;
-	}
-
 	cond_resched();
+	if (!(flags & FLAGS_SHARED))
+		goto retry_private;
 	goto retry;
 }
 
@@ -1873,7 +1870,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
 	 * If the caller intends to requeue more than 1 waiter to pifutex,
 	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
 	 * as we have means to handle the possible fault. If not, don't set
-	 * the bit unecessarily as it will force the subsequent unlock to enter
+	 * the bit unnecessarily as it will force the subsequent unlock to enter
 	 * the kernel.
 	 */
 	top_waiter = futex_top_waiter(hb1, key1);
@@ -2102,7 +2099,7 @@ retry_private:
 			continue;
 
 		/*
-		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
 		 * be paired with each other and no other futex ops.
 		 *
 		 * We should never be requeueing a futex_q with a pi_state,
@@ -2317,7 +2314,7 @@ retry:
 }
 
 /*
- * PI futexes can not be requeued and must remove themself from the
+ * PI futexes can not be requeued and must remove themselves from the
  * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
  */
 static void unqueue_me_pi(struct futex_q *q)
@@ -2785,7 +2782,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 	if (refill_pi_state_cache())
 		return -ENOMEM;
 
-	to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
+	to = futex_setup_timer(time, &timeout, flags, 0);
 
 retry:
 	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
@@ -2902,7 +2899,7 @@ no_block:
 	 */
 	res = fixup_owner(uaddr, &q, !ret);
 	/*
-	 * If fixup_owner() returned an error, proprogate that. If it acquired
+	 * If fixup_owner() returned an error, propagate that. If it acquired
 	 * the lock, clear our -ETIMEDOUT or -EINTR.
 	 */
 	if (res)
@@ -3279,7 +3276,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		 */
 		res = fixup_owner(uaddr2, &q, !ret);
 		/*
-		 * If fixup_owner() returned an error, proprogate that. If it
+		 * If fixup_owner() returned an error, propagate that. If it
 		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
 		 */
 		if (res)
@@ -3677,7 +3674,7 @@ void futex_exec_release(struct task_struct *tsk)
 {
 	/*
 	 * The state handling is done for consistency, but in the case of
-	 * exec() there is no way to prevent futher damage as the PID stays
+	 * exec() there is no way to prevent further damage as the PID stays
 	 * the same. But for the unlikely and arguably buggy case that a
 	 * futex is held on exec(), this provides at least as much state
 	 * consistency protection which is possible.
@@ -3709,12 +3706,14 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 
 	if (op & FUTEX_CLOCK_REALTIME) {
 		flags |= FLAGS_CLOCKRT;
-		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
+		    cmd != FUTEX_LOCK_PI2)
 			return -ENOSYS;
 	}
 
 	switch (cmd) {
 	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
 	case FUTEX_UNLOCK_PI:
 	case FUTEX_TRYLOCK_PI:
 	case FUTEX_WAIT_REQUEUE_PI:
@@ -3741,6 +3740,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	case FUTEX_WAKE_OP:
 		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
 	case FUTEX_LOCK_PI:
+		flags |= FLAGS_CLOCKRT;
+		fallthrough;
+	case FUTEX_LOCK_PI2:
 		return futex_lock_pi(uaddr, flags, timeout, 0);
 	case FUTEX_UNLOCK_PI:
 		return futex_unlock_pi(uaddr, flags);
@@ -3761,6 +3763,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
 	switch (cmd) {
 	case FUTEX_WAIT:
 	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
 	case FUTEX_WAIT_BITSET:
 	case FUTEX_WAIT_REQUEUE_PI:
 		return true;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index e32313072506..0c0524bfff99 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2306,7 +2306,56 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 }
 
 /*
- * printk the shortest lock dependencies from @start to @end in reverse order:
+ * Dependency path printing:
+ *
+ * After BFS we get a lock dependency path (linked via ->parent of lock_list),
+ * printing out each lock in the dependency path will help in understanding how
+ * the deadlock could happen. Here are some details about dependency path
+ * printing:
+ *
+ * 1) A lock_list can be either forwards or backwards for a lock dependency,
+ *    for a lock dependency A -> B, there are two lock_lists:
+ *
+ *    a) lock_list in the ->locks_after list of A, whose ->class is B and
+ *       ->links_to is A. In this case, we can say the lock_list is
+ *       "A -> B" (forwards case).
+ *
+ *    b) lock_list in the ->locks_before list of B, whose ->class is A
+ *       and ->links_to is B. In this case, we can say the lock_list is
+ *       "B <- A" (backwards case).
+ *
+ *    The ->trace of both a) and b) point to the call trace where B was
+ *    acquired with A held.
+ *
+ * 2) A "helper" lock_list is introduced during BFS, this lock_list doesn't
+ *    represent a certain lock dependency, it only provides an initial entry
+ *    for BFS. For example, BFS may introduce a "helper" lock_list whose
+ *    ->class is A, as a result BFS will search all dependencies starting with
+ *    A, e.g. A -> B or A -> C.
+ *
+ *    The notation of a forwards helper lock_list is like "-> A", which means
+ *    we should search the forwards dependencies starting with "A", e.g. A -> B
+ *    or A -> C.
+ *
+ *    The notation of a backwards helper lock_list is like "<- B", which means
+ *    we should search the backwards dependencies ending with "B", e.g.
+ *    B <- A or B <- C.
+ */
+
+/*
+ * printk the shortest lock dependencies from @root to @leaf in reverse order.
+ *
+ * We have a lock dependency path as follows:
+ *
+ * @root                                                      @leaf
+ *   |                                                          |
+ *   V                                                          V
+ *             ->parent                            ->parent
+ * | lock_list | <------- | lock_list | ... | lock_list  | <------- | lock_list |
+ * |    -> L1  |          | L1 -> L2  | ... |Ln-2 -> Ln-1|          | Ln-1 -> Ln|
+ *
+ * , so it's natural that we start from @leaf and print every ->class and
+ * ->trace until we reach the @root.
  */
 static void __used
 print_shortest_lock_dependencies(struct lock_list *leaf,
@@ -2334,6 +2383,61 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
 	} while (entry && (depth >= 0));
 }
 
+/*
+ * printk the shortest lock dependencies from @leaf to @root.
+ *
+ * We have a lock dependency path (from a backwards search) as follows:
+ *
+ * @leaf                                                      @root
+ *   |                                                          |
+ *   V                                                          V
+ *             ->parent                            ->parent
+ * | lock_list | -------> | lock_list | ... | lock_list  | -------> | lock_list |
+ * | L2 <- L1  |          | L3 <- L2  | ... | Ln <- Ln-1 |          |    <- Ln  |
+ *
+ * , so when we iterate from @leaf to @root, we actually print the lock
+ * dependency path L1 -> L2 -> .. -> Ln in the non-reverse order.
+ *
+ * Another thing to notice here is that ->class of L2 <- L1 is L1, while the
+ * ->trace of L2 <- L1 is the call trace of L2, in fact we don't have the call
+ * trace of L1 in the dependency path, which is alright, because most of the
+ * time we can figure out where L1 is held from the call trace of L2.
+ */
+static void __used
+print_shortest_lock_dependencies_backwards(struct lock_list *leaf,
+					   struct lock_list *root)
+{
+	struct lock_list *entry = leaf;
+	const struct lock_trace *trace = NULL;
+	int depth;
+
+	/*compute depth from generated tree by BFS*/
+	depth = get_lock_depth(leaf);
+
+	do {
+		print_lock_class_header(entry->class, depth);
+		if (trace) {
+			printk("%*s ... acquired at:\n", depth, "");
+			print_lock_trace(trace, 2);
+			printk("\n");
+		}
+
+		/*
+		 * Record the pointer to the trace for the next lock_list
+		 * entry, see the comments for the function.
+		 */
+		trace = entry->trace;
+
+		if (depth == 0 && (entry != root)) {
+			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			break;
+		}
+
+		entry = get_lock_parent(entry);
+		depth--;
+	} while (entry && (depth >= 0));
+}
+
 static void
 print_irq_lock_scenario(struct lock_list *safe_entry,
 			struct lock_list *unsafe_entry,
@@ -2448,10 +2552,7 @@ print_bad_irq_dependency(struct task_struct *curr,
 	lockdep_print_held_locks(curr);
 
 	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
-	prev_root->trace = save_trace();
-	if (!prev_root->trace)
-		return;
-	print_shortest_lock_dependencies(backwards_entry, prev_root);
+	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);
 
 	pr_warn("\nthe dependencies between the lock to be acquired");
 	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
@@ -2669,8 +2770,18 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
 	 * Step 3: we found a bad match! Now retrieve a lock from the backward
 	 * list whose usage mask matches the exclusive usage mask from the
 	 * lock found on the forward list.
+	 *
+	 * Note, we should only keep the LOCKF_ENABLED_IRQ_ALL bits, considering
+	 * the following case:
+	 *
+	 * When trying to add A -> B to the graph, we find that there is a
+	 * hardirq-safe L, that L -> ... -> A, and another hardirq-unsafe M,
+	 * that B -> ... -> M. However M is **softirq-safe**, if we use exact
+	 * invert bits of M's usage_mask, we will find another lock N that is
+	 * **softirq-unsafe** and N -> ... -> A, however N -> .. -> M will not
+	 * cause an inversion deadlock.
 	 */
-	backward_mask = original_mask(target_entry1->class->usage_mask);
+	backward_mask = original_mask(target_entry1->class->usage_mask & LOCKF_ENABLED_IRQ_ALL);
 
 	ret = find_usage_backwards(&this, backward_mask, &target_entry);
 	if (bfs_error(ret)) {
@@ -2720,7 +2831,7 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
  * <target> or not. If it can, <src> -> <target> dependency is already
  * in the graph.
  *
- * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if
+ * Return BFS_RMATCH if it does, or BFS_RNOMATCH if it does not, return BFS_E* if
  * any error appears in the bfs search.
  */
 static noinline enum bfs_result
@@ -4579,7 +4690,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
 	u8 curr_inner;
 	int depth;
 
-	if (!curr->lockdep_depth || !next_inner || next->trylock)
+	if (!next_inner || next->trylock)
 		return 0;
 
 	if (!next_outer)
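The behaviour of the new print_shortest_lock_dependencies_backwards() is easy to model outside the kernel. In the toy below, struct node, path_depth() and print_backwards() are invented stand-ins for lock_list, get_lock_depth() and the real printer. It reproduces the walk along ->parent from @leaf to @root and the one-entry delay in printing ->trace: the trace of "L2 <- L1" records where L2 was acquired with L1 held, so it belongs under L2's header, not L1's.

```c
#include <stdio.h>

struct node {
	const char *class_name;	/* stands in for lock_list->class */
	const char *trace;	/* stands in for lock_list->trace */
	struct node *parent;	/* BFS backlink, like lock_list->parent */
};

/* like get_lock_depth(): count hops from @leaf to the root helper */
static int path_depth(const struct node *leaf)
{
	int depth = 0;

	while (leaf->parent) {
		leaf = leaf->parent;
		depth++;
	}
	return depth;
}

static void print_backwards(struct node *leaf)
{
	const char *trace = NULL;
	int depth = path_depth(leaf);

	for (struct node *entry = leaf; entry && depth >= 0;
	     entry = entry->parent, depth--) {
		printf("%*s-> %s\n", 2 * depth, "", entry->class_name);
		/* the trace recorded on the previous entry belongs here */
		if (trace)
			printf("%*s   ... acquired at: %s\n", 2 * depth, "", trace);
		trace = entry->trace;
	}
}

int main(void)
{
	/* path for L1 -> L2 -> L3: root helper "<- L3", then "L3 <- L2",
	 * then leaf "L2 <- L1"; each entry's class is the left-hand lock */
	struct node root = { "L3", NULL, NULL };
	struct node mid  = { "L2", "site where L3 was taken holding L2", &root };
	struct node leaf = { "L1", "site where L2 was taken holding L1", &mid };

	print_backwards(&leaf);
	return 0;
}
```

Running it prints L1, then L2 with L2's acquisition site, then L3 with L3's, i.e. the L1 -> L2 -> L3 path in non-reverse order, exactly as the comment block in the diff describes.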
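The `& LOCKF_ENABLED_IRQ_ALL` added in check_irq_usage() is the subtlest change above, so here is a stand-alone model of the argument. The bit values and the two-bit-wide original_mask() are invented (the real LOCKF_* layout is wider); only the shape of the reasoning is taken from the comment in the diff.

```c
#include <stdio.h>

/* Invented, simplified usage bits; real lockdep masks differ. */
#define USED_IN_HARDIRQ  0x1u	/* taken in hardirq context (hardirq-safe) */
#define USED_IN_SOFTIRQ  0x2u	/* taken in softirq context (softirq-safe) */
#define ENABLED_HARDIRQ  0x4u	/* taken with hardirqs on (hardirq-unsafe) */
#define ENABLED_SOFTIRQ  0x8u	/* taken with softirqs on (softirq-unsafe) */
#define ENABLED_IRQ_ALL  (ENABLED_HARDIRQ | ENABLED_SOFTIRQ)

/* Model of original_mask(): each usage bit conflicts with its mirror,
 * USED_IN_* <-> ENABLED_*, so swap the two halves of the mask. */
static unsigned int original_mask(unsigned int mask)
{
	return ((mask & 0x3u) << 2) | ((mask & 0xcu) >> 2);
}

int main(void)
{
	/* M from the comment in check_irq_usage(): hardirq-unsafe
	 * (ENABLED_HARDIRQ) but also softirq-safe (USED_IN_SOFTIRQ). */
	unsigned int m = ENABLED_HARDIRQ | USED_IN_SOFTIRQ;

	/* Unmasked: 0x9 = USED_IN_HARDIRQ | ENABLED_SOFTIRQ. The stray
	 * ENABLED_SOFTIRQ bit lets the backward search match a lock N
	 * that is merely softirq-unsafe, a false positive. */
	printf("backward mask, unmasked: %#x\n", original_mask(m));

	/* Masked first with ENABLED_IRQ_ALL: 0x1 = USED_IN_HARDIRQ only,
	 * so only a genuinely hardirq-safe lock can match. */
	printf("backward mask, masked:   %#x\n",
	       original_mask(m & ENABLED_IRQ_ALL));
	return 0;
}
```

Without the masking, M's stray USED_IN_SOFTIRQ bit inverts into ENABLED_SOFTIRQ and the backward search can report a lock N that would not actually complete an inversion deadlock; masking first leaves only the USED_IN_HARDIRQ key.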