From dfd402a4c4baae42398ce9180ff424d589b8bffc Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:54 +0100 Subject: kcsan: Add Kernel Concurrency Sanitizer infrastructure Kernel Concurrency Sanitizer (KCSAN) is a dynamic data-race detector for kernel space. KCSAN is a sampling watchpoint-based data-race detector. See the included Documentation/dev-tools/kcsan.rst for more details. This patch adds basic infrastructure, but does not yet enable KCSAN for any architecture. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/Makefile | 1 + kernel/kcsan/Makefile | 11 + kernel/kcsan/atomic.h | 27 +++ kernel/kcsan/core.c | 626 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/kcsan/debugfs.c | 275 +++++++++++++++++++++ kernel/kcsan/encoding.h | 94 ++++++++ kernel/kcsan/kcsan.h | 108 +++++++++ kernel/kcsan/report.c | 320 +++++++++++++++++++++++++ kernel/kcsan/test.c | 121 ++++++++++ 9 files changed, 1583 insertions(+) create mode 100644 kernel/kcsan/Makefile create mode 100644 kernel/kcsan/atomic.h create mode 100644 kernel/kcsan/core.c create mode 100644 kernel/kcsan/debugfs.c create mode 100644 kernel/kcsan/encoding.h create mode 100644 kernel/kcsan/kcsan.h create mode 100644 kernel/kcsan/report.c create mode 100644 kernel/kcsan/test.c (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index daad787fb795..74ab46e2ebd1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ +obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile new file mode 100644 index 000000000000..dd15b62ec0b5 --- /dev/null +++ b/kernel/kcsan/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +KCSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) + +CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \ + $(call cc-option,-fno-stack-protector,) + +obj-y := core.o debugfs.o report.o +obj-$(CONFIG_KCSAN_SELFTEST) += test.o diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h new file mode 100644 index 000000000000..c9c3fe628011 --- /dev/null +++ b/kernel/kcsan/atomic.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _KERNEL_KCSAN_ATOMIC_H +#define _KERNEL_KCSAN_ATOMIC_H + +#include + +/* + * Helper that returns true if access to ptr should be considered as an atomic + * access, even though it is not explicitly atomic. + * + * List all volatile globals that have been observed in races, to suppress + * data race reports between accesses to these variables. + * + * For now, we assume that volatile accesses of globals are as strong as atomic + * accesses (READ_ONCE, WRITE_ONCE cast to volatile). The situation is still not + * entirely clear, as on some architectures (Alpha) READ_ONCE/WRITE_ONCE do more + * than cast to volatile. Eventually, we hope to be able to remove this + * function. + */ +static inline bool kcsan_is_atomic(const volatile void *ptr) +{ + /* only jiffies for now */ + return ptr == &jiffies; +} + +#endif /* _KERNEL_KCSAN_ATOMIC_H */ diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c new file mode 100644 index 000000000000..d9410d58c93e --- /dev/null +++ b/kernel/kcsan/core.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "atomic.h" +#include "encoding.h" +#include "kcsan.h" + +bool kcsan_enabled; + +/* Per-CPU kcsan_ctx for interrupts */ +static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { + .disable_count = 0, + .atomic_next = 0, + .atomic_nest_count = 0, + .in_flat_atomic = false, +}; + +/* + * Helper macros to index into adjacent slots slots, starting from address slot + * itself, followed by the right and left slots. + * + * The purpose is 2-fold: + * + * 1. if during insertion the address slot is already occupied, check if + * any adjacent slots are free; + * 2. accesses that straddle a slot boundary due to size that exceeds a + * slot's range may check adjacent slots if any watchpoint matches. + * + * Note that accesses with very large size may still miss a watchpoint; however, + * given this should be rare, this is a reasonable trade-off to make, since this + * will avoid: + * + * 1. excessive contention between watchpoint checks and setup; + * 2. larger number of simultaneous watchpoints without sacrificing + * performance. + * + * Example: SLOT_IDX values for KCSAN_CHECK_ADJACENT=1, where i is [0, 1, 2]: + * + * slot=0: [ 1, 2, 0] + * slot=9: [10, 11, 9] + * slot=63: [64, 65, 63] + */ +#define NUM_SLOTS (1 + 2 * KCSAN_CHECK_ADJACENT) +#define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) + +/* + * SLOT_IDX_FAST is used in fast-path. Not first checking the address's primary + * slot (middle) is fine if we assume that data races occur rarely. The set of + * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to + * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}. + */ +#define SLOT_IDX_FAST(slot, i) (slot + i) + +/* + * Watchpoints, with each entry encoded as defined in encoding.h: in order to be + * able to safely update and access a watchpoint without introducing locking + * overhead, we encode each watchpoint as a single atomic long. The initial + * zero-initialized state matches INVALID_WATCHPOINT. + * + * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to + * use more complicated SLOT_IDX_FAST calculation with modulo in fast-path. + */ +static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; + +/* + * Instructions to skip watching counter, used in should_watch(). We use a + * per-CPU counter to avoid excessive contention. + */ +static DEFINE_PER_CPU(long, kcsan_skip); + +static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, + bool expect_write, + long *encoded_watchpoint) +{ + const int slot = watchpoint_slot(addr); + const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK; + atomic_long_t *watchpoint; + unsigned long wp_addr_masked; + size_t wp_size; + bool is_write; + int i; + + BUILD_BUG_ON(CONFIG_KCSAN_NUM_WATCHPOINTS < NUM_SLOTS); + + for (i = 0; i < NUM_SLOTS; ++i) { + watchpoint = &watchpoints[SLOT_IDX_FAST(slot, i)]; + *encoded_watchpoint = atomic_long_read(watchpoint); + if (!decode_watchpoint(*encoded_watchpoint, &wp_addr_masked, + &wp_size, &is_write)) + continue; + + if (expect_write && !is_write) + continue; + + /* Check if the watchpoint matches the access. */ + if (matching_access(wp_addr_masked, wp_size, addr_masked, size)) + return watchpoint; + } + + return NULL; +} + +static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, + bool is_write) +{ + const int slot = watchpoint_slot(addr); + const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); + atomic_long_t *watchpoint; + int i; + + /* Check slot index logic, ensuring we stay within array bounds. */ + BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT); + BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT + 1) != 0); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, + KCSAN_CHECK_ADJACENT) != + ARRAY_SIZE(watchpoints) - 1); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, + KCSAN_CHECK_ADJACENT + 1) != + ARRAY_SIZE(watchpoints) - NUM_SLOTS); + + for (i = 0; i < NUM_SLOTS; ++i) { + long expect_val = INVALID_WATCHPOINT; + + /* Try to acquire this slot. */ + watchpoint = &watchpoints[SLOT_IDX(slot, i)]; + if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, + encoded_watchpoint)) + return watchpoint; + } + + return NULL; +} + +/* + * Return true if watchpoint was successfully consumed, false otherwise. + * + * This may return false if: + * + * 1. another thread already consumed the watchpoint; + * 2. the thread that set up the watchpoint already removed it; + * 3. the watchpoint was removed and then re-used. + */ +static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, + long encoded_watchpoint) +{ + return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, + CONSUMED_WATCHPOINT); +} + +/* + * Return true if watchpoint was not touched, false if consumed. + */ +static inline bool remove_watchpoint(atomic_long_t *watchpoint) +{ + return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != + CONSUMED_WATCHPOINT; +} + +static inline struct kcsan_ctx *get_ctx(void) +{ + /* + * In interrupt, use raw_cpu_ptr to avoid unnecessary checks, that would + * also result in calls that generate warnings in uaccess regions. + */ + return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); +} + +static inline bool is_atomic(const volatile void *ptr) +{ + struct kcsan_ctx *ctx = get_ctx(); + + if (unlikely(ctx->atomic_next > 0)) { + /* + * Because we do not have separate contexts for nested + * interrupts, in case atomic_next is set, we simply assume that + * the outer interrupt set atomic_next. In the worst case, we + * will conservatively consider operations as atomic. This is a + * reasonable trade-off to make, since this case should be + * extremely rare; however, even if extremely rare, it could + * lead to false positives otherwise. + */ + if ((hardirq_count() >> HARDIRQ_SHIFT) < 2) + --ctx->atomic_next; /* in task, or outer interrupt */ + return true; + } + if (unlikely(ctx->atomic_nest_count > 0 || ctx->in_flat_atomic)) + return true; + + return kcsan_is_atomic(ptr); +} + +static inline bool should_watch(const volatile void *ptr, int type) +{ + /* + * Never set up watchpoints when memory operations are atomic. + * + * Need to check this first, before kcsan_skip check below: (1) atomics + * should not count towards skipped instructions, and (2) to actually + * decrement kcsan_atomic_next for consecutive instruction stream. + */ + if ((type & KCSAN_ACCESS_ATOMIC) != 0 || is_atomic(ptr)) + return false; + + if (this_cpu_dec_return(kcsan_skip) >= 0) + return false; + + /* + * NOTE: If we get here, kcsan_skip must always be reset in slow path + * via reset_kcsan_skip() to avoid underflow. + */ + + /* this operation should be watched */ + return true; +} + +static inline void reset_kcsan_skip(void) +{ + long skip_count = CONFIG_KCSAN_SKIP_WATCH - + (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ? + prandom_u32_max(CONFIG_KCSAN_SKIP_WATCH) : + 0); + this_cpu_write(kcsan_skip, skip_count); +} + +static inline bool kcsan_is_enabled(void) +{ + return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0; +} + +static inline unsigned int get_delay(void) +{ + unsigned int delay = in_task() ? CONFIG_KCSAN_UDELAY_TASK : + CONFIG_KCSAN_UDELAY_INTERRUPT; + return delay - (IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ? + prandom_u32_max(delay) : + 0); +} + +/* + * Pull everything together: check_access() below contains the performance + * critical operations; the fast-path (including check_access) functions should + * all be inlinable by the instrumentation functions. + * + * The slow-path (kcsan_found_watchpoint, kcsan_setup_watchpoint) are + * non-inlinable -- note that, we prefix these with "kcsan_" to ensure they can + * be filtered from the stacktrace, as well as give them unique names for the + * UACCESS whitelist of objtool. Each function uses user_access_save/restore(), + * since they do not access any user memory, but instrumentation is still + * emitted in UACCESS regions. + */ + +static noinline void kcsan_found_watchpoint(const volatile void *ptr, + size_t size, bool is_write, + atomic_long_t *watchpoint, + long encoded_watchpoint) +{ + unsigned long flags; + bool consumed; + + if (!kcsan_is_enabled()) + return; + /* + * Consume the watchpoint as soon as possible, to minimize the chances + * of !consumed. Consuming the watchpoint must always be guarded by + * kcsan_is_enabled() check, as otherwise we might erroneously + * triggering reports when disabled. + */ + consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint); + + /* keep this after try_consume_watchpoint */ + flags = user_access_save(); + + if (consumed) { + kcsan_report(ptr, size, is_write, true, raw_smp_processor_id(), + KCSAN_REPORT_CONSUMED_WATCHPOINT); + } else { + /* + * The other thread may not print any diagnostics, as it has + * already removed the watchpoint, or another thread consumed + * the watchpoint before this thread. + */ + kcsan_counter_inc(KCSAN_COUNTER_REPORT_RACES); + } + kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES); + + user_access_restore(flags); +} + +static noinline void kcsan_setup_watchpoint(const volatile void *ptr, + size_t size, bool is_write) +{ + atomic_long_t *watchpoint; + union { + u8 _1; + u16 _2; + u32 _4; + u64 _8; + } expect_value; + bool value_change = false; + unsigned long ua_flags = user_access_save(); + unsigned long irq_flags; + + /* + * Always reset kcsan_skip counter in slow-path to avoid underflow; see + * should_watch(). + */ + reset_kcsan_skip(); + + if (!kcsan_is_enabled()) + goto out; + + if (!check_encodable((unsigned long)ptr, size)) { + kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES); + goto out; + } + + /* + * Disable interrupts & preemptions to avoid another thread on the same + * CPU accessing memory locations for the set up watchpoint; this is to + * avoid reporting races to e.g. CPU-local data. + * + * An alternative would be adding the source CPU to the watchpoint + * encoding, and checking that watchpoint-CPU != this-CPU. There are + * several problems with this: + * 1. we should avoid stealing more bits from the watchpoint encoding + * as it would affect accuracy, as well as increase performance + * overhead in the fast-path; + * 2. if we are preempted, but there *is* a genuine data race, we + * would *not* report it -- since this is the common case (vs. + * CPU-local data accesses), it makes more sense (from a data race + * detection point of view) to simply disable preemptions to ensure + * as many tasks as possible run on other CPUs. + */ + local_irq_save(irq_flags); + + watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); + if (watchpoint == NULL) { + /* + * Out of capacity: the size of `watchpoints`, and the frequency + * with which `should_watch()` returns true should be tweaked so + * that this case happens very rarely. + */ + kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY); + goto out_unlock; + } + + kcsan_counter_inc(KCSAN_COUNTER_SETUP_WATCHPOINTS); + kcsan_counter_inc(KCSAN_COUNTER_USED_WATCHPOINTS); + + /* + * Read the current value, to later check and infer a race if the data + * was modified via a non-instrumented access, e.g. from a device. + */ + switch (size) { + case 1: + expect_value._1 = READ_ONCE(*(const u8 *)ptr); + break; + case 2: + expect_value._2 = READ_ONCE(*(const u16 *)ptr); + break; + case 4: + expect_value._4 = READ_ONCE(*(const u32 *)ptr); + break; + case 8: + expect_value._8 = READ_ONCE(*(const u64 *)ptr); + break; + default: + break; /* ignore; we do not diff the values */ + } + + if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) { + kcsan_disable_current(); + pr_err("KCSAN: watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n", + is_write ? "write" : "read", size, ptr, + watchpoint_slot((unsigned long)ptr), + encode_watchpoint((unsigned long)ptr, size, is_write)); + kcsan_enable_current(); + } + + /* + * Delay this thread, to increase probability of observing a racy + * conflicting access. + */ + udelay(get_delay()); + + /* + * Re-read value, and check if it is as expected; if not, we infer a + * racy access. + */ + switch (size) { + case 1: + value_change = expect_value._1 != READ_ONCE(*(const u8 *)ptr); + break; + case 2: + value_change = expect_value._2 != READ_ONCE(*(const u16 *)ptr); + break; + case 4: + value_change = expect_value._4 != READ_ONCE(*(const u32 *)ptr); + break; + case 8: + value_change = expect_value._8 != READ_ONCE(*(const u64 *)ptr); + break; + default: + break; /* ignore; we do not diff the values */ + } + + /* Check if this access raced with another. */ + if (!remove_watchpoint(watchpoint)) { + /* + * No need to increment 'data_races' counter, as the racing + * thread already did. + */ + kcsan_report(ptr, size, is_write, size > 8 || value_change, + smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); + } else if (value_change) { + /* Inferring a race, since the value should not have changed. */ + kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN); + if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN)) + kcsan_report(ptr, size, is_write, true, + smp_processor_id(), + KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); + } + + kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); +out_unlock: + local_irq_restore(irq_flags); +out: + user_access_restore(ua_flags); +} + +static __always_inline void check_access(const volatile void *ptr, size_t size, + int type) +{ + const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; + atomic_long_t *watchpoint; + long encoded_watchpoint; + + /* + * Avoid user_access_save in fast-path: find_watchpoint is safe without + * user_access_save, as the address that ptr points to is only used to + * check if a watchpoint exists; ptr is never dereferenced. + */ + watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write, + &encoded_watchpoint); + /* + * It is safe to check kcsan_is_enabled() after find_watchpoint in the + * slow-path, as long as no state changes that cause a data race to be + * detected and reported have occurred until kcsan_is_enabled() is + * checked. + */ + + if (unlikely(watchpoint != NULL)) + kcsan_found_watchpoint(ptr, size, is_write, watchpoint, + encoded_watchpoint); + else if (unlikely(should_watch(ptr, type))) + kcsan_setup_watchpoint(ptr, size, is_write); +} + +/* === Public interface ===================================================== */ + +void __init kcsan_init(void) +{ + BUG_ON(!in_task()); + + kcsan_debugfs_init(); + + /* + * We are in the init task, and no other tasks should be running; + * WRITE_ONCE without memory barrier is sufficient. + */ + if (IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE)) + WRITE_ONCE(kcsan_enabled, true); +} + +/* === Exported interface =================================================== */ + +void kcsan_disable_current(void) +{ + ++get_ctx()->disable_count; +} +EXPORT_SYMBOL(kcsan_disable_current); + +void kcsan_enable_current(void) +{ + if (get_ctx()->disable_count-- == 0) { + /* + * Warn if kcsan_enable_current() calls are unbalanced with + * kcsan_disable_current() calls, which causes disable_count to + * become negative and should not happen. + */ + kcsan_disable_current(); /* restore to 0, KCSAN still enabled */ + kcsan_disable_current(); /* disable to generate warning */ + WARN(1, "Unbalanced %s()", __func__); + kcsan_enable_current(); + } +} +EXPORT_SYMBOL(kcsan_enable_current); + +void kcsan_nestable_atomic_begin(void) +{ + /* + * Do *not* check and warn if we are in a flat atomic region: nestable + * and flat atomic regions are independent from each other. + * See include/linux/kcsan.h: struct kcsan_ctx comments for more + * comments. + */ + + ++get_ctx()->atomic_nest_count; +} +EXPORT_SYMBOL(kcsan_nestable_atomic_begin); + +void kcsan_nestable_atomic_end(void) +{ + if (get_ctx()->atomic_nest_count-- == 0) { + /* + * Warn if kcsan_nestable_atomic_end() calls are unbalanced with + * kcsan_nestable_atomic_begin() calls, which causes + * atomic_nest_count to become negative and should not happen. + */ + kcsan_nestable_atomic_begin(); /* restore to 0 */ + kcsan_disable_current(); /* disable to generate warning */ + WARN(1, "Unbalanced %s()", __func__); + kcsan_enable_current(); + } +} +EXPORT_SYMBOL(kcsan_nestable_atomic_end); + +void kcsan_flat_atomic_begin(void) +{ + get_ctx()->in_flat_atomic = true; +} +EXPORT_SYMBOL(kcsan_flat_atomic_begin); + +void kcsan_flat_atomic_end(void) +{ + get_ctx()->in_flat_atomic = false; +} +EXPORT_SYMBOL(kcsan_flat_atomic_end); + +void kcsan_atomic_next(int n) +{ + get_ctx()->atomic_next = n; +} +EXPORT_SYMBOL(kcsan_atomic_next); + +void __kcsan_check_access(const volatile void *ptr, size_t size, int type) +{ + check_access(ptr, size, type); +} +EXPORT_SYMBOL(__kcsan_check_access); + +/* + * KCSAN uses the same instrumentation that is emitted by supported compilers + * for ThreadSanitizer (TSAN). + * + * When enabled, the compiler emits instrumentation calls (the functions + * prefixed with "__tsan" below) for all loads and stores that it generated; + * inline asm is not instrumented. + * + * Note that, not all supported compiler versions distinguish aligned/unaligned + * accesses, but e.g. recent versions of Clang do. We simply alias the unaligned + * version to the generic version, which can handle both. + */ + +#define DEFINE_TSAN_READ_WRITE(size) \ + void __tsan_read##size(void *ptr) \ + { \ + check_access(ptr, size, 0); \ + } \ + EXPORT_SYMBOL(__tsan_read##size); \ + void __tsan_unaligned_read##size(void *ptr) \ + __alias(__tsan_read##size); \ + EXPORT_SYMBOL(__tsan_unaligned_read##size); \ + void __tsan_write##size(void *ptr) \ + { \ + check_access(ptr, size, KCSAN_ACCESS_WRITE); \ + } \ + EXPORT_SYMBOL(__tsan_write##size); \ + void __tsan_unaligned_write##size(void *ptr) \ + __alias(__tsan_write##size); \ + EXPORT_SYMBOL(__tsan_unaligned_write##size) + +DEFINE_TSAN_READ_WRITE(1); +DEFINE_TSAN_READ_WRITE(2); +DEFINE_TSAN_READ_WRITE(4); +DEFINE_TSAN_READ_WRITE(8); +DEFINE_TSAN_READ_WRITE(16); + +void __tsan_read_range(void *ptr, size_t size) +{ + check_access(ptr, size, 0); +} +EXPORT_SYMBOL(__tsan_read_range); + +void __tsan_write_range(void *ptr, size_t size) +{ + check_access(ptr, size, KCSAN_ACCESS_WRITE); +} +EXPORT_SYMBOL(__tsan_write_range); + +/* + * The below are not required by KCSAN, but can still be emitted by the + * compiler. + */ +void __tsan_func_entry(void *call_pc) +{ +} +EXPORT_SYMBOL(__tsan_func_entry); +void __tsan_func_exit(void) +{ +} +EXPORT_SYMBOL(__tsan_func_exit); +void __tsan_init(void) +{ +} +EXPORT_SYMBOL(__tsan_init); diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c new file mode 100644 index 000000000000..041d520a0183 --- /dev/null +++ b/kernel/kcsan/debugfs.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kcsan.h" + +/* + * Statistics counters. + */ +static atomic_long_t counters[KCSAN_COUNTER_COUNT]; + +/* + * Addresses for filtering functions from reporting. This list can be used as a + * whitelist or blacklist. + */ +static struct { + unsigned long *addrs; /* array of addresses */ + size_t size; /* current size */ + int used; /* number of elements used */ + bool sorted; /* if elements are sorted */ + bool whitelist; /* if list is a blacklist or whitelist */ +} report_filterlist = { + .addrs = NULL, + .size = 8, /* small initial size */ + .used = 0, + .sorted = false, + .whitelist = false, /* default is blacklist */ +}; +static DEFINE_SPINLOCK(report_filterlist_lock); + +static const char *counter_to_name(enum kcsan_counter_id id) +{ + switch (id) { + case KCSAN_COUNTER_USED_WATCHPOINTS: + return "used_watchpoints"; + case KCSAN_COUNTER_SETUP_WATCHPOINTS: + return "setup_watchpoints"; + case KCSAN_COUNTER_DATA_RACES: + return "data_races"; + case KCSAN_COUNTER_NO_CAPACITY: + return "no_capacity"; + case KCSAN_COUNTER_REPORT_RACES: + return "report_races"; + case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: + return "races_unknown_origin"; + case KCSAN_COUNTER_UNENCODABLE_ACCESSES: + return "unencodable_accesses"; + case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: + return "encoding_false_positives"; + case KCSAN_COUNTER_COUNT: + BUG(); + } + return NULL; +} + +void kcsan_counter_inc(enum kcsan_counter_id id) +{ + atomic_long_inc(&counters[id]); +} + +void kcsan_counter_dec(enum kcsan_counter_id id) +{ + atomic_long_dec(&counters[id]); +} + +/* + * The microbenchmark allows benchmarking KCSAN core runtime only. To run + * multiple threads, pipe 'microbench=' from multiple tasks into the + * debugfs file. + */ +static void microbenchmark(unsigned long iters) +{ + cycles_t cycles; + + pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); + + cycles = get_cycles(); + while (iters--) { + /* + * We can run this benchmark from multiple tasks; this address + * calculation increases likelyhood of some accesses overlapping + * (they still won't conflict because all are reads). + */ + unsigned long addr = + iters % (CONFIG_KCSAN_NUM_WATCHPOINTS * PAGE_SIZE); + __kcsan_check_read((void *)addr, sizeof(long)); + } + cycles = get_cycles() - cycles; + + pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); +} + +static int cmp_filterlist_addrs(const void *rhs, const void *lhs) +{ + const unsigned long a = *(const unsigned long *)rhs; + const unsigned long b = *(const unsigned long *)lhs; + + return a < b ? -1 : a == b ? 0 : 1; +} + +bool kcsan_skip_report_debugfs(unsigned long func_addr) +{ + unsigned long symbolsize, offset; + unsigned long flags; + bool ret = false; + + if (!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset)) + return false; + func_addr -= offset; /* get function start */ + + spin_lock_irqsave(&report_filterlist_lock, flags); + if (report_filterlist.used == 0) + goto out; + + /* Sort array if it is unsorted, and then do a binary search. */ + if (!report_filterlist.sorted) { + sort(report_filterlist.addrs, report_filterlist.used, + sizeof(unsigned long), cmp_filterlist_addrs, NULL); + report_filterlist.sorted = true; + } + ret = !!bsearch(&func_addr, report_filterlist.addrs, + report_filterlist.used, sizeof(unsigned long), + cmp_filterlist_addrs); + if (report_filterlist.whitelist) + ret = !ret; + +out: + spin_unlock_irqrestore(&report_filterlist_lock, flags); + return ret; +} + +static void set_report_filterlist_whitelist(bool whitelist) +{ + unsigned long flags; + + spin_lock_irqsave(&report_filterlist_lock, flags); + report_filterlist.whitelist = whitelist; + spin_unlock_irqrestore(&report_filterlist_lock, flags); +} + +/* Returns 0 on success, error-code otherwise. */ +static ssize_t insert_report_filterlist(const char *func) +{ + unsigned long flags; + unsigned long addr = kallsyms_lookup_name(func); + ssize_t ret = 0; + + if (!addr) { + pr_err("KCSAN: could not find function: '%s'\n", func); + return -ENOENT; + } + + spin_lock_irqsave(&report_filterlist_lock, flags); + + if (report_filterlist.addrs == NULL) { + /* initial allocation */ + report_filterlist.addrs = + kmalloc_array(report_filterlist.size, + sizeof(unsigned long), GFP_KERNEL); + if (report_filterlist.addrs == NULL) { + ret = -ENOMEM; + goto out; + } + } else if (report_filterlist.used == report_filterlist.size) { + /* resize filterlist */ + size_t new_size = report_filterlist.size * 2; + unsigned long *new_addrs = + krealloc(report_filterlist.addrs, + new_size * sizeof(unsigned long), GFP_KERNEL); + + if (new_addrs == NULL) { + /* leave filterlist itself untouched */ + ret = -ENOMEM; + goto out; + } + + report_filterlist.size = new_size; + report_filterlist.addrs = new_addrs; + } + + /* Note: deduplicating should be done in userspace. */ + report_filterlist.addrs[report_filterlist.used++] = + kallsyms_lookup_name(func); + report_filterlist.sorted = false; + +out: + spin_unlock_irqrestore(&report_filterlist_lock, flags); + return ret; +} + +static int show_info(struct seq_file *file, void *v) +{ + int i; + unsigned long flags; + + /* show stats */ + seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled)); + for (i = 0; i < KCSAN_COUNTER_COUNT; ++i) + seq_printf(file, "%s: %ld\n", counter_to_name(i), + atomic_long_read(&counters[i])); + + /* show filter functions, and filter type */ + spin_lock_irqsave(&report_filterlist_lock, flags); + seq_printf(file, "\n%s functions: %s\n", + report_filterlist.whitelist ? "whitelisted" : "blacklisted", + report_filterlist.used == 0 ? "none" : ""); + for (i = 0; i < report_filterlist.used; ++i) + seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]); + spin_unlock_irqrestore(&report_filterlist_lock, flags); + + return 0; +} + +static int debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_info, NULL); +} + +static ssize_t debugfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + char kbuf[KSYM_NAME_LEN]; + char *arg; + int read_len = count < (sizeof(kbuf) - 1) ? count : (sizeof(kbuf) - 1); + + if (copy_from_user(kbuf, buf, read_len)) + return -EFAULT; + kbuf[read_len] = '\0'; + arg = strstrip(kbuf); + + if (!strcmp(arg, "on")) { + WRITE_ONCE(kcsan_enabled, true); + } else if (!strcmp(arg, "off")) { + WRITE_ONCE(kcsan_enabled, false); + } else if (!strncmp(arg, "microbench=", sizeof("microbench=") - 1)) { + unsigned long iters; + + if (kstrtoul(&arg[sizeof("microbench=") - 1], 0, &iters)) + return -EINVAL; + microbenchmark(iters); + } else if (!strcmp(arg, "whitelist")) { + set_report_filterlist_whitelist(true); + } else if (!strcmp(arg, "blacklist")) { + set_report_filterlist_whitelist(false); + } else if (arg[0] == '!') { + ssize_t ret = insert_report_filterlist(&arg[1]); + + if (ret < 0) + return ret; + } else { + return -EINVAL; + } + + return count; +} + +static const struct file_operations debugfs_ops = { .read = seq_read, + .open = debugfs_open, + .write = debugfs_write, + .release = single_release }; + +void __init kcsan_debugfs_init(void) +{ + debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops); +} diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h new file mode 100644 index 000000000000..e17bdac0e54b --- /dev/null +++ b/kernel/kcsan/encoding.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _KERNEL_KCSAN_ENCODING_H +#define _KERNEL_KCSAN_ENCODING_H + +#include +#include +#include + +#include "kcsan.h" + +#define SLOT_RANGE PAGE_SIZE +#define INVALID_WATCHPOINT 0 +#define CONSUMED_WATCHPOINT 1 + +/* + * The maximum useful size of accesses for which we set up watchpoints is the + * max range of slots we check on an access. + */ +#define MAX_ENCODABLE_SIZE (SLOT_RANGE * (1 + KCSAN_CHECK_ADJACENT)) + +/* + * Number of bits we use to store size info. + */ +#define WATCHPOINT_SIZE_BITS bits_per(MAX_ENCODABLE_SIZE) +/* + * This encoding for addresses discards the upper (1 for is-write + SIZE_BITS); + * however, most 64-bit architectures do not use the full 64-bit address space. + * Also, in order for a false positive to be observable 2 things need to happen: + * + * 1. different addresses but with the same encoded address race; + * 2. and both map onto the same watchpoint slots; + * + * Both these are assumed to be very unlikely. However, in case it still happens + * happens, the report logic will filter out the false positive (see report.c). + */ +#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG - 1 - WATCHPOINT_SIZE_BITS) + +/* + * Masks to set/retrieve the encoded data. + */ +#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG - 1) +#define WATCHPOINT_SIZE_MASK \ + GENMASK(BITS_PER_LONG - 2, BITS_PER_LONG - 2 - WATCHPOINT_SIZE_BITS) +#define WATCHPOINT_ADDR_MASK \ + GENMASK(BITS_PER_LONG - 3 - WATCHPOINT_SIZE_BITS, 0) + +static inline bool check_encodable(unsigned long addr, size_t size) +{ + return size <= MAX_ENCODABLE_SIZE; +} + +static inline long encode_watchpoint(unsigned long addr, size_t size, + bool is_write) +{ + return (long)((is_write ? WATCHPOINT_WRITE_MASK : 0) | + (size << WATCHPOINT_ADDR_BITS) | + (addr & WATCHPOINT_ADDR_MASK)); +} + +static inline bool decode_watchpoint(long watchpoint, + unsigned long *addr_masked, size_t *size, + bool *is_write) +{ + if (watchpoint == INVALID_WATCHPOINT || + watchpoint == CONSUMED_WATCHPOINT) + return false; + + *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; + *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> + WATCHPOINT_ADDR_BITS; + *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); + + return true; +} + +/* + * Return watchpoint slot for an address. + */ +static inline int watchpoint_slot(unsigned long addr) +{ + return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS; +} + +static inline bool matching_access(unsigned long addr1, size_t size1, + unsigned long addr2, size_t size2) +{ + unsigned long end_range1 = addr1 + size1 - 1; + unsigned long end_range2 = addr2 + size2 - 1; + + return addr1 <= end_range2 && addr2 <= end_range1; +} + +#endif /* _KERNEL_KCSAN_ENCODING_H */ diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h new file mode 100644 index 000000000000..1bb2f1c0d61e --- /dev/null +++ b/kernel/kcsan/kcsan.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * The Kernel Concurrency Sanitizer (KCSAN) infrastructure. For more info please + * see Documentation/dev-tools/kcsan.rst. + */ + +#ifndef _KERNEL_KCSAN_KCSAN_H +#define _KERNEL_KCSAN_KCSAN_H + +#include + +/* The number of adjacent watchpoints to check. */ +#define KCSAN_CHECK_ADJACENT 1 + +/* + * Globally enable and disable KCSAN. + */ +extern bool kcsan_enabled; + +/* + * Initialize debugfs file. + */ +void kcsan_debugfs_init(void); + +enum kcsan_counter_id { + /* + * Number of watchpoints currently in use. + */ + KCSAN_COUNTER_USED_WATCHPOINTS, + + /* + * Total number of watchpoints set up. + */ + KCSAN_COUNTER_SETUP_WATCHPOINTS, + + /* + * Total number of data races. + */ + KCSAN_COUNTER_DATA_RACES, + + /* + * Number of times no watchpoints were available. + */ + KCSAN_COUNTER_NO_CAPACITY, + + /* + * A thread checking a watchpoint raced with another checking thread; + * only one will be reported. + */ + KCSAN_COUNTER_REPORT_RACES, + + /* + * Observed data value change, but writer thread unknown. + */ + KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN, + + /* + * The access cannot be encoded to a valid watchpoint. + */ + KCSAN_COUNTER_UNENCODABLE_ACCESSES, + + /* + * Watchpoint encoding caused a watchpoint to fire on mismatching + * accesses. + */ + KCSAN_COUNTER_ENCODING_FALSE_POSITIVES, + + KCSAN_COUNTER_COUNT, /* number of counters */ +}; + +/* + * Increment/decrement counter with given id; avoid calling these in fast-path. + */ +void kcsan_counter_inc(enum kcsan_counter_id id); +void kcsan_counter_dec(enum kcsan_counter_id id); + +/* + * Returns true if data races in the function symbol that maps to func_addr + * (offsets are ignored) should *not* be reported. + */ +bool kcsan_skip_report_debugfs(unsigned long func_addr); + +enum kcsan_report_type { + /* + * The thread that set up the watchpoint and briefly stalled was + * signalled that another thread triggered the watchpoint. + */ + KCSAN_REPORT_RACE_SIGNAL, + + /* + * A thread found and consumed a matching watchpoint. + */ + KCSAN_REPORT_CONSUMED_WATCHPOINT, + + /* + * No other thread was observed to race with the access, but the data + * value before and after the stall differs. + */ + KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, +}; +/* + * Print a race report from thread that encountered the race. + */ +void kcsan_report(const volatile void *ptr, size_t size, bool is_write, + bool value_change, int cpu_id, enum kcsan_report_type type); + +#endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c new file mode 100644 index 000000000000..ead5610bafa7 --- /dev/null +++ b/kernel/kcsan/report.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include "kcsan.h" +#include "encoding.h" + +/* + * Max. number of stack entries to show in the report. + */ +#define NUM_STACK_ENTRIES 64 + +/* + * Other thread info: communicated from other racing thread to thread that set + * up the watchpoint, which then prints the complete report atomically. Only + * need one struct, as all threads should to be serialized regardless to print + * the reports, with reporting being in the slow-path. + */ +static struct { + const volatile void *ptr; + size_t size; + bool is_write; + int task_pid; + int cpu_id; + unsigned long stack_entries[NUM_STACK_ENTRIES]; + int num_stack_entries; +} other_info = { .ptr = NULL }; + +/* + * This spinlock protects reporting and other_info, since other_info is usually + * required when reporting. + */ +static DEFINE_SPINLOCK(report_lock); + +/* + * Special rules to skip reporting. + */ +static bool skip_report(bool is_write, bool value_change, + unsigned long top_frame) +{ + if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write && + !value_change) { + /* + * The access is a write, but the data value did not change. + * + * We opt-out of this filter for certain functions at request of + * maintainers. + */ + char buf[64]; + + snprintf(buf, sizeof(buf), "%ps", (void *)top_frame); + if (!strnstr(buf, "rcu_", sizeof(buf)) && + !strnstr(buf, "_rcu", sizeof(buf)) && + !strnstr(buf, "_srcu", sizeof(buf))) + return true; + } + + return kcsan_skip_report_debugfs(top_frame); +} + +static inline const char *get_access_type(bool is_write) +{ + return is_write ? "write" : "read"; +} + +/* Return thread description: in task or interrupt. */ +static const char *get_thread_desc(int task_id) +{ + if (task_id != -1) { + static char buf[32]; /* safe: protected by report_lock */ + + snprintf(buf, sizeof(buf), "task %i", task_id); + return buf; + } + return "interrupt"; +} + +/* Helper to skip KCSAN-related functions in stack-trace. */ +static int get_stack_skipnr(unsigned long stack_entries[], int num_entries) +{ + char buf[64]; + int skip = 0; + + for (; skip < num_entries; ++skip) { + snprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); + if (!strnstr(buf, "csan_", sizeof(buf)) && + !strnstr(buf, "tsan_", sizeof(buf)) && + !strnstr(buf, "_once_size", sizeof(buf))) { + break; + } + } + return skip; +} + +/* Compares symbolized strings of addr1 and addr2. */ +static int sym_strcmp(void *addr1, void *addr2) +{ + char buf1[64]; + char buf2[64]; + + snprintf(buf1, sizeof(buf1), "%pS", addr1); + snprintf(buf2, sizeof(buf2), "%pS", addr2); + return strncmp(buf1, buf2, sizeof(buf1)); +} + +/* + * Returns true if a report was generated, false otherwise. + */ +static bool print_report(const volatile void *ptr, size_t size, bool is_write, + bool value_change, int cpu_id, + enum kcsan_report_type type) +{ + unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; + int num_stack_entries = + stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); + int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); + int other_skipnr; + + /* + * Must check report filter rules before starting to print. + */ + if (skip_report(is_write, true, stack_entries[skipnr])) + return false; + + if (type == KCSAN_REPORT_RACE_SIGNAL) { + other_skipnr = get_stack_skipnr(other_info.stack_entries, + other_info.num_stack_entries); + + /* value_change is only known for the other thread */ + if (skip_report(other_info.is_write, value_change, + other_info.stack_entries[other_skipnr])) + return false; + } + + /* Print report header. */ + pr_err("==================================================================\n"); + switch (type) { + case KCSAN_REPORT_RACE_SIGNAL: { + void *this_fn = (void *)stack_entries[skipnr]; + void *other_fn = (void *)other_info.stack_entries[other_skipnr]; + int cmp; + + /* + * Order functions lexographically for consistent bug titles. + * Do not print offset of functions to keep title short. + */ + cmp = sym_strcmp(other_fn, this_fn); + pr_err("BUG: KCSAN: data-race in %ps / %ps\n", + cmp < 0 ? other_fn : this_fn, + cmp < 0 ? this_fn : other_fn); + } break; + + case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: + pr_err("BUG: KCSAN: data-race in %pS\n", + (void *)stack_entries[skipnr]); + break; + + default: + BUG(); + } + + pr_err("\n"); + + /* Print information about the racing accesses. */ + switch (type) { + case KCSAN_REPORT_RACE_SIGNAL: + pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", + get_access_type(other_info.is_write), other_info.ptr, + other_info.size, get_thread_desc(other_info.task_pid), + other_info.cpu_id); + + /* Print the other thread's stack trace. */ + stack_trace_print(other_info.stack_entries + other_skipnr, + other_info.num_stack_entries - other_skipnr, + 0); + + pr_err("\n"); + pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", + get_access_type(is_write), ptr, size, + get_thread_desc(in_task() ? task_pid_nr(current) : -1), + cpu_id); + break; + + case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: + pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", + get_access_type(is_write), ptr, size, + get_thread_desc(in_task() ? task_pid_nr(current) : -1), + cpu_id); + break; + + default: + BUG(); + } + /* Print stack trace of this thread. */ + stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, + 0); + + /* Print report footer. */ + pr_err("\n"); + pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); + dump_stack_print_info(KERN_DEFAULT); + pr_err("==================================================================\n"); + + return true; +} + +static void release_report(unsigned long *flags, enum kcsan_report_type type) +{ + if (type == KCSAN_REPORT_RACE_SIGNAL) + other_info.ptr = NULL; /* mark for reuse */ + + spin_unlock_irqrestore(&report_lock, *flags); +} + +/* + * Depending on the report type either sets other_info and returns false, or + * acquires the matching other_info and returns true. If other_info is not + * required for the report type, simply acquires report_lock and returns true. + */ +static bool prepare_report(unsigned long *flags, const volatile void *ptr, + size_t size, bool is_write, int cpu_id, + enum kcsan_report_type type) +{ + if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && + type != KCSAN_REPORT_RACE_SIGNAL) { + /* other_info not required; just acquire report_lock */ + spin_lock_irqsave(&report_lock, *flags); + return true; + } + +retry: + spin_lock_irqsave(&report_lock, *flags); + + switch (type) { + case KCSAN_REPORT_CONSUMED_WATCHPOINT: + if (other_info.ptr != NULL) + break; /* still in use, retry */ + + other_info.ptr = ptr; + other_info.size = size; + other_info.is_write = is_write; + other_info.task_pid = in_task() ? task_pid_nr(current) : -1; + other_info.cpu_id = cpu_id; + other_info.num_stack_entries = stack_trace_save( + other_info.stack_entries, NUM_STACK_ENTRIES, 1); + + spin_unlock_irqrestore(&report_lock, *flags); + + /* + * The other thread will print the summary; other_info may now + * be consumed. + */ + return false; + + case KCSAN_REPORT_RACE_SIGNAL: + if (other_info.ptr == NULL) + break; /* no data available yet, retry */ + + /* + * First check if this is the other_info we are expecting, i.e. + * matches based on how watchpoint was encoded. + */ + if (!matching_access((unsigned long)other_info.ptr & + WATCHPOINT_ADDR_MASK, + other_info.size, + (unsigned long)ptr & WATCHPOINT_ADDR_MASK, + size)) + break; /* mismatching watchpoint, retry */ + + if (!matching_access((unsigned long)other_info.ptr, + other_info.size, (unsigned long)ptr, + size)) { + /* + * If the actual accesses to not match, this was a false + * positive due to watchpoint encoding. + */ + kcsan_counter_inc( + KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); + + /* discard this other_info */ + release_report(flags, KCSAN_REPORT_RACE_SIGNAL); + return false; + } + + /* + * Matching & usable access in other_info: keep other_info_lock + * locked, as this thread consumes it to print the full report; + * unlocked in release_report. + */ + return true; + + default: + BUG(); + } + + spin_unlock_irqrestore(&report_lock, *flags); + goto retry; +} + +void kcsan_report(const volatile void *ptr, size_t size, bool is_write, + bool value_change, int cpu_id, enum kcsan_report_type type) +{ + unsigned long flags = 0; + + kcsan_disable_current(); + if (prepare_report(&flags, ptr, size, is_write, cpu_id, type)) { + if (print_report(ptr, size, is_write, value_change, cpu_id, + type) && + panic_on_warn) + panic("panic_on_warn set ...\n"); + + release_report(&flags, type); + } + kcsan_enable_current(); +} diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c new file mode 100644 index 000000000000..0bae63c5ca65 --- /dev/null +++ b/kernel/kcsan/test.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +#include "encoding.h" + +#define ITERS_PER_TEST 2000 + +/* Test requirements. */ +static bool test_requires(void) +{ + /* random should be initialized for the below tests */ + return prandom_u32() + prandom_u32() != 0; +} + +/* + * Test watchpoint encode and decode: check that encoding some access's info, + * and then subsequent decode preserves the access's info. + */ +static bool test_encode_decode(void) +{ + int i; + + for (i = 0; i < ITERS_PER_TEST; ++i) { + size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1; + bool is_write = !!prandom_u32_max(2); + unsigned long addr; + + prandom_bytes(&addr, sizeof(addr)); + if (WARN_ON(!check_encodable(addr, size))) + return false; + + /* encode and decode */ + { + const long encoded_watchpoint = + encode_watchpoint(addr, size, is_write); + unsigned long verif_masked_addr; + size_t verif_size; + bool verif_is_write; + + /* check special watchpoints */ + if (WARN_ON(decode_watchpoint( + INVALID_WATCHPOINT, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + if (WARN_ON(decode_watchpoint( + CONSUMED_WATCHPOINT, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + + /* check decoding watchpoint returns same data */ + if (WARN_ON(!decode_watchpoint( + encoded_watchpoint, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + if (WARN_ON(verif_masked_addr != + (addr & WATCHPOINT_ADDR_MASK))) + goto fail; + if (WARN_ON(verif_size != size)) + goto fail; + if (WARN_ON(is_write != verif_is_write)) + goto fail; + + continue; +fail: + pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", + __func__, is_write ? "write" : "read", size, + addr, encoded_watchpoint, + verif_is_write ? "write" : "read", verif_size, + verif_masked_addr); + return false; + } + } + + return true; +} + +/* Test access matching function. */ +static bool test_matching_access(void) +{ + if (WARN_ON(!matching_access(10, 1, 10, 1))) + return false; + if (WARN_ON(!matching_access(10, 2, 11, 1))) + return false; + if (WARN_ON(!matching_access(10, 1, 9, 2))) + return false; + if (WARN_ON(matching_access(10, 1, 11, 1))) + return false; + if (WARN_ON(matching_access(9, 1, 10, 1))) + return false; + return true; +} + +static int __init kcsan_selftest(void) +{ + int passed = 0; + int total = 0; + +#define RUN_TEST(do_test) \ + do { \ + ++total; \ + if (do_test()) \ + ++passed; \ + else \ + pr_err("KCSAN selftest: " #do_test " failed"); \ + } while (0) + + RUN_TEST(test_requires); + RUN_TEST(test_encode_decode); + RUN_TEST(test_matching_access); + + pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total); + if (passed != total) + panic("KCSAN selftests failed"); + return 0; +} +postcore_initcall(kcsan_selftest); -- cgit v1.2.3 From 0ebba7141eadc4804ec5b4bb5106331b0c3abf4c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:58 +0100 Subject: build, kcsan: Add KCSAN build exceptions This blacklists several compilation units from KCSAN. See the respective inline comments for the reasoning. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- drivers/firmware/efi/libstub/Makefile | 2 ++ kernel/Makefile | 5 +++++ kernel/sched/Makefile | 6 ++++++ mm/Makefile | 8 ++++++++ 4 files changed, 21 insertions(+) (limited to 'kernel') diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ee0661ddb25b..5d0a645c0de8 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -31,7 +31,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__DISABLE_EXPORTS GCOV_PROFILE := n +# Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n +KCSAN_SANITIZE := n UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y diff --git a/kernel/Makefile b/kernel/Makefile index 74ab46e2ebd1..cc53f7c25446 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -23,6 +23,9 @@ endif # Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip() # in coverage traces. KCOV_INSTRUMENT_softirq.o := n +# Avoid KCSAN instrumentation in softirq ("No shared variables, all the data +# are CPU local" => assume no data races), to reduce overhead in interrupts. +KCSAN_SANITIZE_softirq.o = n # These are called from save_stack_trace() on slub debug path, # and produce insane amounts of uninteresting coverage. KCOV_INSTRUMENT_module.o := n @@ -30,6 +33,7 @@ KCOV_INSTRUMENT_extable.o := n # Don't self-instrument. KCOV_INSTRUMENT_kcov.o := n KASAN_SANITIZE_kcov.o := n +KCSAN_SANITIZE_kcov.o := n CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) # cond_syscall is currently not LTO compatible @@ -118,6 +122,7 @@ obj-$(CONFIG_RSEQ) += rseq.o obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o KASAN_SANITIZE_stackleak.o := n +KCSAN_SANITIZE_stackleak.o := n KCOV_INSTRUMENT_stackleak.o := n $(obj)/configs.o: $(obj)/config_data.gz diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5662b5..e9307a9c54e7 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -7,6 +7,12 @@ endif # that is not a function of syscall inputs. E.g. involuntary context switches. KCOV_INSTRUMENT := n +# There are numerous races here, however, most of them due to plain accesses. +# This would make it even harder for syzbot to find reproducers, because these +# bugs trigger without specific input. Disable by default, but should re-enable +# eventually. +KCSAN_SANITIZE := n + ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond diff --git a/mm/Makefile b/mm/Makefile index d996846697ef..56c1964bb3a1 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -7,6 +7,14 @@ KASAN_SANITIZE_slab_common.o := n KASAN_SANITIZE_slab.o := n KASAN_SANITIZE_slub.o := n +# These produce frequent data race reports: most of them are due to races on +# the same word but accesses to different bits of that word. Re-enable KCSAN +# for these when we have more consensus on what to do about them. +KCSAN_SANITIZE_slab_common.o := n +KCSAN_SANITIZE_slab.o := n +KCSAN_SANITIZE_slub.o := n +KCSAN_SANITIZE_page_alloc.o := n + # These files are disabled because they produce non-interesting and/or # flaky coverage that is not a function of syscall inputs. E.g. slab is out of # free pages, or a task is migrated between nodes. -- cgit v1.2.3 From 5cbaefe9743bf14c9d3106db0cc19f8cb0a3ca22 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 10:41:43 +0100 Subject: kcsan: Improve various small stylistic details Tidy up a few bits: - Fix typos and grammar, improve wording. - Remove spurious newlines that are col80 warning artifacts where the resulting line-break is worse than the disease it's curing. - Use core kernel coding style to improve readability and reduce spurious code pattern variations. - Use better vertical alignment for structure definitions and initialization sequences. - Misc other small details. No change in functionality intended. Cc: linux-kernel@vger.kernel.org Cc: Marco Elver Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney Cc: Will Deacon Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- include/linux/compiler-clang.h | 2 +- include/linux/compiler.h | 2 +- include/linux/kcsan-checks.h | 22 ++++++--------- include/linux/kcsan.h | 23 ++++++---------- include/linux/seqlock.h | 8 +++--- kernel/kcsan/atomic.h | 2 +- kernel/kcsan/core.c | 59 ++++++++++++++++++---------------------- kernel/kcsan/debugfs.c | 62 ++++++++++++++++++++---------------------- kernel/kcsan/encoding.h | 25 +++++++++-------- kernel/kcsan/kcsan.h | 11 ++++---- kernel/kcsan/report.c | 42 ++++++++++++++-------------- kernel/kcsan/test.c | 6 ++-- kernel/sched/Makefile | 2 +- lib/Kconfig.kcsan | 16 +++++------ 15 files changed, 131 insertions(+), 153 deletions(-) (limited to 'kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9933ca8ffe16..9cfa4a5c6f42 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -226,7 +226,7 @@ config X86 select VIRT_TO_BUS select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS - select HAVE_ARCH_KCSAN if X86_64 + select HAVE_ARCH_KCSAN if X86_64 config INSTRUCTION_DECODER def_bool y diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index a213eb55e725..2cb42d8bdedc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -16,7 +16,7 @@ #define KASAN_ABI_VERSION 5 #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* emulate gcc's __SANITIZE_ADDRESS__ flag */ +/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ #define __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7d3e77781578..ad8c76144a3c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -313,7 +313,7 @@ unsigned long read_word_at_a_time(const void *addr) #include /* - * data_race: macro to document that accesses in an expression may conflict with + * data_race(): macro to document that accesses in an expression may conflict with * other concurrent accesses resulting in data races, but the resulting * behaviour is deemed safe regardless. * diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index e78220661086..ef3ee233a3fa 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -8,17 +8,17 @@ /* * Access type modifiers. */ -#define KCSAN_ACCESS_WRITE 0x1 +#define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 /* - * __kcsan_*: Always calls into runtime when KCSAN is enabled. This may be used + * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used * even in compilation units that selectively disable KCSAN, but must use KCSAN - * to validate access to an address. Never use these in header files! + * to validate access to an address. Never use these in header files! */ #ifdef CONFIG_KCSAN /** - * __kcsan_check_access - check generic access for data race + * __kcsan_check_access - check generic access for data races * * @ptr address of access * @size size of access @@ -32,7 +32,7 @@ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, #endif /* - * kcsan_*: Only calls into runtime when the particular compilation unit has + * kcsan_*: Only calls into the runtime when the particular compilation unit has * KCSAN instrumentation enabled. May be used in header files. */ #ifdef __SANITIZE_THREAD__ @@ -77,16 +77,12 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) /* - * Check for atomic accesses: if atomic access are not ignored, this simply - * aliases to kcsan_check_access, otherwise becomes a no-op. + * Check for atomic accesses: if atomic accesses are not ignored, this simply + * aliases to kcsan_check_access(), otherwise becomes a no-op. */ #ifdef CONFIG_KCSAN_IGNORE_ATOMICS -#define kcsan_check_atomic_read(...) \ - do { \ - } while (0) -#define kcsan_check_atomic_write(...) \ - do { \ - } while (0) +#define kcsan_check_atomic_read(...) do { } while (0) +#define kcsan_check_atomic_write(...) do { } while (0) #else #define kcsan_check_atomic_read(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 9047048fee84..1019e3a2c689 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -94,21 +94,14 @@ void kcsan_atomic_next(int n); #else /* CONFIG_KCSAN */ -static inline void kcsan_init(void) { } - -static inline void kcsan_disable_current(void) { } - -static inline void kcsan_enable_current(void) { } - -static inline void kcsan_nestable_atomic_begin(void) { } - -static inline void kcsan_nestable_atomic_end(void) { } - -static inline void kcsan_flat_atomic_begin(void) { } - -static inline void kcsan_flat_atomic_end(void) { } - -static inline void kcsan_atomic_next(int n) { } +static inline void kcsan_init(void) { } +static inline void kcsan_disable_current(void) { } +static inline void kcsan_enable_current(void) { } +static inline void kcsan_nestable_atomic_begin(void) { } +static inline void kcsan_nestable_atomic_end(void) { } +static inline void kcsan_flat_atomic_begin(void) { } +static inline void kcsan_flat_atomic_end(void) { } +static inline void kcsan_atomic_next(int n) { } #endif /* CONFIG_KCSAN */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f52c91be8939..f80d50cac199 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -48,7 +48,7 @@ * * As a consequence, we take the following best-effort approach for raw usage * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, - * pessimistically mark then next KCSAN_SEQLOCK_REGION_MAX memory accesses as + * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following * memory operations are considered atomic. Usage of seqlocks via seqlock_t * interface is not affected. @@ -265,7 +265,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * - * Note that, writes surrounding the barrier should be declared atomic (e.g. + * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because @@ -465,7 +465,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) { unsigned ret = read_seqcount_begin(&sl->seqcount); - kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry */ + kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ kcsan_flat_atomic_begin(); return ret; } @@ -473,7 +473,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* - * Assume not nested: read_seqretry may be called multiple times when + * Assume not nested: read_seqretry() may be called multiple times when * completing read critical section. */ kcsan_flat_atomic_end(); diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index c9c3fe628011..576e03ddd6a3 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -6,7 +6,7 @@ #include /* - * Helper that returns true if access to ptr should be considered as an atomic + * Helper that returns true if access to @ptr should be considered an atomic * access, even though it is not explicitly atomic. * * List all volatile globals that have been observed in races, to suppress diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index d9410d58c93e..3314fc29e236 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -19,10 +19,10 @@ bool kcsan_enabled; /* Per-CPU kcsan_ctx for interrupts */ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { - .disable_count = 0, - .atomic_next = 0, - .atomic_nest_count = 0, - .in_flat_atomic = false, + .disable_count = 0, + .atomic_next = 0, + .atomic_nest_count = 0, + .in_flat_atomic = false, }; /* @@ -50,11 +50,11 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * slot=9: [10, 11, 9] * slot=63: [64, 65, 63] */ -#define NUM_SLOTS (1 + 2 * KCSAN_CHECK_ADJACENT) +#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) #define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) /* - * SLOT_IDX_FAST is used in fast-path. Not first checking the address's primary + * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary * slot (middle) is fine if we assume that data races occur rarely. The set of * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}. @@ -68,9 +68,9 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * zero-initialized state matches INVALID_WATCHPOINT. * * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to - * use more complicated SLOT_IDX_FAST calculation with modulo in fast-path. + * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path. */ -static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; +static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; /* * Instructions to skip watching counter, used in should_watch(). We use a @@ -78,7 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; */ static DEFINE_PER_CPU(long, kcsan_skip); -static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, +static inline atomic_long_t *find_watchpoint(unsigned long addr, + size_t size, bool expect_write, long *encoded_watchpoint) { @@ -110,8 +111,8 @@ static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, return NULL; } -static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, - bool is_write) +static inline atomic_long_t * +insert_watchpoint(unsigned long addr, size_t size, bool is_write) { const int slot = watchpoint_slot(addr); const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); @@ -120,21 +121,16 @@ static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, /* Check slot index logic, ensuring we stay within array bounds. */ BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT); - BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT + 1) != 0); - BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, - KCSAN_CHECK_ADJACENT) != - ARRAY_SIZE(watchpoints) - 1); - BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, - KCSAN_CHECK_ADJACENT + 1) != - ARRAY_SIZE(watchpoints) - NUM_SLOTS); + BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS); for (i = 0; i < NUM_SLOTS; ++i) { long expect_val = INVALID_WATCHPOINT; /* Try to acquire this slot. */ watchpoint = &watchpoints[SLOT_IDX(slot, i)]; - if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, - encoded_watchpoint)) + if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint)) return watchpoint; } @@ -150,11 +146,10 @@ static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, * 2. the thread that set up the watchpoint already removed it; * 3. the watchpoint was removed and then re-used. */ -static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, - long encoded_watchpoint) +static inline bool +try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) { - return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, - CONSUMED_WATCHPOINT); + return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); } /* @@ -162,14 +157,13 @@ static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, */ static inline bool remove_watchpoint(atomic_long_t *watchpoint) { - return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != - CONSUMED_WATCHPOINT; + return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; } static inline struct kcsan_ctx *get_ctx(void) { /* - * In interrupt, use raw_cpu_ptr to avoid unnecessary checks, that would + * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would * also result in calls that generate warnings in uaccess regions. */ return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); @@ -260,7 +254,8 @@ static inline unsigned int get_delay(void) */ static noinline void kcsan_found_watchpoint(const volatile void *ptr, - size_t size, bool is_write, + size_t size, + bool is_write, atomic_long_t *watchpoint, long encoded_watchpoint) { @@ -296,8 +291,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, user_access_restore(flags); } -static noinline void kcsan_setup_watchpoint(const volatile void *ptr, - size_t size, bool is_write) +static noinline void +kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write) { atomic_long_t *watchpoint; union { @@ -346,8 +341,8 @@ static noinline void kcsan_setup_watchpoint(const volatile void *ptr, watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { /* - * Out of capacity: the size of `watchpoints`, and the frequency - * with which `should_watch()` returns true should be tweaked so + * Out of capacity: the size of 'watchpoints', and the frequency + * with which should_watch() returns true should be tweaked so * that this case happens very rarely. */ kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY); diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 041d520a0183..bec42dab32ee 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -24,39 +24,31 @@ static atomic_long_t counters[KCSAN_COUNTER_COUNT]; * whitelist or blacklist. */ static struct { - unsigned long *addrs; /* array of addresses */ - size_t size; /* current size */ - int used; /* number of elements used */ - bool sorted; /* if elements are sorted */ - bool whitelist; /* if list is a blacklist or whitelist */ + unsigned long *addrs; /* array of addresses */ + size_t size; /* current size */ + int used; /* number of elements used */ + bool sorted; /* if elements are sorted */ + bool whitelist; /* if list is a blacklist or whitelist */ } report_filterlist = { - .addrs = NULL, - .size = 8, /* small initial size */ - .used = 0, - .sorted = false, - .whitelist = false, /* default is blacklist */ + .addrs = NULL, + .size = 8, /* small initial size */ + .used = 0, + .sorted = false, + .whitelist = false, /* default is blacklist */ }; static DEFINE_SPINLOCK(report_filterlist_lock); static const char *counter_to_name(enum kcsan_counter_id id) { switch (id) { - case KCSAN_COUNTER_USED_WATCHPOINTS: - return "used_watchpoints"; - case KCSAN_COUNTER_SETUP_WATCHPOINTS: - return "setup_watchpoints"; - case KCSAN_COUNTER_DATA_RACES: - return "data_races"; - case KCSAN_COUNTER_NO_CAPACITY: - return "no_capacity"; - case KCSAN_COUNTER_REPORT_RACES: - return "report_races"; - case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: - return "races_unknown_origin"; - case KCSAN_COUNTER_UNENCODABLE_ACCESSES: - return "unencodable_accesses"; - case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: - return "encoding_false_positives"; + case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints"; + case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints"; + case KCSAN_COUNTER_DATA_RACES: return "data_races"; + case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity"; + case KCSAN_COUNTER_REPORT_RACES: return "report_races"; + case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin"; + case KCSAN_COUNTER_UNENCODABLE_ACCESSES: return "unencodable_accesses"; + case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: return "encoding_false_positives"; case KCSAN_COUNTER_COUNT: BUG(); } @@ -116,7 +108,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr) if (!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset)) return false; - func_addr -= offset; /* get function start */ + func_addr -= offset; /* Get function start */ spin_lock_irqsave(&report_filterlist_lock, flags); if (report_filterlist.used == 0) @@ -195,6 +187,7 @@ static ssize_t insert_report_filterlist(const char *func) out: spin_unlock_irqrestore(&report_filterlist_lock, flags); + return ret; } @@ -226,8 +219,8 @@ static int debugfs_open(struct inode *inode, struct file *file) return single_open(file, show_info, NULL); } -static ssize_t debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) +static ssize_t +debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { char kbuf[KSYM_NAME_LEN]; char *arg; @@ -264,10 +257,13 @@ static ssize_t debugfs_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations debugfs_ops = { .read = seq_read, - .open = debugfs_open, - .write = debugfs_write, - .release = single_release }; +static const struct file_operations debugfs_ops = +{ + .read = seq_read, + .open = debugfs_open, + .write = debugfs_write, + .release = single_release +}; void __init kcsan_debugfs_init(void) { diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h index e17bdac0e54b..b63890e86449 100644 --- a/kernel/kcsan/encoding.h +++ b/kernel/kcsan/encoding.h @@ -10,7 +10,8 @@ #include "kcsan.h" #define SLOT_RANGE PAGE_SIZE -#define INVALID_WATCHPOINT 0 + +#define INVALID_WATCHPOINT 0 #define CONSUMED_WATCHPOINT 1 /* @@ -34,24 +35,24 @@ * Both these are assumed to be very unlikely. However, in case it still happens * happens, the report logic will filter out the false positive (see report.c). */ -#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG - 1 - WATCHPOINT_SIZE_BITS) +#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS) /* * Masks to set/retrieve the encoded data. */ -#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG - 1) +#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1) #define WATCHPOINT_SIZE_MASK \ - GENMASK(BITS_PER_LONG - 2, BITS_PER_LONG - 2 - WATCHPOINT_SIZE_BITS) + GENMASK(BITS_PER_LONG-2, BITS_PER_LONG-2 - WATCHPOINT_SIZE_BITS) #define WATCHPOINT_ADDR_MASK \ - GENMASK(BITS_PER_LONG - 3 - WATCHPOINT_SIZE_BITS, 0) + GENMASK(BITS_PER_LONG-3 - WATCHPOINT_SIZE_BITS, 0) static inline bool check_encodable(unsigned long addr, size_t size) { return size <= MAX_ENCODABLE_SIZE; } -static inline long encode_watchpoint(unsigned long addr, size_t size, - bool is_write) +static inline long +encode_watchpoint(unsigned long addr, size_t size, bool is_write) { return (long)((is_write ? WATCHPOINT_WRITE_MASK : 0) | (size << WATCHPOINT_ADDR_BITS) | @@ -59,17 +60,17 @@ static inline long encode_watchpoint(unsigned long addr, size_t size, } static inline bool decode_watchpoint(long watchpoint, - unsigned long *addr_masked, size_t *size, + unsigned long *addr_masked, + size_t *size, bool *is_write) { if (watchpoint == INVALID_WATCHPOINT || watchpoint == CONSUMED_WATCHPOINT) return false; - *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; - *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> - WATCHPOINT_ADDR_BITS; - *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); + *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; + *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> WATCHPOINT_ADDR_BITS; + *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); return true; } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 1bb2f1c0d61e..d3b9a96ac8a4 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -72,14 +72,14 @@ enum kcsan_counter_id { /* * Increment/decrement counter with given id; avoid calling these in fast-path. */ -void kcsan_counter_inc(enum kcsan_counter_id id); -void kcsan_counter_dec(enum kcsan_counter_id id); +extern void kcsan_counter_inc(enum kcsan_counter_id id); +extern void kcsan_counter_dec(enum kcsan_counter_id id); /* * Returns true if data races in the function symbol that maps to func_addr * (offsets are ignored) should *not* be reported. */ -bool kcsan_skip_report_debugfs(unsigned long func_addr); +extern bool kcsan_skip_report_debugfs(unsigned long func_addr); enum kcsan_report_type { /* @@ -99,10 +99,11 @@ enum kcsan_report_type { */ KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, }; + /* * Print a race report from thread that encountered the race. */ -void kcsan_report(const volatile void *ptr, size_t size, bool is_write, - bool value_change, int cpu_id, enum kcsan_report_type type); +extern void kcsan_report(const volatile void *ptr, size_t size, bool is_write, + bool value_change, int cpu_id, enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ead5610bafa7..0eea05a3135b 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -22,13 +22,13 @@ * the reports, with reporting being in the slow-path. */ static struct { - const volatile void *ptr; - size_t size; - bool is_write; - int task_pid; - int cpu_id; - unsigned long stack_entries[NUM_STACK_ENTRIES]; - int num_stack_entries; + const volatile void *ptr; + size_t size; + bool is_write; + int task_pid; + int cpu_id; + unsigned long stack_entries[NUM_STACK_ENTRIES]; + int num_stack_entries; } other_info = { .ptr = NULL }; /* @@ -40,8 +40,8 @@ static DEFINE_SPINLOCK(report_lock); /* * Special rules to skip reporting. */ -static bool skip_report(bool is_write, bool value_change, - unsigned long top_frame) +static bool +skip_report(bool is_write, bool value_change, unsigned long top_frame) { if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write && !value_change) { @@ -105,6 +105,7 @@ static int sym_strcmp(void *addr1, void *addr2) snprintf(buf1, sizeof(buf1), "%pS", addr1); snprintf(buf2, sizeof(buf2), "%pS", addr2); + return strncmp(buf1, buf2, sizeof(buf1)); } @@ -116,8 +117,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, enum kcsan_report_type type) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; - int num_stack_entries = - stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); + int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); int other_skipnr; @@ -131,7 +131,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, other_skipnr = get_stack_skipnr(other_info.stack_entries, other_info.num_stack_entries); - /* value_change is only known for the other thread */ + /* @value_change is only known for the other thread */ if (skip_report(other_info.is_write, value_change, other_info.stack_entries[other_skipnr])) return false; @@ -241,13 +241,12 @@ retry: if (other_info.ptr != NULL) break; /* still in use, retry */ - other_info.ptr = ptr; - other_info.size = size; - other_info.is_write = is_write; - other_info.task_pid = in_task() ? task_pid_nr(current) : -1; - other_info.cpu_id = cpu_id; - other_info.num_stack_entries = stack_trace_save( - other_info.stack_entries, NUM_STACK_ENTRIES, 1); + other_info.ptr = ptr; + other_info.size = size; + other_info.is_write = is_write; + other_info.task_pid = in_task() ? task_pid_nr(current) : -1; + other_info.cpu_id = cpu_id; + other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); spin_unlock_irqrestore(&report_lock, *flags); @@ -299,6 +298,7 @@ retry: } spin_unlock_irqrestore(&report_lock, *flags); + goto retry; } @@ -309,9 +309,7 @@ void kcsan_report(const volatile void *ptr, size_t size, bool is_write, kcsan_disable_current(); if (prepare_report(&flags, ptr, size, is_write, cpu_id, type)) { - if (print_report(ptr, size, is_write, value_change, cpu_id, - type) && - panic_on_warn) + if (print_report(ptr, size, is_write, value_change, cpu_id, type) && panic_on_warn) panic("panic_on_warn set ...\n"); release_report(&flags, type); diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c index 0bae63c5ca65..cc6000239dc0 100644 --- a/kernel/kcsan/test.c +++ b/kernel/kcsan/test.c @@ -34,7 +34,7 @@ static bool test_encode_decode(void) if (WARN_ON(!check_encodable(addr, size))) return false; - /* encode and decode */ + /* Encode and decode */ { const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); @@ -42,7 +42,7 @@ static bool test_encode_decode(void) size_t verif_size; bool verif_is_write; - /* check special watchpoints */ + /* Check special watchpoints */ if (WARN_ON(decode_watchpoint( INVALID_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) @@ -52,7 +52,7 @@ static bool test_encode_decode(void) &verif_size, &verif_is_write))) return false; - /* check decoding watchpoint returns same data */ + /* Check decoding watchpoint returns same data */ if (WARN_ON(!decode_watchpoint( encoded_watchpoint, &verif_masked_addr, &verif_size, &verif_is_write))) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index e9307a9c54e7..5fc9c9b70862 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -7,7 +7,7 @@ endif # that is not a function of syscall inputs. E.g. involuntary context switches. KCOV_INSTRUMENT := n -# There are numerous races here, however, most of them due to plain accesses. +# There are numerous data races here, however, most of them are due to plain accesses. # This would make it even harder for syzbot to find reproducers, because these # bugs trigger without specific input. Disable by default, but should re-enable # eventually. diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 5dd464e52ab4..3f78b1434375 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -6,7 +6,6 @@ config HAVE_ARCH_KCSAN menuconfig KCSAN bool "KCSAN: watchpoint-based dynamic data race detector" depends on HAVE_ARCH_KCSAN && !KASAN && STACKTRACE - default n help Kernel Concurrency Sanitizer is a dynamic data race detector, which uses a watchpoint-based sampling approach to detect races. See @@ -16,13 +15,12 @@ if KCSAN config KCSAN_DEBUG bool "Debugging of KCSAN internals" - default n config KCSAN_SELFTEST bool "Perform short selftests on boot" default y help - Run KCSAN selftests on boot. On test failure, causes kernel to panic. + Run KCSAN selftests on boot. On test failure, causes the kernel to panic. config KCSAN_EARLY_ENABLE bool "Early enable during boot" @@ -62,7 +60,8 @@ config KCSAN_DELAY_RANDOMIZE default y help If delays should be randomized, where the maximum is KCSAN_UDELAY_*. - If false, the chosen delays are always KCSAN_UDELAY_* defined above. + If false, the chosen delays are always the KCSAN_UDELAY_* values + as defined above. config KCSAN_SKIP_WATCH int "Skip instructions before setting up watchpoint" @@ -86,9 +85,9 @@ config KCSAN_SKIP_WATCH_RANDOMIZE # parameters, to optimize for the common use-case, we avoid this because: (a) # it would impact performance (and we want to avoid static branch for all # {READ,WRITE}_ONCE, atomic_*, bitops, etc.), and (b) complicate the design -# without real benefit. The main purpose of the below options are for use in -# fuzzer configs to control reported data races, and are not expected to be -# switched frequently by a user. +# without real benefit. The main purpose of the below options is for use in +# fuzzer configs to control reported data races, and they are not expected +# to be switched frequently by a user. config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN bool "Report races of unknown origin" @@ -103,13 +102,12 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY bool "Only report races where watcher observed a data value change" default y help - If enabled and a conflicting write is observed via watchpoint, but + If enabled and a conflicting write is observed via a watchpoint, but the data value of the memory location was observed to remain unchanged, do not report the data race. config KCSAN_IGNORE_ATOMICS bool "Do not instrument marked atomic accesses" - default n help If enabled, never instruments marked atomic accesses. This results in not reporting data races where one access is atomic and the other is -- cgit v1.2.3 From d47715f50e833f12c5e829ce9dcc4a65104fa74f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 19 Nov 2019 19:57:42 +0100 Subject: kcsan, ubsan: Make KCSAN+UBSAN work together Context: http://lkml.kernel.org/r/fb7e25d8-aba4-3dcf-7761-cb7ecb3ebb71@infradead.org Reported-by: Randy Dunlap Signed-off-by: Marco Elver Acked-by: Randy Dunlap # build-tested Signed-off-by: Paul E. McKenney --- kernel/kcsan/Makefile | 1 + lib/Makefile | 1 + 2 files changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index dd15b62ec0b5..df6b7799e492 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 KCSAN_SANITIZE := n KCOV_INSTRUMENT := n +UBSAN_SANITIZE := n CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) diff --git a/lib/Makefile b/lib/Makefile index 778ab704e3ad..9d5bda950f5f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -279,6 +279,7 @@ obj-$(CONFIG_UBSAN) += ubsan.o UBSAN_SANITIZE_ubsan.o := n KASAN_SANITIZE_ubsan.o := n +KCSAN_SANITIZE_ubsan.o := n CFLAGS_ubsan.o := $(call cc-option, -fno-stack-protector) $(DISABLE_STACKLEAK_PLUGIN) obj-$(CONFIG_SBITMAP) += sbitmap.o -- cgit v1.2.3 From 5c361425744d1e3b03d835dde659708683ca27d1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 7 Jan 2020 17:31:04 +0100 Subject: kcsan: Prefer __always_inline for fast-path Prefer __always_inline for fast-path functions that are called outside of user_access_save, to avoid generating UACCESS warnings when optimizing for size (CC_OPTIMIZE_FOR_SIZE). It will also avoid future surprises with compiler versions that change the inlining heuristic even when optimizing for performance. Reported-by: Randy Dunlap Acked-by: Randy Dunlap # build-tested Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Link: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org --- kernel/kcsan/atomic.h | 2 +- kernel/kcsan/core.c | 18 +++++++++--------- kernel/kcsan/encoding.h | 14 +++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index 576e03ddd6a3..a9c193053491 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -18,7 +18,7 @@ * than cast to volatile. Eventually, we hope to be able to remove this * function. */ -static inline bool kcsan_is_atomic(const volatile void *ptr) +static __always_inline bool kcsan_is_atomic(const volatile void *ptr) { /* only jiffies for now */ return ptr == &jiffies; diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 3314fc29e236..4d4ab5c5dc53 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -78,10 +78,10 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; */ static DEFINE_PER_CPU(long, kcsan_skip); -static inline atomic_long_t *find_watchpoint(unsigned long addr, - size_t size, - bool expect_write, - long *encoded_watchpoint) +static __always_inline atomic_long_t *find_watchpoint(unsigned long addr, + size_t size, + bool expect_write, + long *encoded_watchpoint) { const int slot = watchpoint_slot(addr); const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK; @@ -146,7 +146,7 @@ insert_watchpoint(unsigned long addr, size_t size, bool is_write) * 2. the thread that set up the watchpoint already removed it; * 3. the watchpoint was removed and then re-used. */ -static inline bool +static __always_inline bool try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) { return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); @@ -160,7 +160,7 @@ static inline bool remove_watchpoint(atomic_long_t *watchpoint) return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; } -static inline struct kcsan_ctx *get_ctx(void) +static __always_inline struct kcsan_ctx *get_ctx(void) { /* * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would @@ -169,7 +169,7 @@ static inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } -static inline bool is_atomic(const volatile void *ptr) +static __always_inline bool is_atomic(const volatile void *ptr) { struct kcsan_ctx *ctx = get_ctx(); @@ -193,7 +193,7 @@ static inline bool is_atomic(const volatile void *ptr) return kcsan_is_atomic(ptr); } -static inline bool should_watch(const volatile void *ptr, int type) +static __always_inline bool should_watch(const volatile void *ptr, int type) { /* * Never set up watchpoints when memory operations are atomic. @@ -226,7 +226,7 @@ static inline void reset_kcsan_skip(void) this_cpu_write(kcsan_skip, skip_count); } -static inline bool kcsan_is_enabled(void) +static __always_inline bool kcsan_is_enabled(void) { return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0; } diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h index b63890e86449..f03562aaf2eb 100644 --- a/kernel/kcsan/encoding.h +++ b/kernel/kcsan/encoding.h @@ -59,10 +59,10 @@ encode_watchpoint(unsigned long addr, size_t size, bool is_write) (addr & WATCHPOINT_ADDR_MASK)); } -static inline bool decode_watchpoint(long watchpoint, - unsigned long *addr_masked, - size_t *size, - bool *is_write) +static __always_inline bool decode_watchpoint(long watchpoint, + unsigned long *addr_masked, + size_t *size, + bool *is_write) { if (watchpoint == INVALID_WATCHPOINT || watchpoint == CONSUMED_WATCHPOINT) @@ -78,13 +78,13 @@ static inline bool decode_watchpoint(long watchpoint, /* * Return watchpoint slot for an address. */ -static inline int watchpoint_slot(unsigned long addr) +static __always_inline int watchpoint_slot(unsigned long addr) { return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS; } -static inline bool matching_access(unsigned long addr1, size_t size1, - unsigned long addr2, size_t size2) +static __always_inline bool matching_access(unsigned long addr1, size_t size1, + unsigned long addr2, size_t size2) { unsigned long end_range1 = addr1 + size1 - 1; unsigned long end_range2 = addr2 + size2 - 1; -- cgit v1.2.3 From 47144eca282189afcf34ef25aee8408c168765d4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 10 Jan 2020 19:48:33 +0100 Subject: kcsan: Show full access type in report This commit adds access-type information to KCSAN's reports as follows: "read", "read (marked)", "write", and "write (marked)". Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/core.c | 15 ++++++++------- kernel/kcsan/kcsan.h | 2 +- kernel/kcsan/report.c | 43 ++++++++++++++++++++++++++++--------------- 3 files changed, 37 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 4d4ab5c5dc53..87bf857c8893 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -255,7 +255,7 @@ static inline unsigned int get_delay(void) static noinline void kcsan_found_watchpoint(const volatile void *ptr, size_t size, - bool is_write, + int type, atomic_long_t *watchpoint, long encoded_watchpoint) { @@ -276,7 +276,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, flags = user_access_save(); if (consumed) { - kcsan_report(ptr, size, is_write, true, raw_smp_processor_id(), + kcsan_report(ptr, size, type, true, raw_smp_processor_id(), KCSAN_REPORT_CONSUMED_WATCHPOINT); } else { /* @@ -292,8 +292,9 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, } static noinline void -kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write) +kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) { + const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; atomic_long_t *watchpoint; union { u8 _1; @@ -415,13 +416,13 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write) * No need to increment 'data_races' counter, as the racing * thread already did. */ - kcsan_report(ptr, size, is_write, size > 8 || value_change, + kcsan_report(ptr, size, type, size > 8 || value_change, smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); } else if (value_change) { /* Inferring a race, since the value should not have changed. */ kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN); if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN)) - kcsan_report(ptr, size, is_write, true, + kcsan_report(ptr, size, type, true, smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); } @@ -455,10 +456,10 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, */ if (unlikely(watchpoint != NULL)) - kcsan_found_watchpoint(ptr, size, is_write, watchpoint, + kcsan_found_watchpoint(ptr, size, type, watchpoint, encoded_watchpoint); else if (unlikely(should_watch(ptr, type))) - kcsan_setup_watchpoint(ptr, size, is_write); + kcsan_setup_watchpoint(ptr, size, type); } /* === Public interface ===================================================== */ diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index d3b9a96ac8a4..8492da45494b 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -103,7 +103,7 @@ enum kcsan_report_type { /* * Print a race report from thread that encountered the race. */ -extern void kcsan_report(const volatile void *ptr, size_t size, bool is_write, +extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, bool value_change, int cpu_id, enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 0eea05a3135b..9f503ca2ff7a 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -24,7 +24,7 @@ static struct { const volatile void *ptr; size_t size; - bool is_write; + int access_type; int task_pid; int cpu_id; unsigned long stack_entries[NUM_STACK_ENTRIES]; @@ -41,8 +41,10 @@ static DEFINE_SPINLOCK(report_lock); * Special rules to skip reporting. */ static bool -skip_report(bool is_write, bool value_change, unsigned long top_frame) +skip_report(int access_type, bool value_change, unsigned long top_frame) { + const bool is_write = (access_type & KCSAN_ACCESS_WRITE) != 0; + if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write && !value_change) { /* @@ -63,9 +65,20 @@ skip_report(bool is_write, bool value_change, unsigned long top_frame) return kcsan_skip_report_debugfs(top_frame); } -static inline const char *get_access_type(bool is_write) +static const char *get_access_type(int type) { - return is_write ? "write" : "read"; + switch (type) { + case 0: + return "read"; + case KCSAN_ACCESS_ATOMIC: + return "read (marked)"; + case KCSAN_ACCESS_WRITE: + return "write"; + case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "write (marked)"; + default: + BUG(); + } } /* Return thread description: in task or interrupt. */ @@ -112,7 +125,7 @@ static int sym_strcmp(void *addr1, void *addr2) /* * Returns true if a report was generated, false otherwise. */ -static bool print_report(const volatile void *ptr, size_t size, bool is_write, +static bool print_report(const volatile void *ptr, size_t size, int access_type, bool value_change, int cpu_id, enum kcsan_report_type type) { @@ -124,7 +137,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, /* * Must check report filter rules before starting to print. */ - if (skip_report(is_write, true, stack_entries[skipnr])) + if (skip_report(access_type, true, stack_entries[skipnr])) return false; if (type == KCSAN_REPORT_RACE_SIGNAL) { @@ -132,7 +145,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, other_info.num_stack_entries); /* @value_change is only known for the other thread */ - if (skip_report(other_info.is_write, value_change, + if (skip_report(other_info.access_type, value_change, other_info.stack_entries[other_skipnr])) return false; } @@ -170,7 +183,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, switch (type) { case KCSAN_REPORT_RACE_SIGNAL: pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(other_info.is_write), other_info.ptr, + get_access_type(other_info.access_type), other_info.ptr, other_info.size, get_thread_desc(other_info.task_pid), other_info.cpu_id); @@ -181,14 +194,14 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, pr_err("\n"); pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(is_write), ptr, size, + get_access_type(access_type), ptr, size, get_thread_desc(in_task() ? task_pid_nr(current) : -1), cpu_id); break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(is_write), ptr, size, + get_access_type(access_type), ptr, size, get_thread_desc(in_task() ? task_pid_nr(current) : -1), cpu_id); break; @@ -223,7 +236,7 @@ static void release_report(unsigned long *flags, enum kcsan_report_type type) * required for the report type, simply acquires report_lock and returns true. */ static bool prepare_report(unsigned long *flags, const volatile void *ptr, - size_t size, bool is_write, int cpu_id, + size_t size, int access_type, int cpu_id, enum kcsan_report_type type) { if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && @@ -243,7 +256,7 @@ retry: other_info.ptr = ptr; other_info.size = size; - other_info.is_write = is_write; + other_info.access_type = access_type; other_info.task_pid = in_task() ? task_pid_nr(current) : -1; other_info.cpu_id = cpu_id; other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); @@ -302,14 +315,14 @@ retry: goto retry; } -void kcsan_report(const volatile void *ptr, size_t size, bool is_write, +void kcsan_report(const volatile void *ptr, size_t size, int access_type, bool value_change, int cpu_id, enum kcsan_report_type type) { unsigned long flags = 0; kcsan_disable_current(); - if (prepare_report(&flags, ptr, size, is_write, cpu_id, type)) { - if (print_report(ptr, size, is_write, value_change, cpu_id, type) && panic_on_warn) + if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { + if (print_report(ptr, size, access_type, value_change, cpu_id, type) && panic_on_warn) panic("panic_on_warn set ...\n"); release_report(&flags, type); -- cgit v1.2.3 From 05f9a4067964e3f864210271a6299f13d2eeea55 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 10 Jan 2020 19:48:34 +0100 Subject: kcsan: Rate-limit reporting per data races KCSAN data-race reports can occur quite frequently, so much so as to render the system useless. This commit therefore adds support for time-based rate-limiting KCSAN reports, with the time interval specified by a new KCSAN_REPORT_ONCE_IN_MS Kconfig option. The default is 3000 milliseconds, also known as three seconds. Because KCSAN must detect data races in allocators and in other contexts where use of allocation is ill-advised, a fixed-size array is used to buffer reports during each reporting interval. To reduce the number of reports lost due to array overflow, this commit stores only one instance of duplicate reports, which has the benefit of further reducing KCSAN's console output rate. Reported-by: Qian Cai Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/report.c | 110 +++++++++++++++++++++++++++++++++++++++++++++----- lib/Kconfig.kcsan | 10 +++++ 2 files changed, 110 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 9f503ca2ff7a..b5b4feea49de 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -31,12 +32,99 @@ static struct { int num_stack_entries; } other_info = { .ptr = NULL }; +/* + * Information about reported data races; used to rate limit reporting. + */ +struct report_time { + /* + * The last time the data race was reported. + */ + unsigned long time; + + /* + * The frames of the 2 threads; if only 1 thread is known, one frame + * will be 0. + */ + unsigned long frame1; + unsigned long frame2; +}; + +/* + * Since we also want to be able to debug allocators with KCSAN, to avoid + * deadlock, report_times cannot be dynamically resized with krealloc in + * rate_limit_report. + * + * Therefore, we use a fixed-size array, which at most will occupy a page. This + * still adequately rate limits reports, assuming that a) number of unique data + * races is not excessive, and b) occurrence of unique data races within the + * same time window is limited. + */ +#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) +#define REPORT_TIMES_SIZE \ + (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \ + REPORT_TIMES_MAX : \ + CONFIG_KCSAN_REPORT_ONCE_IN_MS) +static struct report_time report_times[REPORT_TIMES_SIZE]; + /* * This spinlock protects reporting and other_info, since other_info is usually * required when reporting. */ static DEFINE_SPINLOCK(report_lock); +/* + * Checks if the data race identified by thread frames frame1 and frame2 has + * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). + */ +static bool rate_limit_report(unsigned long frame1, unsigned long frame2) +{ + struct report_time *use_entry = &report_times[0]; + unsigned long invalid_before; + int i; + + BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0); + + if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0) + return false; + + invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); + + /* Check if a matching data race report exists. */ + for (i = 0; i < REPORT_TIMES_SIZE; ++i) { + struct report_time *rt = &report_times[i]; + + /* + * Must always select an entry for use to store info as we + * cannot resize report_times; at the end of the scan, use_entry + * will be the oldest entry, which ideally also happened before + * KCSAN_REPORT_ONCE_IN_MS ago. + */ + if (time_before(rt->time, use_entry->time)) + use_entry = rt; + + /* + * Initially, no need to check any further as this entry as well + * as following entries have never been used. + */ + if (rt->time == 0) + break; + + /* Check if entry expired. */ + if (time_before(rt->time, invalid_before)) + continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ + + /* Reported recently, check if data race matches. */ + if ((rt->frame1 == frame1 && rt->frame2 == frame2) || + (rt->frame1 == frame2 && rt->frame2 == frame1)) + return true; + } + + use_entry->time = jiffies; + use_entry->frame1 = frame1; + use_entry->frame2 = frame2; + return false; +} + /* * Special rules to skip reporting. */ @@ -132,7 +220,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); - int other_skipnr; + unsigned long this_frame = stack_entries[skipnr]; + unsigned long other_frame = 0; + int other_skipnr = 0; /* silence uninit warnings */ /* * Must check report filter rules before starting to print. @@ -143,34 +233,34 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, if (type == KCSAN_REPORT_RACE_SIGNAL) { other_skipnr = get_stack_skipnr(other_info.stack_entries, other_info.num_stack_entries); + other_frame = other_info.stack_entries[other_skipnr]; /* @value_change is only known for the other thread */ - if (skip_report(other_info.access_type, value_change, - other_info.stack_entries[other_skipnr])) + if (skip_report(other_info.access_type, value_change, other_frame)) return false; } + if (rate_limit_report(this_frame, other_frame)) + return false; + /* Print report header. */ pr_err("==================================================================\n"); switch (type) { case KCSAN_REPORT_RACE_SIGNAL: { - void *this_fn = (void *)stack_entries[skipnr]; - void *other_fn = (void *)other_info.stack_entries[other_skipnr]; int cmp; /* * Order functions lexographically for consistent bug titles. * Do not print offset of functions to keep title short. */ - cmp = sym_strcmp(other_fn, this_fn); + cmp = sym_strcmp((void *)other_frame, (void *)this_frame); pr_err("BUG: KCSAN: data-race in %ps / %ps\n", - cmp < 0 ? other_fn : this_fn, - cmp < 0 ? this_fn : other_fn); + (void *)(cmp < 0 ? other_frame : this_frame), + (void *)(cmp < 0 ? this_frame : other_frame)); } break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: - pr_err("BUG: KCSAN: data-race in %pS\n", - (void *)stack_entries[skipnr]); + pr_err("BUG: KCSAN: data-race in %pS\n", (void *)this_frame); break; default: diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 3f78b1434375..3552990abcfe 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -81,6 +81,16 @@ config KCSAN_SKIP_WATCH_RANDOMIZE KCSAN_WATCH_SKIP. If false, the chosen value is always KCSAN_WATCH_SKIP. +config KCSAN_REPORT_ONCE_IN_MS + int "Duration in milliseconds, in which any given data race is only reported once" + default 3000 + help + Any given data race is only reported once in the defined time window. + Different data races may still generate reports within a duration + that is smaller than the duration defined here. This allows rate + limiting reporting to avoid flooding the console with reports. + Setting this to 0 disables rate limiting. + # Note that, while some of the below options could be turned into boot # parameters, to optimize for the common use-case, we avoid this because: (a) # it would impact performance (and we want to avoid static branch for all -- cgit v1.2.3 From f1bc96210c6a6b853b4b2eec808141956e8fbc5d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 15 Jan 2020 17:25:12 +0100 Subject: kcsan: Make KCSAN compatible with lockdep We must avoid any recursion into lockdep if KCSAN is enabled on utilities used by lockdep. One manifestation of this is corruption of lockdep's IRQ trace state (if TRACE_IRQFLAGS), resulting in spurious warnings (see below). This commit fixes this by: 1. Using raw_local_irq{save,restore} in kcsan_setup_watchpoint(). 2. Disabling lockdep in kcsan_report(). Tested with: CONFIG_LOCKDEP=y CONFIG_DEBUG_LOCKDEP=y CONFIG_TRACE_IRQFLAGS=y This fix eliminates spurious warnings such as the following one: WARNING: CPU: 0 PID: 2 at kernel/locking/lockdep.c:4406 check_flags.part.0+0x101/0x220 Modules linked in: CPU: 0 PID: 2 Comm: kthreadd Not tainted 5.5.0-rc1+ #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:check_flags.part.0+0x101/0x220 Call Trace: lock_is_held_type+0x69/0x150 freezer_fork+0x20b/0x370 cgroup_post_fork+0x2c9/0x5c0 copy_process+0x2675/0x3b40 _do_fork+0xbe/0xa30 ? _raw_spin_unlock_irqrestore+0x40/0x50 ? match_held_lock+0x56/0x250 ? kthread_park+0xf0/0xf0 kernel_thread+0xa6/0xd0 ? kthread_park+0xf0/0xf0 kthreadd+0x321/0x3d0 ? kthread_create_on_cpu+0x130/0x130 ret_from_fork+0x3a/0x50 irq event stamp: 64 hardirqs last enabled at (63): [] _raw_spin_unlock_irqrestore+0x40/0x50 hardirqs last disabled at (64): [] kcsan_setup_watchpoint+0x92/0x460 softirqs last enabled at (32): [] fpu__copy+0xe8/0x470 softirqs last disabled at (30): [] fpu__copy+0x69/0x470 Reported-by: Qian Cai Signed-off-by: Marco Elver Acked-by: Alexander Potapenko Tested-by: Qian Cai Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/core.c | 6 ++++-- kernel/kcsan/report.c | 11 +++++++++++ kernel/locking/Makefile | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 87bf857c8893..64b30f7716a1 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -336,8 +336,10 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * CPU-local data accesses), it makes more sense (from a data race * detection point of view) to simply disable preemptions to ensure * as many tasks as possible run on other CPUs. + * + * Use raw versions, to avoid lockdep recursion via IRQ flags tracing. */ - local_irq_save(irq_flags); + raw_local_irq_save(irq_flags); watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { @@ -429,7 +431,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); out_unlock: - local_irq_restore(irq_flags); + raw_local_irq_restore(irq_flags); out: user_access_restore(ua_flags); } diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index b5b4feea49de..33bdf8b229b5 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -410,6 +411,14 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, { unsigned long flags = 0; + /* + * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if + * we do not turn off lockdep here; this could happen due to recursion + * into lockdep via KCSAN if we detect a data race in utilities used by + * lockdep. + */ + lockdep_off(); + kcsan_disable_current(); if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { if (print_report(ptr, size, access_type, value_change, cpu_id, type) && panic_on_warn) @@ -418,4 +427,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, release_report(&flags, type); } kcsan_enable_current(); + + lockdep_on(); } diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 45452facff3b..6d11cfb9b41f 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -5,6 +5,9 @@ KCOV_INSTRUMENT := n obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o +# Avoid recursion lockdep -> KCSAN -> ... -> lockdep. +KCSAN_SANITIZE_lockdep.o := n + ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE) -- cgit v1.2.3 From ad4f8eeca8eaa24afb6059c241a2f4baf86378f3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 29 Jan 2020 16:01:02 +0100 Subject: kcsan: Address missing case with KCSAN_REPORT_VALUE_CHANGE_ONLY Even with KCSAN_REPORT_VALUE_CHANGE_ONLY, KCSAN still reports data races between reads and watchpointed writes, even if the writes wrote values already present. This commit causes KCSAN to unconditionally skip reporting in this case. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/report.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 33bdf8b229b5..7cd34285df74 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -130,12 +130,25 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) * Special rules to skip reporting. */ static bool -skip_report(int access_type, bool value_change, unsigned long top_frame) +skip_report(bool value_change, unsigned long top_frame) { - const bool is_write = (access_type & KCSAN_ACCESS_WRITE) != 0; - - if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write && - !value_change) { + /* + * The first call to skip_report always has value_change==true, since we + * cannot know the value written of an instrumented access. For the 2nd + * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: + * + * 1. read watchpoint, conflicting write (value_change==true): report; + * 2. read watchpoint, conflicting write (value_change==false): skip; + * 3. write watchpoint, conflicting write (value_change==true): report; + * 4. write watchpoint, conflicting write (value_change==false): skip; + * 5. write watchpoint, conflicting read (value_change==false): skip; + * 6. write watchpoint, conflicting read (value_change==true): impossible; + * + * Cases 1-4 are intuitive and expected; case 5 ensures we do not report + * data races where the write may have rewritten the same value; and + * case 6 is simply impossible. + */ + if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && !value_change) { /* * The access is a write, but the data value did not change. * @@ -228,7 +241,7 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, /* * Must check report filter rules before starting to print. */ - if (skip_report(access_type, true, stack_entries[skipnr])) + if (skip_report(true, stack_entries[skipnr])) return false; if (type == KCSAN_REPORT_RACE_SIGNAL) { @@ -237,7 +250,7 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, other_frame = other_info.stack_entries[other_skipnr]; /* @value_change is only known for the other thread */ - if (skip_report(other_info.access_type, value_change, other_frame)) + if (skip_report(value_change, other_frame)) return false; } -- cgit v1.2.3 From 1e6ee2f0fe8ae682757960edf455e99f611268a0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 4 Feb 2020 18:21:10 +0100 Subject: kcsan: Add option to assume plain aligned writes up to word size are atomic This adds option KCSAN_ASSUME_PLAIN_WRITES_ATOMIC. If enabled, plain aligned writes up to word size are assumed to be atomic, and also not subject to other unsafe compiler optimizations resulting in data races. This option has been enabled by default to reflect current kernel-wide preferences. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/core.c | 22 +++++++++++++++++----- lib/Kconfig.kcsan | 27 ++++++++++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 64b30f7716a1..e3c7d8f34f2f 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -169,10 +170,20 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } -static __always_inline bool is_atomic(const volatile void *ptr) +static __always_inline bool +is_atomic(const volatile void *ptr, size_t size, int type) { - struct kcsan_ctx *ctx = get_ctx(); + struct kcsan_ctx *ctx; + + if ((type & KCSAN_ACCESS_ATOMIC) != 0) + return true; + if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) && + (type & KCSAN_ACCESS_WRITE) != 0 && size <= sizeof(long) && + IS_ALIGNED((unsigned long)ptr, size)) + return true; /* Assume aligned writes up to word size are atomic. */ + + ctx = get_ctx(); if (unlikely(ctx->atomic_next > 0)) { /* * Because we do not have separate contexts for nested @@ -193,7 +204,8 @@ static __always_inline bool is_atomic(const volatile void *ptr) return kcsan_is_atomic(ptr); } -static __always_inline bool should_watch(const volatile void *ptr, int type) +static __always_inline bool +should_watch(const volatile void *ptr, size_t size, int type) { /* * Never set up watchpoints when memory operations are atomic. @@ -202,7 +214,7 @@ static __always_inline bool should_watch(const volatile void *ptr, int type) * should not count towards skipped instructions, and (2) to actually * decrement kcsan_atomic_next for consecutive instruction stream. */ - if ((type & KCSAN_ACCESS_ATOMIC) != 0 || is_atomic(ptr)) + if (is_atomic(ptr, size, type)) return false; if (this_cpu_dec_return(kcsan_skip) >= 0) @@ -460,7 +472,7 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, if (unlikely(watchpoint != NULL)) kcsan_found_watchpoint(ptr, size, type, watchpoint, encoded_watchpoint); - else if (unlikely(should_watch(ptr, type))) + else if (unlikely(should_watch(ptr, size, type))) kcsan_setup_watchpoint(ptr, size, type); } diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 3552990abcfe..66126853dab0 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -91,13 +91,13 @@ config KCSAN_REPORT_ONCE_IN_MS limiting reporting to avoid flooding the console with reports. Setting this to 0 disables rate limiting. -# Note that, while some of the below options could be turned into boot -# parameters, to optimize for the common use-case, we avoid this because: (a) -# it would impact performance (and we want to avoid static branch for all -# {READ,WRITE}_ONCE, atomic_*, bitops, etc.), and (b) complicate the design -# without real benefit. The main purpose of the below options is for use in -# fuzzer configs to control reported data races, and they are not expected -# to be switched frequently by a user. +# The main purpose of the below options is to control reported data races (e.g. +# in fuzzer configs), and are not expected to be switched frequently by other +# users. We could turn some of them into boot parameters, but given they should +# not be switched normally, let's keep them here to simplify configuration. +# +# The defaults below are chosen to be very conservative, and may miss certain +# bugs. config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN bool "Report races of unknown origin" @@ -116,6 +116,19 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY the data value of the memory location was observed to remain unchanged, do not report the data race. +config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC + bool "Assume that plain aligned writes up to word size are atomic" + default y + help + Assume that plain aligned writes up to word size are atomic by + default, and also not subject to other unsafe compiler optimizations + resulting in data races. This will cause KCSAN to not report data + races due to conflicts where the only plain accesses are aligned + writes up to word size: conflicts between marked reads and plain + aligned writes up to word size will not be reported as data races; + notice that data races between two conflicting plain aligned writes + will also not be reported. + config KCSAN_IGNORE_ATOMICS bool "Do not instrument marked atomic accesses" help -- cgit v1.2.3 From ed95f95c86cd53621103d865d62b5e1f96e60edb Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 5 Feb 2020 11:14:19 +0100 Subject: kcsan: Fix 0-sized checks Instrumentation of arbitrary memory-copy functions, such as user-copies, may be called with size of 0, which could lead to false positives. To avoid this, add a comparison in check_access() for size==0, which will be optimized out for constant sized instrumentation (__tsan_{read,write}N), and therefore not affect the common-case fast-path. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/core.c | 7 +++++++ kernel/kcsan/test.c | 10 ++++++++++ 2 files changed, 17 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index e3c7d8f34f2f..82c2bef827d4 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -455,6 +455,13 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, atomic_long_t *watchpoint; long encoded_watchpoint; + /* + * Do nothing for 0 sized check; this comparison will be optimized out + * for constant sized instrumentation (__tsan_{read,write}N). + */ + if (unlikely(size == 0)) + return; + /* * Avoid user_access_save in fast-path: find_watchpoint is safe without * user_access_save, as the address that ptr points to is only used to diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c index cc6000239dc0..d26a052d3383 100644 --- a/kernel/kcsan/test.c +++ b/kernel/kcsan/test.c @@ -92,6 +92,16 @@ static bool test_matching_access(void) return false; if (WARN_ON(matching_access(9, 1, 10, 1))) return false; + + /* + * An access of size 0 could match another access, as demonstrated here. + * Rather than add more comparisons to 'matching_access()', which would + * end up in the fast-path for *all* checks, check_access() simply + * returns for all accesses of size 0. + */ + if (WARN_ON(!matching_access(8, 8, 12, 0))) + return false; + return true; } -- cgit v1.2.3 From d591ec3db75f9eadfa7976ff8796c674c0027715 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 6 Feb 2020 16:46:24 +0100 Subject: kcsan: Introduce KCSAN_ACCESS_ASSERT access type The KCSAN_ACCESS_ASSERT access type may be used to introduce dummy reads and writes to assert certain properties of concurrent code, where bugs could not be detected as normal data races. For example, a variable that is only meant to be written by a single CPU, but may be read (without locking) by other CPUs must still be marked properly to avoid data races. However, concurrent writes, regardless if WRITE_ONCE() or not, would be a bug. Using kcsan_check_access(&x, sizeof(x), KCSAN_ACCESS_ASSERT) would allow catching such bugs. To support KCSAN_ACCESS_ASSERT the following notable changes were made: * If an access is of type KCSAN_ASSERT_ACCESS, disable various filters that only apply to data races, so that all races that KCSAN observes are reported. * Bug reports that involve an ASSERT access type will be reported as "KCSAN: assert: race in ..." instead of "data-race"; this will help more easily distinguish them. * Update a few comments to just mention 'races' where we do not always mean pure data races. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/kcsan-checks.h | 18 ++++++++++++------ kernel/kcsan/core.c | 44 ++++++++++++++++++++++++++++++++++++++------ kernel/kcsan/debugfs.c | 1 + kernel/kcsan/kcsan.h | 7 +++++++ kernel/kcsan/report.c | 43 +++++++++++++++++++++++++++++++------------ lib/Kconfig.kcsan | 24 ++++++++++++++---------- 6 files changed, 103 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index ef3ee233a3fa..5dcadc221026 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -6,10 +6,16 @@ #include /* - * Access type modifiers. + * ACCESS TYPE MODIFIERS + * + * : normal read access; + * WRITE : write access; + * ATOMIC: access is atomic; + * ASSERT: access is not a regular access, but an assertion; */ #define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 +#define KCSAN_ACCESS_ASSERT 0x4 /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -18,7 +24,7 @@ */ #ifdef CONFIG_KCSAN /** - * __kcsan_check_access - check generic access for data races + * __kcsan_check_access - check generic access for races * * @ptr address of access * @size size of access @@ -43,7 +49,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #endif /** - * __kcsan_check_read - check regular read access for data races + * __kcsan_check_read - check regular read access for races * * @ptr address of access * @size size of access @@ -51,7 +57,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0) /** - * __kcsan_check_write - check regular write access for data races + * __kcsan_check_write - check regular write access for races * * @ptr address of access * @size size of access @@ -60,7 +66,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) /** - * kcsan_check_read - check regular read access for data races + * kcsan_check_read - check regular read access for races * * @ptr address of access * @size size of access @@ -68,7 +74,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0) /** - * kcsan_check_write - check regular write access for data races + * kcsan_check_write - check regular write access for races * * @ptr address of access * @size size of access diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 82c2bef827d4..87ef01e40199 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -56,7 +56,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { /* * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary - * slot (middle) is fine if we assume that data races occur rarely. The set of + * slot (middle) is fine if we assume that races occur rarely. The set of * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}. */ @@ -178,6 +178,14 @@ is_atomic(const volatile void *ptr, size_t size, int type) if ((type & KCSAN_ACCESS_ATOMIC) != 0) return true; + /* + * Unless explicitly declared atomic, never consider an assertion access + * as atomic. This allows using them also in atomic regions, such as + * seqlocks, without implicitly changing their semantics. + */ + if ((type & KCSAN_ACCESS_ASSERT) != 0) + return false; + if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) && (type & KCSAN_ACCESS_WRITE) != 0 && size <= sizeof(long) && IS_ALIGNED((unsigned long)ptr, size)) @@ -298,7 +306,11 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, */ kcsan_counter_inc(KCSAN_COUNTER_REPORT_RACES); } - kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES); + + if ((type & KCSAN_ACCESS_ASSERT) != 0) + kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + else + kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES); user_access_restore(flags); } @@ -307,6 +319,7 @@ static noinline void kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) { const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; + const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0; atomic_long_t *watchpoint; union { u8 _1; @@ -429,13 +442,32 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) /* * No need to increment 'data_races' counter, as the racing * thread already did. + * + * Count 'assert_failures' for each failed ASSERT access, + * therefore both this thread and the racing thread may + * increment this counter. */ - kcsan_report(ptr, size, type, size > 8 || value_change, - smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); + if (is_assert) + kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + + /* + * - If we were not able to observe a value change due to size + * constraints, always assume a value change. + * - If the access type is an assertion, we also always assume a + * value change to always report the race. + */ + value_change = value_change || size > 8 || is_assert; + + kcsan_report(ptr, size, type, value_change, smp_processor_id(), + KCSAN_REPORT_RACE_SIGNAL); } else if (value_change) { /* Inferring a race, since the value should not have changed. */ + kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN); - if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN)) + if (is_assert) + kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + + if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, true, smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); @@ -471,7 +503,7 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, &encoded_watchpoint); /* * It is safe to check kcsan_is_enabled() after find_watchpoint in the - * slow-path, as long as no state changes that cause a data race to be + * slow-path, as long as no state changes that cause a race to be * detected and reported have occurred until kcsan_is_enabled() is * checked. */ diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index bec42dab32ee..a9dad44130e6 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -44,6 +44,7 @@ static const char *counter_to_name(enum kcsan_counter_id id) case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints"; case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints"; case KCSAN_COUNTER_DATA_RACES: return "data_races"; + case KCSAN_COUNTER_ASSERT_FAILURES: return "assert_failures"; case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity"; case KCSAN_COUNTER_REPORT_RACES: return "report_races"; case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin"; diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 8492da45494b..50078e7d43c3 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -39,6 +39,13 @@ enum kcsan_counter_id { */ KCSAN_COUNTER_DATA_RACES, + /* + * Total number of ASSERT failures due to races. If the observed race is + * due to two conflicting ASSERT type accesses, then both will be + * counted. + */ + KCSAN_COUNTER_ASSERT_FAILURES, + /* * Number of times no watchpoints were available. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 7cd34285df74..3bc590e6be7e 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -34,11 +34,11 @@ static struct { } other_info = { .ptr = NULL }; /* - * Information about reported data races; used to rate limit reporting. + * Information about reported races; used to rate limit reporting. */ struct report_time { /* - * The last time the data race was reported. + * The last time the race was reported. */ unsigned long time; @@ -57,7 +57,7 @@ struct report_time { * * Therefore, we use a fixed-size array, which at most will occupy a page. This * still adequately rate limits reports, assuming that a) number of unique data - * races is not excessive, and b) occurrence of unique data races within the + * races is not excessive, and b) occurrence of unique races within the * same time window is limited. */ #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) @@ -74,7 +74,7 @@ static struct report_time report_times[REPORT_TIMES_SIZE]; static DEFINE_SPINLOCK(report_lock); /* - * Checks if the data race identified by thread frames frame1 and frame2 has + * Checks if the race identified by thread frames frame1 and frame2 has * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). */ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) @@ -90,7 +90,7 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); - /* Check if a matching data race report exists. */ + /* Check if a matching race report exists. */ for (i = 0; i < REPORT_TIMES_SIZE; ++i) { struct report_time *rt = &report_times[i]; @@ -114,7 +114,7 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) if (time_before(rt->time, invalid_before)) continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ - /* Reported recently, check if data race matches. */ + /* Reported recently, check if race matches. */ if ((rt->frame1 == frame1 && rt->frame2 == frame2) || (rt->frame1 == frame2 && rt->frame2 == frame1)) return true; @@ -142,11 +142,12 @@ skip_report(bool value_change, unsigned long top_frame) * 3. write watchpoint, conflicting write (value_change==true): report; * 4. write watchpoint, conflicting write (value_change==false): skip; * 5. write watchpoint, conflicting read (value_change==false): skip; - * 6. write watchpoint, conflicting read (value_change==true): impossible; + * 6. write watchpoint, conflicting read (value_change==true): report; * * Cases 1-4 are intuitive and expected; case 5 ensures we do not report - * data races where the write may have rewritten the same value; and - * case 6 is simply impossible. + * data races where the write may have rewritten the same value; case 6 + * is possible either if the size is larger than what we check value + * changes for or the access type is KCSAN_ACCESS_ASSERT. */ if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && !value_change) { /* @@ -178,11 +179,27 @@ static const char *get_access_type(int type) return "write"; case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked)"; + + /* + * ASSERT variants: + */ + case KCSAN_ACCESS_ASSERT: + case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC: + return "assert no writes"; + case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE: + case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "assert no accesses"; + default: BUG(); } } +static const char *get_bug_type(int type) +{ + return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; +} + /* Return thread description: in task or interrupt. */ static const char *get_thread_desc(int task_id) { @@ -268,13 +285,15 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, * Do not print offset of functions to keep title short. */ cmp = sym_strcmp((void *)other_frame, (void *)this_frame); - pr_err("BUG: KCSAN: data-race in %ps / %ps\n", + pr_err("BUG: KCSAN: %s in %ps / %ps\n", + get_bug_type(access_type | other_info.access_type), (void *)(cmp < 0 ? other_frame : this_frame), (void *)(cmp < 0 ? this_frame : other_frame)); } break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: - pr_err("BUG: KCSAN: data-race in %pS\n", (void *)this_frame); + pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(access_type), + (void *)this_frame); break; default: @@ -427,7 +446,7 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, /* * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if * we do not turn off lockdep here; this could happen due to recursion - * into lockdep via KCSAN if we detect a data race in utilities used by + * into lockdep via KCSAN if we detect a race in utilities used by * lockdep. */ lockdep_off(); diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 9785bbf9a1d1..f0b791143c6a 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -4,13 +4,17 @@ config HAVE_ARCH_KCSAN bool menuconfig KCSAN - bool "KCSAN: dynamic data race detector" + bool "KCSAN: dynamic race detector" depends on HAVE_ARCH_KCSAN && DEBUG_KERNEL && !KASAN select STACKTRACE help - The Kernel Concurrency Sanitizer (KCSAN) is a dynamic data race - detector, which relies on compile-time instrumentation, and uses a - watchpoint-based sampling approach to detect data races. + The Kernel Concurrency Sanitizer (KCSAN) is a dynamic race detector, + which relies on compile-time instrumentation, and uses a + watchpoint-based sampling approach to detect races. + + KCSAN's primary purpose is to detect data races. KCSAN can also be + used to check properties, with the help of provided assertions, of + concurrent code where bugs do not manifest as data races. See for more details. @@ -85,14 +89,14 @@ config KCSAN_SKIP_WATCH_RANDOMIZE KCSAN_WATCH_SKIP. config KCSAN_REPORT_ONCE_IN_MS - int "Duration in milliseconds, in which any given data race is only reported once" + int "Duration in milliseconds, in which any given race is only reported once" default 3000 help - Any given data race is only reported once in the defined time window. - Different data races may still generate reports within a duration - that is smaller than the duration defined here. This allows rate - limiting reporting to avoid flooding the console with reports. - Setting this to 0 disables rate limiting. + Any given race is only reported once in the defined time window. + Different races may still generate reports within a duration that is + smaller than the duration defined here. This allows rate limiting + reporting to avoid flooding the console with reports. Setting this + to 0 disables rate limiting. # The main purpose of the below options is to control reported data races (e.g. # in fuzzer configs), and are not expected to be switched frequently by other -- cgit v1.2.3 From a312013578e4775003689e31c1f487df11f362a3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 6 Feb 2020 16:46:26 +0100 Subject: kcsan: Add test to generate conflicts via debugfs Add 'test=' option to KCSAN's debugfs interface to invoke KCSAN checks on a dummy variable. By writing 'test=' to the debugfs file from multiple tasks, we can generate real conflicts, and trigger data race reports. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/debugfs.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index a9dad44130e6..9bbba0e57c9b 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -69,9 +70,9 @@ void kcsan_counter_dec(enum kcsan_counter_id id) /* * The microbenchmark allows benchmarking KCSAN core runtime only. To run * multiple threads, pipe 'microbench=' from multiple tasks into the - * debugfs file. + * debugfs file. This will not generate any conflicts, and tests fast-path only. */ -static void microbenchmark(unsigned long iters) +static noinline void microbenchmark(unsigned long iters) { cycles_t cycles; @@ -81,18 +82,52 @@ static void microbenchmark(unsigned long iters) while (iters--) { /* * We can run this benchmark from multiple tasks; this address - * calculation increases likelyhood of some accesses overlapping - * (they still won't conflict because all are reads). + * calculation increases likelyhood of some accesses + * overlapping. Make the access type an atomic read, to never + * set up watchpoints and test the fast-path only. */ unsigned long addr = iters % (CONFIG_KCSAN_NUM_WATCHPOINTS * PAGE_SIZE); - __kcsan_check_read((void *)addr, sizeof(long)); + __kcsan_check_access((void *)addr, sizeof(long), KCSAN_ACCESS_ATOMIC); } cycles = get_cycles() - cycles; pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); } +/* + * Simple test to create conflicting accesses. Write 'test=' to KCSAN's + * debugfs file from multiple tasks to generate real conflicts and show reports. + */ +static long test_dummy; +static noinline void test_thread(unsigned long iters) +{ + const struct kcsan_ctx ctx_save = current->kcsan_ctx; + cycles_t cycles; + + /* We may have been called from an atomic region; reset context. */ + memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); + + pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); + + cycles = get_cycles(); + while (iters--) { + __kcsan_check_read(&test_dummy, sizeof(test_dummy)); + __kcsan_check_write(&test_dummy, sizeof(test_dummy)); + ASSERT_EXCLUSIVE_WRITER(test_dummy); + ASSERT_EXCLUSIVE_ACCESS(test_dummy); + + /* not actually instrumented */ + WRITE_ONCE(test_dummy, iters); /* to observe value-change */ + } + cycles = get_cycles() - cycles; + + pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); + + /* restore context */ + current->kcsan_ctx = ctx_save; +} + static int cmp_filterlist_addrs(const void *rhs, const void *lhs) { const unsigned long a = *(const unsigned long *)rhs; @@ -242,6 +277,12 @@ debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *o if (kstrtoul(&arg[sizeof("microbench=") - 1], 0, &iters)) return -EINVAL; microbenchmark(iters); + } else if (!strncmp(arg, "test=", sizeof("test=") - 1)) { + unsigned long iters; + + if (kstrtoul(&arg[sizeof("test=") - 1], 0, &iters)) + return -EINVAL; + test_thread(iters); } else if (!strcmp(arg, "whitelist")) { set_report_filterlist_whitelist(true); } else if (!strcmp(arg, "blacklist")) { -- cgit v1.2.3 From 80d4c4775216602ccdc9e761ce251c8451d0c6ca Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 7 Feb 2020 19:59:10 +0100 Subject: kcsan: Expose core configuration parameters as module params This adds early_boot, udelay_{task,interrupt}, and skip_watch as module params. The latter parameters are useful to modify at runtime to tune KCSAN's performance on new systems. This will also permit auto-tuning these parameters to maximize overall system performance and KCSAN's race detection ability. None of the parameters are used in the fast-path and referring to them via static variables instead of CONFIG constants will not affect performance. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Cc: Qian Cai --- kernel/kcsan/core.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 87ef01e40199..498b1eb3c1cd 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,20 @@ #include "encoding.h" #include "kcsan.h" +static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE); +static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; +static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; +static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH; + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "kcsan." +module_param_named(early_enable, kcsan_early_enable, bool, 0); +module_param_named(udelay_task, kcsan_udelay_task, uint, 0644); +module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644); +module_param_named(skip_watch, kcsan_skip_watch, long, 0644); + bool kcsan_enabled; /* Per-CPU kcsan_ctx for interrupts */ @@ -239,9 +254,9 @@ should_watch(const volatile void *ptr, size_t size, int type) static inline void reset_kcsan_skip(void) { - long skip_count = CONFIG_KCSAN_SKIP_WATCH - + long skip_count = kcsan_skip_watch - (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ? - prandom_u32_max(CONFIG_KCSAN_SKIP_WATCH) : + prandom_u32_max(kcsan_skip_watch) : 0); this_cpu_write(kcsan_skip, skip_count); } @@ -253,8 +268,7 @@ static __always_inline bool kcsan_is_enabled(void) static inline unsigned int get_delay(void) { - unsigned int delay = in_task() ? CONFIG_KCSAN_UDELAY_TASK : - CONFIG_KCSAN_UDELAY_INTERRUPT; + unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt; return delay - (IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ? prandom_u32_max(delay) : 0); @@ -527,7 +541,7 @@ void __init kcsan_init(void) * We are in the init task, and no other tasks should be running; * WRITE_ONCE without memory barrier is sufficient. */ - if (IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE)) + if (kcsan_early_enable) WRITE_ONCE(kcsan_enabled, true); } -- cgit v1.2.3 From 3a5b45e5031fa395ab6e53545fb726b2d5b104f0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 10 Feb 2020 15:56:39 +0100 Subject: kcsan: Fix misreporting if concurrent races on same address If there are at least 4 threads racing on the same address, it can happen that one of the readers may observe another matching reader in other_info. To avoid locking up, we have to consume 'other_info' regardless, but skip the report. See the added comment for more details. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/report.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 3bc590e6be7e..abf6852dff72 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -422,6 +422,44 @@ retry: return false; } + access_type |= other_info.access_type; + if ((access_type & KCSAN_ACCESS_WRITE) == 0) { + /* + * While the address matches, this is not the other_info + * from the thread that consumed our watchpoint, since + * neither this nor the access in other_info is a write. + * It is invalid to continue with the report, since we + * only have information about reads. + * + * This can happen due to concurrent races on the same + * address, with at least 4 threads. To avoid locking up + * other_info and all other threads, we have to consume + * it regardless. + * + * A concrete case to illustrate why we might lock up if + * we do not consume other_info: + * + * We have 4 threads, all accessing the same address + * (or matching address ranges). Assume the following + * watcher and watchpoint consumer pairs: + * write1-read1, read2-write2. The first to populate + * other_info is write2, however, write1 consumes it, + * resulting in a report of write1-write2. This report + * is valid, however, now read1 populates other_info; + * read2-read1 is an invalid conflict, yet, no other + * conflicting access is left. Therefore, we must + * consume read1's other_info. + * + * Since this case is assumed to be rare, it is + * reasonable to omit this report: one of the other + * reports includes information about the same shared + * data, and at this point the likelihood that we + * re-report the same race again is high. + */ + release_report(flags, KCSAN_REPORT_RACE_SIGNAL); + return false; + } + /* * Matching & usable access in other_info: keep other_info_lock * locked, as this thread consumes it to print the full report; -- cgit v1.2.3 From b738f6169f1260b4ed5bd9f220b1c84d79f3ab8d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:21 +0100 Subject: kcsan: Introduce kcsan_value_change type Introduces kcsan_value_change type, which explicitly points out if we either observed a value-change (TRUE), or we could not observe one but cannot rule out a value-change happened (MAYBE). The MAYBE state can either be reported or not, depending on configuration preferences. A follow-up patch introduces the FALSE state, which should never be reported. No functional change intended. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/kcsan/core.c | 38 ++++++++++++++++++++++---------------- kernel/kcsan/kcsan.h | 19 ++++++++++++++++++- kernel/kcsan/report.c | 26 ++++++++++++++------------ 3 files changed, 54 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 498b1eb3c1cd..3f89801161d3 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -341,7 +341,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) u32 _4; u64 _8; } expect_value; - bool value_change = false; + enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; unsigned long ua_flags = user_access_save(); unsigned long irq_flags; @@ -398,6 +398,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * Read the current value, to later check and infer a race if the data * was modified via a non-instrumented access, e.g. from a device. */ + expect_value._8 = 0; switch (size) { case 1: expect_value._1 = READ_ONCE(*(const u8 *)ptr); @@ -436,23 +437,36 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) */ switch (size) { case 1: - value_change = expect_value._1 != READ_ONCE(*(const u8 *)ptr); + expect_value._1 ^= READ_ONCE(*(const u8 *)ptr); break; case 2: - value_change = expect_value._2 != READ_ONCE(*(const u16 *)ptr); + expect_value._2 ^= READ_ONCE(*(const u16 *)ptr); break; case 4: - value_change = expect_value._4 != READ_ONCE(*(const u32 *)ptr); + expect_value._4 ^= READ_ONCE(*(const u32 *)ptr); break; case 8: - value_change = expect_value._8 != READ_ONCE(*(const u64 *)ptr); + expect_value._8 ^= READ_ONCE(*(const u64 *)ptr); break; default: break; /* ignore; we do not diff the values */ } + /* Were we able to observe a value-change? */ + if (expect_value._8 != 0) + value_change = KCSAN_VALUE_CHANGE_TRUE; + /* Check if this access raced with another. */ if (!remove_watchpoint(watchpoint)) { + /* + * Depending on the access type, map a value_change of MAYBE to + * TRUE (require reporting). + */ + if (value_change == KCSAN_VALUE_CHANGE_MAYBE && (size > 8 || is_assert)) { + /* Always assume a value-change. */ + value_change = KCSAN_VALUE_CHANGE_TRUE; + } + /* * No need to increment 'data_races' counter, as the racing * thread already did. @@ -461,20 +475,12 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * therefore both this thread and the racing thread may * increment this counter. */ - if (is_assert) + if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - /* - * - If we were not able to observe a value change due to size - * constraints, always assume a value change. - * - If the access type is an assertion, we also always assume a - * value change to always report the race. - */ - value_change = value_change || size > 8 || is_assert; - kcsan_report(ptr, size, type, value_change, smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); - } else if (value_change) { + } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN); @@ -482,7 +488,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) - kcsan_report(ptr, size, type, true, + kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 50078e7d43c3..83a79b08b550 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -88,6 +88,22 @@ extern void kcsan_counter_dec(enum kcsan_counter_id id); */ extern bool kcsan_skip_report_debugfs(unsigned long func_addr); +/* + * Value-change states. + */ +enum kcsan_value_change { + /* + * Did not observe a value-change, however, it is valid to report the + * race, depending on preferences. + */ + KCSAN_VALUE_CHANGE_MAYBE, + + /* + * The value was observed to change, and the race should be reported. + */ + KCSAN_VALUE_CHANGE_TRUE, +}; + enum kcsan_report_type { /* * The thread that set up the watchpoint and briefly stalled was @@ -111,6 +127,7 @@ enum kcsan_report_type { * Print a race report from thread that encountered the race. */ extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, - bool value_change, int cpu_id, enum kcsan_report_type type); + enum kcsan_value_change value_change, int cpu_id, + enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index abf6852dff72..d871476dc134 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -130,26 +130,27 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) * Special rules to skip reporting. */ static bool -skip_report(bool value_change, unsigned long top_frame) +skip_report(enum kcsan_value_change value_change, unsigned long top_frame) { /* - * The first call to skip_report always has value_change==true, since we + * The first call to skip_report always has value_change==TRUE, since we * cannot know the value written of an instrumented access. For the 2nd * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: * - * 1. read watchpoint, conflicting write (value_change==true): report; - * 2. read watchpoint, conflicting write (value_change==false): skip; - * 3. write watchpoint, conflicting write (value_change==true): report; - * 4. write watchpoint, conflicting write (value_change==false): skip; - * 5. write watchpoint, conflicting read (value_change==false): skip; - * 6. write watchpoint, conflicting read (value_change==true): report; + * 1. read watchpoint, conflicting write (value_change==TRUE): report; + * 2. read watchpoint, conflicting write (value_change==MAYBE): skip; + * 3. write watchpoint, conflicting write (value_change==TRUE): report; + * 4. write watchpoint, conflicting write (value_change==MAYBE): skip; + * 5. write watchpoint, conflicting read (value_change==MAYBE): skip; + * 6. write watchpoint, conflicting read (value_change==TRUE): report; * * Cases 1-4 are intuitive and expected; case 5 ensures we do not report * data races where the write may have rewritten the same value; case 6 * is possible either if the size is larger than what we check value * changes for or the access type is KCSAN_ACCESS_ASSERT. */ - if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && !value_change) { + if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && + value_change == KCSAN_VALUE_CHANGE_MAYBE) { /* * The access is a write, but the data value did not change. * @@ -245,7 +246,7 @@ static int sym_strcmp(void *addr1, void *addr2) * Returns true if a report was generated, false otherwise. */ static bool print_report(const volatile void *ptr, size_t size, int access_type, - bool value_change, int cpu_id, + enum kcsan_value_change value_change, int cpu_id, enum kcsan_report_type type) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; @@ -258,7 +259,7 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, /* * Must check report filter rules before starting to print. */ - if (skip_report(true, stack_entries[skipnr])) + if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr])) return false; if (type == KCSAN_REPORT_RACE_SIGNAL) { @@ -477,7 +478,8 @@ retry: } void kcsan_report(const volatile void *ptr, size_t size, int access_type, - bool value_change, int cpu_id, enum kcsan_report_type type) + enum kcsan_value_change value_change, int cpu_id, + enum kcsan_report_type type) { unsigned long flags = 0; -- cgit v1.2.3 From 81af89e15862909881ff010a0adb67148487e88a Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:22 +0100 Subject: kcsan: Add kcsan_set_access_mask() support When setting up an access mask with kcsan_set_access_mask(), KCSAN will only report races if concurrent changes to bits set in access_mask are observed. Conveying access_mask via a separate call avoids introducing overhead in the common-case fast-path. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/kcsan-checks.h | 11 +++++++++++ include/linux/kcsan.h | 5 +++++ init/init_task.c | 1 + kernel/kcsan/core.c | 43 +++++++++++++++++++++++++++++++++++++++---- kernel/kcsan/kcsan.h | 5 +++++ kernel/kcsan/report.c | 13 ++++++++++++- 6 files changed, 73 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 8675411c8dbc..4ef5233ff3f0 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -68,6 +68,16 @@ void kcsan_flat_atomic_end(void); */ void kcsan_atomic_next(int n); +/** + * kcsan_set_access_mask - set access mask + * + * Set the access mask for all accesses for the current context if non-zero. + * Only value changes to bits set in the mask will be reported. + * + * @mask bitmask + */ +void kcsan_set_access_mask(unsigned long mask); + #else /* CONFIG_KCSAN */ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, @@ -78,6 +88,7 @@ static inline void kcsan_nestable_atomic_end(void) { } static inline void kcsan_flat_atomic_begin(void) { } static inline void kcsan_flat_atomic_end(void) { } static inline void kcsan_atomic_next(int n) { } +static inline void kcsan_set_access_mask(unsigned long mask) { } #endif /* CONFIG_KCSAN */ diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 7a614ca558f6..3b84606e1e67 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -35,6 +35,11 @@ struct kcsan_ctx { */ int atomic_nest_count; bool in_flat_atomic; + + /* + * Access mask for all accesses if non-zero. + */ + unsigned long access_mask; }; /** diff --git a/init/init_task.c b/init/init_task.c index 2b4fe98b0f09..096191d177d5 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -167,6 +167,7 @@ struct task_struct init_task .atomic_next = 0, .atomic_nest_count = 0, .in_flat_atomic = false, + .access_mask = 0, }, #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 3f89801161d3..589b1e7f0f25 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -39,6 +39,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { .atomic_next = 0, .atomic_nest_count = 0, .in_flat_atomic = false, + .access_mask = 0, }; /* @@ -298,6 +299,15 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, if (!kcsan_is_enabled()) return; + + /* + * The access_mask check relies on value-change comparison. To avoid + * reporting a race where e.g. the writer set up the watchpoint, but the + * reader has access_mask!=0, we have to ignore the found watchpoint. + */ + if (get_ctx()->access_mask != 0) + return; + /* * Consume the watchpoint as soon as possible, to minimize the chances * of !consumed. Consuming the watchpoint must always be guarded by @@ -341,6 +351,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) u32 _4; u64 _8; } expect_value; + unsigned long access_mask; enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; unsigned long ua_flags = user_access_save(); unsigned long irq_flags; @@ -435,18 +446,27 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * Re-read value, and check if it is as expected; if not, we infer a * racy access. */ + access_mask = get_ctx()->access_mask; switch (size) { case 1: expect_value._1 ^= READ_ONCE(*(const u8 *)ptr); + if (access_mask) + expect_value._1 &= (u8)access_mask; break; case 2: expect_value._2 ^= READ_ONCE(*(const u16 *)ptr); + if (access_mask) + expect_value._2 &= (u16)access_mask; break; case 4: expect_value._4 ^= READ_ONCE(*(const u32 *)ptr); + if (access_mask) + expect_value._4 &= (u32)access_mask; break; case 8: expect_value._8 ^= READ_ONCE(*(const u64 *)ptr); + if (access_mask) + expect_value._8 &= (u64)access_mask; break; default: break; /* ignore; we do not diff the values */ @@ -460,11 +480,20 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (!remove_watchpoint(watchpoint)) { /* * Depending on the access type, map a value_change of MAYBE to - * TRUE (require reporting). + * TRUE (always report) or FALSE (never report). */ - if (value_change == KCSAN_VALUE_CHANGE_MAYBE && (size > 8 || is_assert)) { - /* Always assume a value-change. */ - value_change = KCSAN_VALUE_CHANGE_TRUE; + if (value_change == KCSAN_VALUE_CHANGE_MAYBE) { + if (access_mask != 0) { + /* + * For access with access_mask, we require a + * value-change, as it is likely that races on + * ~access_mask bits are expected. + */ + value_change = KCSAN_VALUE_CHANGE_FALSE; + } else if (size > 8 || is_assert) { + /* Always assume a value-change. */ + value_change = KCSAN_VALUE_CHANGE_TRUE; + } } /* @@ -622,6 +651,12 @@ void kcsan_atomic_next(int n) } EXPORT_SYMBOL(kcsan_atomic_next); +void kcsan_set_access_mask(unsigned long mask) +{ + get_ctx()->access_mask = mask; +} +EXPORT_SYMBOL(kcsan_set_access_mask); + void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { check_access(ptr, size, type); diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 83a79b08b550..892de5120c1b 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -98,6 +98,11 @@ enum kcsan_value_change { */ KCSAN_VALUE_CHANGE_MAYBE, + /* + * Did not observe a value-change, and it is invalid to report the race. + */ + KCSAN_VALUE_CHANGE_FALSE, + /* * The value was observed to change, and the race should be reported. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index d871476dc134..11c791b886f3 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -132,6 +132,9 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) static bool skip_report(enum kcsan_value_change value_change, unsigned long top_frame) { + /* Should never get here if value_change==FALSE. */ + WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); + /* * The first call to skip_report always has value_change==TRUE, since we * cannot know the value written of an instrumented access. For the 2nd @@ -493,7 +496,15 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, kcsan_disable_current(); if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { - if (print_report(ptr, size, access_type, value_change, cpu_id, type) && panic_on_warn) + /* + * Never report if value_change is FALSE, only if we it is + * either TRUE or MAYBE. In case of MAYBE, further filtering may + * be done once we know the full stack trace in print_report(). + */ + bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && + print_report(ptr, size, access_type, value_change, cpu_id, type); + + if (reported && panic_on_warn) panic("panic_on_warn set ...\n"); release_report(&flags, type); -- cgit v1.2.3 From 703b321501c95c658275fd76775282fe45989641 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:23 +0100 Subject: kcsan: Introduce ASSERT_EXCLUSIVE_BITS(var, mask) This introduces ASSERT_EXCLUSIVE_BITS(var, mask). ASSERT_EXCLUSIVE_BITS(var, mask) will cause KCSAN to assume that the following access is safe w.r.t. data races (however, please see the docbook comment for disclaimer here). For more context on why this was considered necessary, please see: http://lkml.kernel.org/r/1580995070-25139-1-git-send-email-cai@lca.pw In particular, before this patch, data races between reads (that use @mask bits of an access that should not be modified concurrently) and writes (that change ~@mask bits not used by the readers) would have been annotated with "data_race()" (or "READ_ONCE()"). However, doing so would then hide real problems: we would no longer be able to detect harmful races between reads to @mask bits and writes to @mask bits. Therefore, by using ASSERT_EXCLUSIVE_BITS(var, mask), we accomplish: 1. Avoid proliferation of specific macros at the call sites: by including a single mask in the argument list, we can use the same macro in a wide variety of call sites, regardless of how and which bits in a field each call site actually accesses. 2. The existing code does not need to be modified (although READ_ONCE() may still be advisable if we cannot prove that the data race is always safe). 3. We catch bugs where the exclusive bits are modified concurrently. 4. We document properties of the current code. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Cc: David Hildenbrand Cc: Jan Kara Cc: Qian Cai --- include/linux/kcsan-checks.h | 69 ++++++++++++++++++++++++++++++++++++++++---- kernel/kcsan/debugfs.c | 15 +++++++++- 2 files changed, 77 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 4ef5233ff3f0..8f9f6e2191dc 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -152,9 +152,9 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #endif /** - * ASSERT_EXCLUSIVE_WRITER - assert no other threads are writing @var + * ASSERT_EXCLUSIVE_WRITER - assert no concurrent writes to @var * - * Assert that there are no other threads writing @var; other readers are + * Assert that there are no concurrent writes to @var; other readers are * allowed. This assertion can be used to specify properties of concurrent code, * where violation cannot be detected as a normal data race. * @@ -171,11 +171,11 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT) /** - * ASSERT_EXCLUSIVE_ACCESS - assert no other threads are accessing @var + * ASSERT_EXCLUSIVE_ACCESS - assert no concurrent accesses to @var * - * Assert that no other thread is accessing @var (no readers nor writers). This - * assertion can be used to specify properties of concurrent code, where - * violation cannot be detected as a normal data race. + * Assert that there are no concurrent accesses to @var (no readers nor + * writers). This assertion can be used to specify properties of concurrent + * code, where violation cannot be detected as a normal data race. * * For example, in a reference-counting algorithm where exclusive access is * expected after the refcount reaches 0. We can check that this property @@ -191,4 +191,61 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define ASSERT_EXCLUSIVE_ACCESS(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT) +/** + * ASSERT_EXCLUSIVE_BITS - assert no concurrent writes to subset of bits in @var + * + * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER(var). + * + * Assert that there are no concurrent writes to a subset of bits in @var; + * concurrent readers are permitted. This assertion captures more detailed + * bit-level properties, compared to the other (word granularity) assertions. + * Only the bits set in @mask are checked for concurrent modifications, while + * ignoring the remaining bits, i.e. concurrent writes (or reads) to ~@mask bits + * are ignored. + * + * Use this for variables, where some bits must not be modified concurrently, + * yet other bits are expected to be modified concurrently. + * + * For example, variables where, after initialization, some bits are read-only, + * but other bits may still be modified concurrently. A reader may wish to + * assert that this is true as follows: + * + * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK); + * foo = (READ_ONCE(flags) & READ_ONLY_MASK) >> READ_ONLY_SHIFT; + * + * Note: The access that immediately follows ASSERT_EXCLUSIVE_BITS() is + * assumed to access the masked bits only, and KCSAN optimistically assumes it + * is therefore safe, even in the presence of data races, and marking it with + * READ_ONCE() is optional from KCSAN's point-of-view. We caution, however, + * that it may still be advisable to do so, since we cannot reason about all + * compiler optimizations when it comes to bit manipulations (on the reader + * and writer side). If you are sure nothing can go wrong, we can write the + * above simply as: + * + * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK); + * foo = (flags & READ_ONLY_MASK) >> READ_ONLY_SHIFT; + * + * Another example, where this may be used, is when certain bits of @var may + * only be modified when holding the appropriate lock, but other bits may still + * be modified concurrently. Writers, where other bits may change concurrently, + * could use the assertion as follows: + * + * spin_lock(&foo_lock); + * ASSERT_EXCLUSIVE_BITS(flags, FOO_MASK); + * old_flags = READ_ONCE(flags); + * new_flags = (old_flags & ~FOO_MASK) | (new_foo << FOO_SHIFT); + * if (cmpxchg(&flags, old_flags, new_flags) != old_flags) { ... } + * spin_unlock(&foo_lock); + * + * @var variable to assert on + * @mask only check for modifications to bits set in @mask + */ +#define ASSERT_EXCLUSIVE_BITS(var, mask) \ + do { \ + kcsan_set_access_mask(mask); \ + __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT);\ + kcsan_set_access_mask(0); \ + kcsan_atomic_next(1); \ + } while (0) + #endif /* _LINUX_KCSAN_CHECKS_H */ diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 9bbba0e57c9b..2ff196123977 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -100,8 +100,10 @@ static noinline void microbenchmark(unsigned long iters) * debugfs file from multiple tasks to generate real conflicts and show reports. */ static long test_dummy; +static long test_flags; static noinline void test_thread(unsigned long iters) { + const long CHANGE_BITS = 0xff00ff00ff00ff00L; const struct kcsan_ctx ctx_save = current->kcsan_ctx; cycles_t cycles; @@ -109,16 +111,27 @@ static noinline void test_thread(unsigned long iters) memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); + pr_info("test_dummy@%px, test_flags@%px\n", &test_dummy, &test_flags); cycles = get_cycles(); while (iters--) { + /* These all should generate reports. */ __kcsan_check_read(&test_dummy, sizeof(test_dummy)); - __kcsan_check_write(&test_dummy, sizeof(test_dummy)); ASSERT_EXCLUSIVE_WRITER(test_dummy); ASSERT_EXCLUSIVE_ACCESS(test_dummy); + ASSERT_EXCLUSIVE_BITS(test_flags, ~CHANGE_BITS); /* no report */ + __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */ + + ASSERT_EXCLUSIVE_BITS(test_flags, CHANGE_BITS); /* report */ + __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */ + /* not actually instrumented */ WRITE_ONCE(test_dummy, iters); /* to observe value-change */ + __kcsan_check_write(&test_dummy, sizeof(test_dummy)); + + test_flags ^= CHANGE_BITS; /* generate value-change */ + __kcsan_check_write(&test_flags, sizeof(test_flags)); } cycles = get_cycles() - cycles; -- cgit v1.2.3 From f5d2313bd3c540be405c4977a63840cd6d0167b5 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 14 Feb 2020 22:10:35 +0100 Subject: kcsan, trace: Make KCSAN compatible with tracing Previously the system would lock up if ftrace was enabled together with KCSAN. This is due to recursion on reporting if the tracer code is instrumented with KCSAN. To avoid this for all types of tracing, disable KCSAN instrumentation for all of kernel/trace. Furthermore, since KCSAN relies on udelay() to introduce delay, we have to disable ftrace for udelay() (currently done for x86) in case KCSAN is used together with lockdep and ftrace. The reason is that it may corrupt lockdep IRQ flags tracing state due to a peculiar case of recursion (details in Makefile comment). Reported-by: Qian Cai Tested-by: Qian Cai Acked-by: Steven Rostedt (VMware) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 5 +++++ kernel/kcsan/Makefile | 2 ++ kernel/trace/Makefile | 3 +++ 3 files changed, 10 insertions(+) (limited to 'kernel') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 432a07705677..6110bce7237b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -8,6 +8,11 @@ KCOV_INSTRUMENT_delay.o := n # KCSAN uses udelay for introducing watchpoint delay; avoid recursion. KCSAN_SANITIZE_delay.o := n +ifdef CONFIG_KCSAN +# In case KCSAN+lockdep+ftrace are enabled, disable ftrace for delay.o to avoid +# lockdep -> [other libs] -> KCSAN -> udelay -> ftrace -> lockdep recursion. +CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE) +endif # Early boot use of cmdline; don't instrument it ifdef CONFIG_AMD_MEM_ENCRYPT diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index df6b7799e492..d4999b38d1be 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -4,6 +4,8 @@ KCOV_INSTRUMENT := n UBSAN_SANITIZE := n CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \ $(call cc-option,-fno-stack-protector,) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index f9dcd19165fa..6b601d88bf71 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -6,6 +6,9 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) +# Avoid recursion due to instrumentation. +KCSAN_SANITIZE := n + ifdef CONFIG_FTRACE_SELFTEST # selftest needs instrumentation CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE) -- cgit v1.2.3 From 48b1fc190a180d971fb69217c88c7247f4f2ca19 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 21 Feb 2020 23:02:09 +0100 Subject: kcsan: Add option to allow watcher interruptions Add option to allow interrupts while a watchpoint is set up. This can be enabled either via CONFIG_KCSAN_INTERRUPT_WATCHER or via the boot parameter 'kcsan.interrupt_watcher=1'. Note that, currently not all safe per-CPU access primitives and patterns are accounted for, which could result in false positives. For example, asm-generic/percpu.h uses plain operations, which by default are instrumented. On interrupts and subsequent accesses to the same variable, KCSAN would currently report a data race with this option. Therefore, this option should currently remain disabled by default, but may be enabled for specific test scenarios. To avoid new warnings, changes all uses of smp_processor_id() to use the raw version (as already done in kcsan_found_watchpoint()). The exact SMP processor id is for informational purposes in the report, and correctness is not affected. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 34 ++++++++++------------------------ lib/Kconfig.kcsan | 11 +++++++++++ 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 589b1e7f0f25..e7387fec6679 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -21,6 +21,7 @@ static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE); static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH; +static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER); #ifdef MODULE_PARAM_PREFIX #undef MODULE_PARAM_PREFIX @@ -30,6 +31,7 @@ module_param_named(early_enable, kcsan_early_enable, bool, 0); module_param_named(udelay_task, kcsan_udelay_task, uint, 0644); module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644); module_param_named(skip_watch, kcsan_skip_watch, long, 0644); +module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444); bool kcsan_enabled; @@ -354,7 +356,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) unsigned long access_mask; enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; unsigned long ua_flags = user_access_save(); - unsigned long irq_flags; + unsigned long irq_flags = 0; /* * Always reset kcsan_skip counter in slow-path to avoid underflow; see @@ -370,26 +372,9 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) goto out; } - /* - * Disable interrupts & preemptions to avoid another thread on the same - * CPU accessing memory locations for the set up watchpoint; this is to - * avoid reporting races to e.g. CPU-local data. - * - * An alternative would be adding the source CPU to the watchpoint - * encoding, and checking that watchpoint-CPU != this-CPU. There are - * several problems with this: - * 1. we should avoid stealing more bits from the watchpoint encoding - * as it would affect accuracy, as well as increase performance - * overhead in the fast-path; - * 2. if we are preempted, but there *is* a genuine data race, we - * would *not* report it -- since this is the common case (vs. - * CPU-local data accesses), it makes more sense (from a data race - * detection point of view) to simply disable preemptions to ensure - * as many tasks as possible run on other CPUs. - * - * Use raw versions, to avoid lockdep recursion via IRQ flags tracing. - */ - raw_local_irq_save(irq_flags); + if (!kcsan_interrupt_watcher) + /* Use raw to avoid lockdep recursion via IRQ flags tracing. */ + raw_local_irq_save(irq_flags); watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { @@ -507,7 +492,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - kcsan_report(ptr, size, type, value_change, smp_processor_id(), + kcsan_report(ptr, size, type, value_change, raw_smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -518,13 +503,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, - smp_processor_id(), + raw_smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); } kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); out_unlock: - raw_local_irq_restore(irq_flags); + if (!kcsan_interrupt_watcher) + raw_local_irq_restore(irq_flags); out: user_access_restore(ua_flags); } diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index f0b791143c6a..081ed2e1bf7b 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -88,6 +88,17 @@ config KCSAN_SKIP_WATCH_RANDOMIZE KCSAN_WATCH_SKIP. If false, the chosen value is always KCSAN_WATCH_SKIP. +config KCSAN_INTERRUPT_WATCHER + bool "Interruptible watchers" + help + If enabled, a task that set up a watchpoint may be interrupted while + delayed. This option will allow KCSAN to detect races between + interrupted tasks and other threads of execution on the same CPU. + + Currently disabled by default, because not all safe per-CPU access + primitives and patterns may be accounted for, and therefore could + result in false positives. + config KCSAN_REPORT_ONCE_IN_MS int "Duration in milliseconds, in which any given race is only reported once" default 3000 -- cgit v1.2.3 From 2402d0eae589a31ee7b1774cb220d84d0f5605b4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Sat, 22 Feb 2020 00:10:27 +0100 Subject: kcsan: Add option for verbose reporting Adds CONFIG_KCSAN_VERBOSE to optionally enable more verbose reports. Currently information about the reporting task's held locks and IRQ trace events are shown, if they are enabled. Signed-off-by: Marco Elver Suggested-by: Qian Cai Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 4 +- kernel/kcsan/kcsan.h | 3 ++ kernel/kcsan/report.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++- lib/Kconfig.kcsan | 13 +++++++ 4 files changed, 120 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index e7387fec6679..065615df88ea 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -18,8 +18,8 @@ #include "kcsan.h" static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE); -static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; -static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; +unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; +unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH; static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER); diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 892de5120c1b..e282f8b5749e 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -13,6 +13,9 @@ /* The number of adjacent watchpoints to check. */ #define KCSAN_CHECK_ADJACENT 1 +extern unsigned int kcsan_udelay_task; +extern unsigned int kcsan_udelay_interrupt; + /* * Globally enable and disable KCSAN. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 11c791b886f3..18f9d3bc93a5 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include @@ -31,7 +33,26 @@ static struct { int cpu_id; unsigned long stack_entries[NUM_STACK_ENTRIES]; int num_stack_entries; -} other_info = { .ptr = NULL }; + + /* + * Optionally pass @current. Typically we do not need to pass @current + * via @other_info since just @task_pid is sufficient. Passing @current + * has additional overhead. + * + * To safely pass @current, we must either use get_task_struct/ + * put_task_struct, or stall the thread that populated @other_info. + * + * We cannot rely on get_task_struct/put_task_struct in case + * release_report() races with a task being released, and would have to + * free it in release_report(). This may result in deadlock if we want + * to use KCSAN on the allocators. + * + * Since we also want to reliably print held locks for + * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread + * that populated @other_info until it has been consumed. + */ + struct task_struct *task; +} other_info; /* * Information about reported races; used to rate limit reporting. @@ -245,6 +266,16 @@ static int sym_strcmp(void *addr1, void *addr2) return strncmp(buf1, buf2, sizeof(buf1)); } +static void print_verbose_info(struct task_struct *task) +{ + if (!task) + return; + + pr_err("\n"); + debug_show_held_locks(task); + print_irqtrace_events(task); +} + /* * Returns true if a report was generated, false otherwise. */ @@ -319,6 +350,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, other_info.num_stack_entries - other_skipnr, 0); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + print_verbose_info(other_info.task); + pr_err("\n"); pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", get_access_type(access_type), ptr, size, @@ -340,6 +374,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + print_verbose_info(current); + /* Print report footer. */ pr_err("\n"); pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); @@ -357,6 +394,67 @@ static void release_report(unsigned long *flags, enum kcsan_report_type type) spin_unlock_irqrestore(&report_lock, *flags); } +/* + * Sets @other_info.task and awaits consumption of @other_info. + * + * Precondition: report_lock is held. + * Postcondition: report_lock is held. + */ +static void +set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) +{ + /* + * We may be instrumenting a code-path where current->state is already + * something other than TASK_RUNNING. + */ + const bool is_running = current->state == TASK_RUNNING; + /* + * To avoid deadlock in case we are in an interrupt here and this is a + * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a + * timeout to ensure this works in all contexts. + * + * Await approximately the worst case delay of the reporting thread (if + * we are not interrupted). + */ + int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); + + other_info.task = current; + do { + if (is_running) { + /* + * Let lockdep know the real task is sleeping, to print + * the held locks (recall we turned lockdep off, so + * locking/unlocking @report_lock won't be recorded). + */ + set_current_state(TASK_UNINTERRUPTIBLE); + } + spin_unlock_irqrestore(&report_lock, *flags); + /* + * We cannot call schedule() since we also cannot reliably + * determine if sleeping here is permitted -- see in_atomic(). + */ + + udelay(1); + spin_lock_irqsave(&report_lock, *flags); + if (timeout-- < 0) { + /* + * Abort. Reset other_info.task to NULL, since it + * appears the other thread is still going to consume + * it. It will result in no verbose info printed for + * this task. + */ + other_info.task = NULL; + break; + } + /* + * If @ptr nor @current matches, then our information has been + * consumed and we may continue. If not, retry. + */ + } while (other_info.ptr == ptr && other_info.task == current); + if (is_running) + set_current_state(TASK_RUNNING); +} + /* * Depending on the report type either sets other_info and returns false, or * acquires the matching other_info and returns true. If other_info is not @@ -388,6 +486,9 @@ retry: other_info.cpu_id = cpu_id; other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + set_other_info_task_blocking(flags, ptr); + spin_unlock_irqrestore(&report_lock, *flags); /* diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 081ed2e1bf7b..0f1447ff8f55 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -20,6 +20,19 @@ menuconfig KCSAN if KCSAN +config KCSAN_VERBOSE + bool "Show verbose reports with more information about system state" + depends on PROVE_LOCKING + help + If enabled, reports show more information about the system state that + may help better analyze and debug races. This includes held locks and + IRQ trace events. + + While this option should generally be benign, we call into more + external functions on report generation; if a race report is + generated from any one of them, system stability may suffer due to + deadlocks or recursion. If in doubt, say N. + config KCSAN_DEBUG bool "Debugging of KCSAN internals" -- cgit v1.2.3 From 44656d3dc4f0dc20010d054f27397a4a1469fabf Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 25 Feb 2020 15:32:58 +0100 Subject: kcsan: Add current->state to implicitly atomic accesses Add volatile current->state to list of implicitly atomic accesses. This is in preparation to eventually enable KCSAN on kernel/sched (which currently still has KCSAN_SANITIZE := n). Since accesses that match the special check in atomic.h are rare, it makes more sense to move this check to the slow-path, avoiding the additional compare in the fast-path. With the microbenchmark, a speedup of ~6% is measured. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/atomic.h | 21 +++++++-------------- kernel/kcsan/core.c | 22 +++++++++++++++------- kernel/kcsan/debugfs.c | 27 ++++++++++++++++++--------- 3 files changed, 40 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index a9c193053491..be9e625227f3 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -4,24 +4,17 @@ #define _KERNEL_KCSAN_ATOMIC_H #include +#include /* - * Helper that returns true if access to @ptr should be considered an atomic - * access, even though it is not explicitly atomic. - * - * List all volatile globals that have been observed in races, to suppress - * data race reports between accesses to these variables. - * - * For now, we assume that volatile accesses of globals are as strong as atomic - * accesses (READ_ONCE, WRITE_ONCE cast to volatile). The situation is still not - * entirely clear, as on some architectures (Alpha) READ_ONCE/WRITE_ONCE do more - * than cast to volatile. Eventually, we hope to be able to remove this - * function. + * Special rules for certain memory where concurrent conflicting accesses are + * common, however, the current convention is to not mark them; returns true if + * access to @ptr should be considered atomic. Called from slow-path. */ -static __always_inline bool kcsan_is_atomic(const volatile void *ptr) +static bool kcsan_is_atomic_special(const volatile void *ptr) { - /* only jiffies for now */ - return ptr == &jiffies; + /* volatile globals that have been observed in data races. */ + return ptr == &jiffies || ptr == ¤t->state; } #endif /* _KERNEL_KCSAN_ATOMIC_H */ diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 065615df88ea..eb30ecdc8c00 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -188,12 +188,13 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } +/* Rules for generic atomic accesses. Called from fast-path. */ static __always_inline bool is_atomic(const volatile void *ptr, size_t size, int type) { struct kcsan_ctx *ctx; - if ((type & KCSAN_ACCESS_ATOMIC) != 0) + if (type & KCSAN_ACCESS_ATOMIC) return true; /* @@ -201,16 +202,16 @@ is_atomic(const volatile void *ptr, size_t size, int type) * as atomic. This allows using them also in atomic regions, such as * seqlocks, without implicitly changing their semantics. */ - if ((type & KCSAN_ACCESS_ASSERT) != 0) + if (type & KCSAN_ACCESS_ASSERT) return false; if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) && - (type & KCSAN_ACCESS_WRITE) != 0 && size <= sizeof(long) && + (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) && IS_ALIGNED((unsigned long)ptr, size)) return true; /* Assume aligned writes up to word size are atomic. */ ctx = get_ctx(); - if (unlikely(ctx->atomic_next > 0)) { + if (ctx->atomic_next > 0) { /* * Because we do not have separate contexts for nested * interrupts, in case atomic_next is set, we simply assume that @@ -224,10 +225,8 @@ is_atomic(const volatile void *ptr, size_t size, int type) --ctx->atomic_next; /* in task, or outer interrupt */ return true; } - if (unlikely(ctx->atomic_nest_count > 0 || ctx->in_flat_atomic)) - return true; - return kcsan_is_atomic(ptr); + return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic; } static __always_inline bool @@ -367,6 +366,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (!kcsan_is_enabled()) goto out; + /* + * Special atomic rules: unlikely to be true, so we check them here in + * the slow-path, and not in the fast-path in is_atomic(). Call after + * kcsan_is_enabled(), as we may access memory that is not yet + * initialized during early boot. + */ + if (!is_assert && kcsan_is_atomic_special(ptr)) + goto out; + if (!check_encodable((unsigned long)ptr, size)) { kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES); goto out; diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 2ff196123977..72ee188ebc54 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -74,25 +74,34 @@ void kcsan_counter_dec(enum kcsan_counter_id id) */ static noinline void microbenchmark(unsigned long iters) { + const struct kcsan_ctx ctx_save = current->kcsan_ctx; + const bool was_enabled = READ_ONCE(kcsan_enabled); cycles_t cycles; + /* We may have been called from an atomic region; reset context. */ + memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); + /* + * Disable to benchmark fast-path for all accesses, and (expected + * negligible) call into slow-path, but never set up watchpoints. + */ + WRITE_ONCE(kcsan_enabled, false); + pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); cycles = get_cycles(); while (iters--) { - /* - * We can run this benchmark from multiple tasks; this address - * calculation increases likelyhood of some accesses - * overlapping. Make the access type an atomic read, to never - * set up watchpoints and test the fast-path only. - */ - unsigned long addr = - iters % (CONFIG_KCSAN_NUM_WATCHPOINTS * PAGE_SIZE); - __kcsan_check_access((void *)addr, sizeof(long), KCSAN_ACCESS_ATOMIC); + unsigned long addr = iters & ((PAGE_SIZE << 8) - 1); + int type = !(iters & 0x7f) ? KCSAN_ACCESS_ATOMIC : + (!(iters & 0xf) ? KCSAN_ACCESS_WRITE : 0); + __kcsan_check_access((void *)addr, sizeof(long), type); } cycles = get_cycles() - cycles; pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); + + WRITE_ONCE(kcsan_enabled, was_enabled); + /* restore context */ + current->kcsan_ctx = ctx_save; } /* -- cgit v1.2.3 From e7b34100500733f7e052ce3dee94e6338b86e6bc Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Thu, 5 Mar 2020 15:21:07 +0100 Subject: kcsan: Fix a typo in a comment s/slots slots/slots/ Signed-off-by: Qiujun Huang Reviewed-by: Nick Desaulniers [elver: commit message] Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index eb30ecdc8c00..ee8200835b60 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -45,7 +45,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { }; /* - * Helper macros to index into adjacent slots slots, starting from address slot + * Helper macros to index into adjacent slots, starting from address slot * itself, followed by the right and left slots. * * The purpose is 2-fold: -- cgit v1.2.3 From 135c0872d86948046d11d7083e36c930cc43ac93 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 18 Mar 2020 18:38:44 +0100 Subject: kcsan: Introduce report access_info and other_info Improve readability by introducing access_info and other_info structs, and in preparation of the following commit in this series replaces the single instance of other_info with an array of size 1. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 6 +-- kernel/kcsan/kcsan.h | 2 +- kernel/kcsan/report.c | 147 +++++++++++++++++++++++++------------------------- 3 files changed, 77 insertions(+), 78 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index ee8200835b60..f1c38620e3cf 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -321,7 +321,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, flags = user_access_save(); if (consumed) { - kcsan_report(ptr, size, type, true, raw_smp_processor_id(), + kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE, KCSAN_REPORT_CONSUMED_WATCHPOINT); } else { /* @@ -500,8 +500,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - kcsan_report(ptr, size, type, value_change, raw_smp_processor_id(), - KCSAN_REPORT_RACE_SIGNAL); + kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -511,7 +510,6 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, - raw_smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index e282f8b5749e..6630dfe32f31 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -135,7 +135,7 @@ enum kcsan_report_type { * Print a race report from thread that encountered the race. */ extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int cpu_id, + enum kcsan_value_change value_change, enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 18f9d3bc93a5..de234d1c1b3d 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -19,18 +19,23 @@ */ #define NUM_STACK_ENTRIES 64 +/* Common access info. */ +struct access_info { + const volatile void *ptr; + size_t size; + int access_type; + int task_pid; + int cpu_id; +}; + /* * Other thread info: communicated from other racing thread to thread that set * up the watchpoint, which then prints the complete report atomically. Only * need one struct, as all threads should to be serialized regardless to print * the reports, with reporting being in the slow-path. */ -static struct { - const volatile void *ptr; - size_t size; - int access_type; - int task_pid; - int cpu_id; +struct other_info { + struct access_info ai; unsigned long stack_entries[NUM_STACK_ENTRIES]; int num_stack_entries; @@ -52,7 +57,9 @@ static struct { * that populated @other_info until it has been consumed. */ struct task_struct *task; -} other_info; +}; + +static struct other_info other_infos[1]; /* * Information about reported races; used to rate limit reporting. @@ -238,7 +245,7 @@ static const char *get_thread_desc(int task_id) } /* Helper to skip KCSAN-related functions in stack-trace. */ -static int get_stack_skipnr(unsigned long stack_entries[], int num_entries) +static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) { char buf[64]; int skip = 0; @@ -279,9 +286,10 @@ static void print_verbose_info(struct task_struct *task) /* * Returns true if a report was generated, false otherwise. */ -static bool print_report(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int cpu_id, - enum kcsan_report_type type) +static bool print_report(enum kcsan_value_change value_change, + enum kcsan_report_type type, + const struct access_info *ai, + const struct other_info *other_info) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); @@ -297,9 +305,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, return false; if (type == KCSAN_REPORT_RACE_SIGNAL) { - other_skipnr = get_stack_skipnr(other_info.stack_entries, - other_info.num_stack_entries); - other_frame = other_info.stack_entries[other_skipnr]; + other_skipnr = get_stack_skipnr(other_info->stack_entries, + other_info->num_stack_entries); + other_frame = other_info->stack_entries[other_skipnr]; /* @value_change is only known for the other thread */ if (skip_report(value_change, other_frame)) @@ -321,13 +329,13 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, */ cmp = sym_strcmp((void *)other_frame, (void *)this_frame); pr_err("BUG: KCSAN: %s in %ps / %ps\n", - get_bug_type(access_type | other_info.access_type), + get_bug_type(ai->access_type | other_info->ai.access_type), (void *)(cmp < 0 ? other_frame : this_frame), (void *)(cmp < 0 ? this_frame : other_frame)); } break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: - pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(access_type), + pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type), (void *)this_frame); break; @@ -341,30 +349,28 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, switch (type) { case KCSAN_REPORT_RACE_SIGNAL: pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(other_info.access_type), other_info.ptr, - other_info.size, get_thread_desc(other_info.task_pid), - other_info.cpu_id); + get_access_type(other_info->ai.access_type), other_info->ai.ptr, + other_info->ai.size, get_thread_desc(other_info->ai.task_pid), + other_info->ai.cpu_id); /* Print the other thread's stack trace. */ - stack_trace_print(other_info.stack_entries + other_skipnr, - other_info.num_stack_entries - other_skipnr, + stack_trace_print(other_info->stack_entries + other_skipnr, + other_info->num_stack_entries - other_skipnr, 0); if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) - print_verbose_info(other_info.task); + print_verbose_info(other_info->task); pr_err("\n"); pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(access_type), ptr, size, - get_thread_desc(in_task() ? task_pid_nr(current) : -1), - cpu_id); + get_access_type(ai->access_type), ai->ptr, ai->size, + get_thread_desc(ai->task_pid), ai->cpu_id); break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(access_type), ptr, size, - get_thread_desc(in_task() ? task_pid_nr(current) : -1), - cpu_id); + get_access_type(ai->access_type), ai->ptr, ai->size, + get_thread_desc(ai->task_pid), ai->cpu_id); break; default: @@ -386,22 +392,23 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, return true; } -static void release_report(unsigned long *flags, enum kcsan_report_type type) +static void release_report(unsigned long *flags, struct other_info *other_info) { - if (type == KCSAN_REPORT_RACE_SIGNAL) - other_info.ptr = NULL; /* mark for reuse */ + if (other_info) + other_info->ai.ptr = NULL; /* Mark for reuse. */ spin_unlock_irqrestore(&report_lock, *flags); } /* - * Sets @other_info.task and awaits consumption of @other_info. + * Sets @other_info->task and awaits consumption of @other_info. * * Precondition: report_lock is held. * Postcondition: report_lock is held. */ -static void -set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) +static void set_other_info_task_blocking(unsigned long *flags, + const struct access_info *ai, + struct other_info *other_info) { /* * We may be instrumenting a code-path where current->state is already @@ -418,7 +425,7 @@ set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) */ int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); - other_info.task = current; + other_info->task = current; do { if (is_running) { /* @@ -438,19 +445,19 @@ set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) spin_lock_irqsave(&report_lock, *flags); if (timeout-- < 0) { /* - * Abort. Reset other_info.task to NULL, since it + * Abort. Reset @other_info->task to NULL, since it * appears the other thread is still going to consume * it. It will result in no verbose info printed for * this task. */ - other_info.task = NULL; + other_info->task = NULL; break; } /* * If @ptr nor @current matches, then our information has been * consumed and we may continue. If not, retry. */ - } while (other_info.ptr == ptr && other_info.task == current); + } while (other_info->ai.ptr == ai->ptr && other_info->task == current); if (is_running) set_current_state(TASK_RUNNING); } @@ -460,9 +467,8 @@ set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) * acquires the matching other_info and returns true. If other_info is not * required for the report type, simply acquires report_lock and returns true. */ -static bool prepare_report(unsigned long *flags, const volatile void *ptr, - size_t size, int access_type, int cpu_id, - enum kcsan_report_type type) +static bool prepare_report(unsigned long *flags, enum kcsan_report_type type, + const struct access_info *ai, struct other_info *other_info) { if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && type != KCSAN_REPORT_RACE_SIGNAL) { @@ -476,18 +482,14 @@ retry: switch (type) { case KCSAN_REPORT_CONSUMED_WATCHPOINT: - if (other_info.ptr != NULL) + if (other_info->ai.ptr) break; /* still in use, retry */ - other_info.ptr = ptr; - other_info.size = size; - other_info.access_type = access_type; - other_info.task_pid = in_task() ? task_pid_nr(current) : -1; - other_info.cpu_id = cpu_id; - other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); + other_info->ai = *ai; + other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 1); if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) - set_other_info_task_blocking(flags, ptr); + set_other_info_task_blocking(flags, ai, other_info); spin_unlock_irqrestore(&report_lock, *flags); @@ -498,37 +500,31 @@ retry: return false; case KCSAN_REPORT_RACE_SIGNAL: - if (other_info.ptr == NULL) + if (!other_info->ai.ptr) break; /* no data available yet, retry */ /* * First check if this is the other_info we are expecting, i.e. * matches based on how watchpoint was encoded. */ - if (!matching_access((unsigned long)other_info.ptr & - WATCHPOINT_ADDR_MASK, - other_info.size, - (unsigned long)ptr & WATCHPOINT_ADDR_MASK, - size)) + if (!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, + (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)) break; /* mismatching watchpoint, retry */ - if (!matching_access((unsigned long)other_info.ptr, - other_info.size, (unsigned long)ptr, - size)) { + if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, + (unsigned long)ai->ptr, ai->size)) { /* * If the actual accesses to not match, this was a false * positive due to watchpoint encoding. */ - kcsan_counter_inc( - KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); + kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); /* discard this other_info */ - release_report(flags, KCSAN_REPORT_RACE_SIGNAL); + release_report(flags, other_info); return false; } - access_type |= other_info.access_type; - if ((access_type & KCSAN_ACCESS_WRITE) == 0) { + if (!((ai->access_type | other_info->ai.access_type) & KCSAN_ACCESS_WRITE)) { /* * While the address matches, this is not the other_info * from the thread that consumed our watchpoint, since @@ -561,15 +557,11 @@ retry: * data, and at this point the likelihood that we * re-report the same race again is high. */ - release_report(flags, KCSAN_REPORT_RACE_SIGNAL); + release_report(flags, other_info); return false; } - /* - * Matching & usable access in other_info: keep other_info_lock - * locked, as this thread consumes it to print the full report; - * unlocked in release_report. - */ + /* Matching access in other_info. */ return true; default: @@ -582,10 +574,19 @@ retry: } void kcsan_report(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int cpu_id, + enum kcsan_value_change value_change, enum kcsan_report_type type) { unsigned long flags = 0; + const struct access_info ai = { + .ptr = ptr, + .size = size, + .access_type = access_type, + .task_pid = in_task() ? task_pid_nr(current) : -1, + .cpu_id = raw_smp_processor_id() + }; + struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN + ? NULL : &other_infos[0]; /* * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if @@ -596,19 +597,19 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, lockdep_off(); kcsan_disable_current(); - if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { + if (prepare_report(&flags, type, &ai, other_info)) { /* * Never report if value_change is FALSE, only if we it is * either TRUE or MAYBE. In case of MAYBE, further filtering may * be done once we know the full stack trace in print_report(). */ bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && - print_report(ptr, size, access_type, value_change, cpu_id, type); + print_report(value_change, type, &ai, other_info); if (reported && panic_on_warn) panic("panic_on_warn set ...\n"); - release_report(&flags, type); + release_report(&flags, other_info); } kcsan_enable_current(); -- cgit v1.2.3 From 6119418f94ca5314392d258d27eb0cb58bb1774e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 18 Mar 2020 18:38:45 +0100 Subject: kcsan: Avoid blocking producers in prepare_report() To avoid deadlock in case watchers can be interrupted, we need to ensure that producers of the struct other_info can never be blocked by an unrelated consumer. (Likely to occur with KCSAN_INTERRUPT_WATCHER.) There are several cases that can lead to this scenario, for example: 1. A watchpoint A was set up by task T1, but interrupted by interrupt I1. Some other thread (task or interrupt) finds watchpoint A consumes it, and sets other_info. Then I1 also finds some unrelated watchpoint B, consumes it, but is blocked because other_info is in use. T1 cannot consume other_info because I1 never returns -> deadlock. 2. A watchpoint A was set up by task T1, but interrupted by interrupt I1, which also sets up a watchpoint B. Some other thread finds watchpoint A, and consumes it and sets up other_info with its information. Similarly some other thread finds watchpoint B and consumes it, but is then blocked because other_info is in use. When I1 continues it sees its watchpoint was consumed, and that it must wait for other_info, which currently contains information to be consumed by T1. However, T1 cannot unblock other_info because I1 never returns -> deadlock. To avoid this, we need to ensure that producers of struct other_info always have a usable other_info entry. This is obviously not the case with only a single instance of struct other_info, as concurrent producers must wait for the entry to be released by some consumer (which may be locked up as illustrated above). While it would be nice if producers could simply call kmalloc() and append their instance of struct other_info to a list, we are very limited in this code path: since KCSAN can instrument the allocators themselves, calling kmalloc() could lead to deadlock or corrupted allocator state. Since producers of the struct other_info will always succeed at try_consume_watchpoint(), preceding the call into kcsan_report(), we know that the particular watchpoint slot cannot simply be reused or consumed by another potential other_info producer. If we move removal of a watchpoint after reporting (by the consumer of struct other_info), we can see a consumed watchpoint as a held lock on elements of other_info, if we create a one-to-one mapping of a watchpoint to an other_info element. Therefore, the simplest solution is to create an array of struct other_info that is as large as the watchpoints array in core.c, and pass the watchpoint index to kcsan_report() for producers and consumers, and change watchpoints to be removed after reporting is done. With a default config on a 64-bit system, the array other_infos consumes ~37KiB. For most systems today this is not a problem. On smaller memory constrained systems, the config value CONFIG_KCSAN_NUM_WATCHPOINTS can be reduced appropriately. Overall, this change is a simplification of the prepare_report() code, and makes some of the checks (such as checking if at least one access is a write) redundant. Tested: $ tools/testing/selftests/rcutorture/bin/kvm.sh \ --cpus 12 --duration 10 --kconfig "CONFIG_DEBUG_INFO=y \ CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n \ CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n \ CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y \ CONFIG_KCSAN_INTERRUPT_WATCHER=y CONFIG_PROVE_LOCKING=y" \ --configs TREE03 => No longer hangs and runs to completion as expected. Reported-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 31 +++++--- kernel/kcsan/kcsan.h | 3 +- kernel/kcsan/report.c | 212 ++++++++++++++++++++++++-------------------------- 3 files changed, 124 insertions(+), 122 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index f1c38620e3cf..4d8ea0fca5f1 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -69,7 +69,6 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * slot=9: [10, 11, 9] * slot=63: [64, 65, 63] */ -#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) #define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) /* @@ -171,12 +170,16 @@ try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); } -/* - * Return true if watchpoint was not touched, false if consumed. - */ -static inline bool remove_watchpoint(atomic_long_t *watchpoint) +/* Return true if watchpoint was not touched, false if already consumed. */ +static inline bool consume_watchpoint(atomic_long_t *watchpoint) { - return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; + return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT; +} + +/* Remove the watchpoint -- its slot may be reused after. */ +static inline void remove_watchpoint(atomic_long_t *watchpoint) +{ + atomic_long_set(watchpoint, INVALID_WATCHPOINT); } static __always_inline struct kcsan_ctx *get_ctx(void) @@ -322,7 +325,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, if (consumed) { kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE, - KCSAN_REPORT_CONSUMED_WATCHPOINT); + KCSAN_REPORT_CONSUMED_WATCHPOINT, + watchpoint - watchpoints); } else { /* * The other thread may not print any diagnostics, as it has @@ -470,7 +474,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) value_change = KCSAN_VALUE_CHANGE_TRUE; /* Check if this access raced with another. */ - if (!remove_watchpoint(watchpoint)) { + if (!consume_watchpoint(watchpoint)) { /* * Depending on the access type, map a value_change of MAYBE to * TRUE (always report) or FALSE (never report). @@ -500,7 +504,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL); + kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL, + watchpoint - watchpoints); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -510,9 +515,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, - KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); + KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, + watchpoint - watchpoints); } + /* + * Remove watchpoint; must be after reporting, since the slot may be + * reused after this point. + */ + remove_watchpoint(watchpoint); kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); out_unlock: if (!kcsan_interrupt_watcher) diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 6630dfe32f31..763d6d08d94b 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -12,6 +12,7 @@ /* The number of adjacent watchpoints to check. */ #define KCSAN_CHECK_ADJACENT 1 +#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) extern unsigned int kcsan_udelay_task; extern unsigned int kcsan_udelay_interrupt; @@ -136,6 +137,6 @@ enum kcsan_report_type { */ extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, enum kcsan_value_change value_change, - enum kcsan_report_type type); + enum kcsan_report_type type, int watchpoint_idx); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index de234d1c1b3d..ae0a383238ea 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -30,9 +30,7 @@ struct access_info { /* * Other thread info: communicated from other racing thread to thread that set - * up the watchpoint, which then prints the complete report atomically. Only - * need one struct, as all threads should to be serialized regardless to print - * the reports, with reporting being in the slow-path. + * up the watchpoint, which then prints the complete report atomically. */ struct other_info { struct access_info ai; @@ -59,7 +57,11 @@ struct other_info { struct task_struct *task; }; -static struct other_info other_infos[1]; +/* + * To never block any producers of struct other_info, we need as many elements + * as we have watchpoints (upper bound on concurrent races to report). + */ +static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; /* * Information about reported races; used to rate limit reporting. @@ -96,10 +98,11 @@ struct report_time { static struct report_time report_times[REPORT_TIMES_SIZE]; /* - * This spinlock protects reporting and other_info, since other_info is usually - * required when reporting. + * Spinlock serializing report generation, and access to @other_infos. Although + * it could make sense to have a finer-grained locking story for @other_infos, + * report generation needs to be serialized either way, so not much is gained. */ -static DEFINE_SPINLOCK(report_lock); +static DEFINE_RAW_SPINLOCK(report_lock); /* * Checks if the race identified by thread frames frame1 and frame2 has @@ -395,9 +398,13 @@ static bool print_report(enum kcsan_value_change value_change, static void release_report(unsigned long *flags, struct other_info *other_info) { if (other_info) - other_info->ai.ptr = NULL; /* Mark for reuse. */ + /* + * Use size to denote valid/invalid, since KCSAN entirely + * ignores 0-sized accesses. + */ + other_info->ai.size = 0; - spin_unlock_irqrestore(&report_lock, *flags); + raw_spin_unlock_irqrestore(&report_lock, *flags); } /* @@ -435,14 +442,14 @@ static void set_other_info_task_blocking(unsigned long *flags, */ set_current_state(TASK_UNINTERRUPTIBLE); } - spin_unlock_irqrestore(&report_lock, *flags); + raw_spin_unlock_irqrestore(&report_lock, *flags); /* * We cannot call schedule() since we also cannot reliably * determine if sleeping here is permitted -- see in_atomic(). */ udelay(1); - spin_lock_irqsave(&report_lock, *flags); + raw_spin_lock_irqsave(&report_lock, *flags); if (timeout-- < 0) { /* * Abort. Reset @other_info->task to NULL, since it @@ -454,128 +461,107 @@ static void set_other_info_task_blocking(unsigned long *flags, break; } /* - * If @ptr nor @current matches, then our information has been - * consumed and we may continue. If not, retry. + * If invalid, or @ptr nor @current matches, then @other_info + * has been consumed and we may continue. If not, retry. */ - } while (other_info->ai.ptr == ai->ptr && other_info->task == current); + } while (other_info->ai.size && other_info->ai.ptr == ai->ptr && + other_info->task == current); if (is_running) set_current_state(TASK_RUNNING); } -/* - * Depending on the report type either sets other_info and returns false, or - * acquires the matching other_info and returns true. If other_info is not - * required for the report type, simply acquires report_lock and returns true. - */ -static bool prepare_report(unsigned long *flags, enum kcsan_report_type type, - const struct access_info *ai, struct other_info *other_info) +/* Populate @other_info; requires that the provided @other_info not in use. */ +static void prepare_report_producer(unsigned long *flags, + const struct access_info *ai, + struct other_info *other_info) { - if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && - type != KCSAN_REPORT_RACE_SIGNAL) { - /* other_info not required; just acquire report_lock */ - spin_lock_irqsave(&report_lock, *flags); - return true; - } + raw_spin_lock_irqsave(&report_lock, *flags); -retry: - spin_lock_irqsave(&report_lock, *flags); + /* + * The same @other_infos entry cannot be used concurrently, because + * there is a one-to-one mapping to watchpoint slots (@watchpoints in + * core.c), and a watchpoint is only released for reuse after reporting + * is done by the consumer of @other_info. Therefore, it is impossible + * for another concurrent prepare_report_producer() to set the same + * @other_info, and are guaranteed exclusivity for the @other_infos + * entry pointed to by @other_info. + * + * To check this property holds, size should never be non-zero here, + * because every consumer of struct other_info resets size to 0 in + * release_report(). + */ + WARN_ON(other_info->ai.size); - switch (type) { - case KCSAN_REPORT_CONSUMED_WATCHPOINT: - if (other_info->ai.ptr) - break; /* still in use, retry */ + other_info->ai = *ai; + other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2); - other_info->ai = *ai; - other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 1); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + set_other_info_task_blocking(flags, ai, other_info); - if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) - set_other_info_task_blocking(flags, ai, other_info); + raw_spin_unlock_irqrestore(&report_lock, *flags); +} - spin_unlock_irqrestore(&report_lock, *flags); +/* Awaits producer to fill @other_info and then returns. */ +static bool prepare_report_consumer(unsigned long *flags, + const struct access_info *ai, + struct other_info *other_info) +{ - /* - * The other thread will print the summary; other_info may now - * be consumed. - */ - return false; + raw_spin_lock_irqsave(&report_lock, *flags); + while (!other_info->ai.size) { /* Await valid @other_info. */ + raw_spin_unlock_irqrestore(&report_lock, *flags); + cpu_relax(); + raw_spin_lock_irqsave(&report_lock, *flags); + } - case KCSAN_REPORT_RACE_SIGNAL: - if (!other_info->ai.ptr) - break; /* no data available yet, retry */ + /* Should always have a matching access based on watchpoint encoding. */ + if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, + (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))) + goto discard; + if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, + (unsigned long)ai->ptr, ai->size)) { /* - * First check if this is the other_info we are expecting, i.e. - * matches based on how watchpoint was encoded. + * If the actual accesses to not match, this was a false + * positive due to watchpoint encoding. */ - if (!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, - (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)) - break; /* mismatching watchpoint, retry */ - - if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, - (unsigned long)ai->ptr, ai->size)) { - /* - * If the actual accesses to not match, this was a false - * positive due to watchpoint encoding. - */ - kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); - - /* discard this other_info */ - release_report(flags, other_info); - return false; - } + kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); + goto discard; + } - if (!((ai->access_type | other_info->ai.access_type) & KCSAN_ACCESS_WRITE)) { - /* - * While the address matches, this is not the other_info - * from the thread that consumed our watchpoint, since - * neither this nor the access in other_info is a write. - * It is invalid to continue with the report, since we - * only have information about reads. - * - * This can happen due to concurrent races on the same - * address, with at least 4 threads. To avoid locking up - * other_info and all other threads, we have to consume - * it regardless. - * - * A concrete case to illustrate why we might lock up if - * we do not consume other_info: - * - * We have 4 threads, all accessing the same address - * (or matching address ranges). Assume the following - * watcher and watchpoint consumer pairs: - * write1-read1, read2-write2. The first to populate - * other_info is write2, however, write1 consumes it, - * resulting in a report of write1-write2. This report - * is valid, however, now read1 populates other_info; - * read2-read1 is an invalid conflict, yet, no other - * conflicting access is left. Therefore, we must - * consume read1's other_info. - * - * Since this case is assumed to be rare, it is - * reasonable to omit this report: one of the other - * reports includes information about the same shared - * data, and at this point the likelihood that we - * re-report the same race again is high. - */ - release_report(flags, other_info); - return false; - } + return true; - /* Matching access in other_info. */ - return true; +discard: + release_report(flags, other_info); + return false; +} +/* + * Depending on the report type either sets @other_info and returns false, or + * awaits @other_info and returns true. If @other_info is not required for the + * report type, simply acquires @report_lock and returns true. + */ +static noinline bool prepare_report(unsigned long *flags, + enum kcsan_report_type type, + const struct access_info *ai, + struct other_info *other_info) +{ + switch (type) { + case KCSAN_REPORT_CONSUMED_WATCHPOINT: + prepare_report_producer(flags, ai, other_info); + return false; + case KCSAN_REPORT_RACE_SIGNAL: + return prepare_report_consumer(flags, ai, other_info); default: - BUG(); + /* @other_info not required; just acquire @report_lock. */ + raw_spin_lock_irqsave(&report_lock, *flags); + return true; } - - spin_unlock_irqrestore(&report_lock, *flags); - - goto retry; } void kcsan_report(const volatile void *ptr, size_t size, int access_type, enum kcsan_value_change value_change, - enum kcsan_report_type type) + enum kcsan_report_type type, int watchpoint_idx) { unsigned long flags = 0; const struct access_info ai = { @@ -586,7 +572,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, .cpu_id = raw_smp_processor_id() }; struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN - ? NULL : &other_infos[0]; + ? NULL : &other_infos[watchpoint_idx]; + + kcsan_disable_current(); + if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos))) + goto out; /* * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if @@ -596,7 +586,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, */ lockdep_off(); - kcsan_disable_current(); if (prepare_report(&flags, type, &ai, other_info)) { /* * Never report if value_change is FALSE, only if we it is @@ -611,7 +600,8 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, release_report(&flags, other_info); } - kcsan_enable_current(); lockdep_on(); +out: + kcsan_enable_current(); } -- cgit v1.2.3 From 757a4cefde76697af2b2c284c8a320912b77e7e6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Mar 2020 17:41:56 +0100 Subject: kcsan: Add support for scoped accesses This adds support for scoped accesses, where the memory range is checked for the duration of the scope. The feature is implemented by inserting the relevant access information into a list of scoped accesses for the current execution context, which are then checked (until removed) on every call (through instrumentation) into the KCSAN runtime. An alternative, more complex, implementation could set up a watchpoint for the scoped access, and keep the watchpoint set up. This, however, would require first exposing a handle to the watchpoint, as well as dealing with cases such as accesses by the same thread while the watchpoint is still set up (and several more cases). It is also doubtful if this would provide any benefit, since the majority of delay where the watchpoint is set up is likely due to the injected delays by KCSAN. Therefore, the implementation in this patch is simpler and avoids hurting KCSAN's main use-case (normal data race detection); it also implicitly increases scoped-access race-detection-ability due to increased probability of setting up watchpoints by repeatedly calling __kcsan_check_access() throughout the scope of the access. The implementation required adding an additional conditional branch to the fast-path. However, the microbenchmark showed a *speedup* of ~5% on the fast-path. This appears to be due to subtly improved codegen by GCC from moving get_ctx() and associated load of preempt_count earlier. Suggested-by: Boqun Feng Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 57 ++++++++++++++++++++++++++++++ include/linux/kcsan.h | 3 ++ init/init_task.c | 1 + kernel/kcsan/core.c | 83 +++++++++++++++++++++++++++++++++++++++----- kernel/kcsan/report.c | 33 ++++++++++++------ 5 files changed, 158 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 3cd8bb03eb41..b24253d3a442 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -3,6 +3,8 @@ #ifndef _LINUX_KCSAN_CHECKS_H #define _LINUX_KCSAN_CHECKS_H +/* Note: Only include what is already included by compiler.h. */ +#include #include /* @@ -12,10 +14,12 @@ * WRITE : write access; * ATOMIC: access is atomic; * ASSERT: access is not a regular access, but an assertion; + * SCOPED: access is a scoped access; */ #define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 #define KCSAN_ACCESS_ASSERT 0x4 +#define KCSAN_ACCESS_SCOPED 0x8 /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -78,6 +82,52 @@ void kcsan_atomic_next(int n); */ void kcsan_set_access_mask(unsigned long mask); +/* Scoped access information. */ +struct kcsan_scoped_access { + struct list_head list; + const volatile void *ptr; + size_t size; + int type; +}; +/* + * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes + * out of scope; relies on attribute "cleanup", which is supported by all + * compilers that support KCSAN. + */ +#define __kcsan_cleanup_scoped \ + __maybe_unused __attribute__((__cleanup__(kcsan_end_scoped_access))) + +/** + * kcsan_begin_scoped_access - begin scoped access + * + * Begin scoped access and initialize @sa, which will cause KCSAN to + * continuously check the memory range in the current thread until + * kcsan_end_scoped_access() is called for @sa. + * + * Scoped accesses are implemented by appending @sa to an internal list for the + * current execution context, and then checked on every call into the KCSAN + * runtime. + * + * @ptr: address of access + * @size: size of access + * @type: access type modifier + * @sa: struct kcsan_scoped_access to use for the scope of the access + */ +struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa); + +/** + * kcsan_end_scoped_access - end scoped access + * + * End a scoped access, which will stop KCSAN checking the memory range. + * Requires that kcsan_begin_scoped_access() was previously called once for @sa. + * + * @sa: a previously initialized struct kcsan_scoped_access + */ +void kcsan_end_scoped_access(struct kcsan_scoped_access *sa); + + #else /* CONFIG_KCSAN */ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, @@ -90,6 +140,13 @@ static inline void kcsan_flat_atomic_end(void) { } static inline void kcsan_atomic_next(int n) { } static inline void kcsan_set_access_mask(unsigned long mask) { } +struct kcsan_scoped_access { }; +#define __kcsan_cleanup_scoped __maybe_unused +static inline struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa) { return sa; } +static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } + #endif /* CONFIG_KCSAN */ /* diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 3b84606e1e67..17ae59e4b685 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -40,6 +40,9 @@ struct kcsan_ctx { * Access mask for all accesses if non-zero. */ unsigned long access_mask; + + /* List of scoped accesses. */ + struct list_head scoped_accesses; }; /** diff --git a/init/init_task.c b/init/init_task.c index 096191d177d5..198943851caf 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -168,6 +168,7 @@ struct task_struct init_task .atomic_nest_count = 0, .in_flat_atomic = false, .access_mask = 0, + .scoped_accesses = {LIST_POISON1, NULL}, }, #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 4d8ea0fca5f1..a572aae61b98 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { .atomic_nest_count = 0, .in_flat_atomic = false, .access_mask = 0, + .scoped_accesses = {LIST_POISON1, NULL}, }; /* @@ -191,12 +193,23 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } +/* Check scoped accesses; never inline because this is a slow-path! */ +static noinline void kcsan_check_scoped_accesses(void) +{ + struct kcsan_ctx *ctx = get_ctx(); + struct list_head *prev_save = ctx->scoped_accesses.prev; + struct kcsan_scoped_access *scoped_access; + + ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */ + list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) + __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type); + ctx->scoped_accesses.prev = prev_save; +} + /* Rules for generic atomic accesses. Called from fast-path. */ static __always_inline bool -is_atomic(const volatile void *ptr, size_t size, int type) +is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { - struct kcsan_ctx *ctx; - if (type & KCSAN_ACCESS_ATOMIC) return true; @@ -213,7 +226,6 @@ is_atomic(const volatile void *ptr, size_t size, int type) IS_ALIGNED((unsigned long)ptr, size)) return true; /* Assume aligned writes up to word size are atomic. */ - ctx = get_ctx(); if (ctx->atomic_next > 0) { /* * Because we do not have separate contexts for nested @@ -233,7 +245,7 @@ is_atomic(const volatile void *ptr, size_t size, int type) } static __always_inline bool -should_watch(const volatile void *ptr, size_t size, int type) +should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { /* * Never set up watchpoints when memory operations are atomic. @@ -242,7 +254,7 @@ should_watch(const volatile void *ptr, size_t size, int type) * should not count towards skipped instructions, and (2) to actually * decrement kcsan_atomic_next for consecutive instruction stream. */ - if (is_atomic(ptr, size, type)) + if (is_atomic(ptr, size, type, ctx)) return false; if (this_cpu_dec_return(kcsan_skip) >= 0) @@ -563,8 +575,14 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, if (unlikely(watchpoint != NULL)) kcsan_found_watchpoint(ptr, size, type, watchpoint, encoded_watchpoint); - else if (unlikely(should_watch(ptr, size, type))) - kcsan_setup_watchpoint(ptr, size, type); + else { + struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */ + + if (unlikely(should_watch(ptr, size, type, ctx))) + kcsan_setup_watchpoint(ptr, size, type); + else if (unlikely(ctx->scoped_accesses.prev)) + kcsan_check_scoped_accesses(); + } } /* === Public interface ===================================================== */ @@ -660,6 +678,55 @@ void kcsan_set_access_mask(unsigned long mask) } EXPORT_SYMBOL(kcsan_set_access_mask); +struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa) +{ + struct kcsan_ctx *ctx = get_ctx(); + + __kcsan_check_access(ptr, size, type); + + ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ + + INIT_LIST_HEAD(&sa->list); + sa->ptr = ptr; + sa->size = size; + sa->type = type; + + if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */ + INIT_LIST_HEAD(&ctx->scoped_accesses); + list_add(&sa->list, &ctx->scoped_accesses); + + ctx->disable_count--; + return sa; +} +EXPORT_SYMBOL(kcsan_begin_scoped_access); + +void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) +{ + struct kcsan_ctx *ctx = get_ctx(); + + if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__)) + return; + + ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ + + list_del(&sa->list); + if (list_empty(&ctx->scoped_accesses)) + /* + * Ensure we do not enter kcsan_check_scoped_accesses() + * slow-path if unnecessary, and avoids requiring list_empty() + * in the fast-path (to avoid a READ_ONCE() and potential + * uaccess warning). + */ + ctx->scoped_accesses.prev = NULL; + + ctx->disable_count--; + + __kcsan_check_access(sa->ptr, sa->size, sa->type); +} +EXPORT_SYMBOL(kcsan_end_scoped_access); + void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { check_access(ptr, size, type); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ae0a383238ea..ddc18f1224a4 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -205,6 +205,20 @@ skip_report(enum kcsan_value_change value_change, unsigned long top_frame) static const char *get_access_type(int type) { + if (type & KCSAN_ACCESS_ASSERT) { + if (type & KCSAN_ACCESS_SCOPED) { + if (type & KCSAN_ACCESS_WRITE) + return "assert no accesses (scoped)"; + else + return "assert no writes (scoped)"; + } else { + if (type & KCSAN_ACCESS_WRITE) + return "assert no accesses"; + else + return "assert no writes"; + } + } + switch (type) { case 0: return "read"; @@ -214,17 +228,14 @@ static const char *get_access_type(int type) return "write"; case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked)"; - - /* - * ASSERT variants: - */ - case KCSAN_ACCESS_ASSERT: - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC: - return "assert no writes"; - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE: - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: - return "assert no accesses"; - + case KCSAN_ACCESS_SCOPED: + return "read (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: + return "read (marked, scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: + return "write (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "write (marked, scoped)"; default: BUG(); } -- cgit v1.2.3 From d8949ef1d9f1062848cd068cf369a57ce33dae6f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Mar 2020 17:41:58 +0100 Subject: kcsan: Introduce scoped ASSERT_EXCLUSIVE macros Introduce ASSERT_EXCLUSIVE_*_SCOPED(), which provide an intuitive interface to use the scoped-access feature, without having to explicitly mark the start and end of the desired scope. Basing duration of the checks on scope avoids accidental misuse and resulting false positives, which may be hard to debug. See added comments for usage. The macros are implemented using __attribute__((__cleanup__(func))), which is supported by all compilers that currently support KCSAN. Suggested-by: Boqun Feng Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- Documentation/dev-tools/kcsan.rst | 3 +- include/linux/kcsan-checks.h | 73 ++++++++++++++++++++++++++++++++++++++- kernel/kcsan/debugfs.c | 16 ++++++++- 3 files changed, 89 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst index 52a5d6fb9701..f4b5766f12cc 100644 --- a/Documentation/dev-tools/kcsan.rst +++ b/Documentation/dev-tools/kcsan.rst @@ -238,7 +238,8 @@ are defined at the C-language level. The following macros can be used to check properties of concurrent code where bugs would not manifest as data races. .. kernel-doc:: include/linux/kcsan-checks.h - :functions: ASSERT_EXCLUSIVE_WRITER ASSERT_EXCLUSIVE_ACCESS + :functions: ASSERT_EXCLUSIVE_WRITER ASSERT_EXCLUSIVE_WRITER_SCOPED + ASSERT_EXCLUSIVE_ACCESS ASSERT_EXCLUSIVE_ACCESS_SCOPED ASSERT_EXCLUSIVE_BITS Implementation Details diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index b24253d3a442..101df7f46d89 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -234,11 +234,63 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * ... = READ_ONCE(shared_foo); * } * + * Note: ASSERT_EXCLUSIVE_WRITER_SCOPED(), if applicable, performs more thorough + * checking if a clear scope where no concurrent writes are expected exists. + * * @var: variable to assert on */ #define ASSERT_EXCLUSIVE_WRITER(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT) +/* + * Helper macros for implementation of for ASSERT_EXCLUSIVE_*_SCOPED(). @id is + * expected to be unique for the scope in which instances of kcsan_scoped_access + * are declared. + */ +#define __kcsan_scoped_name(c, suffix) __kcsan_scoped_##c##suffix +#define __ASSERT_EXCLUSIVE_SCOPED(var, type, id) \ + struct kcsan_scoped_access __kcsan_scoped_name(id, _) \ + __kcsan_cleanup_scoped; \ + struct kcsan_scoped_access *__kcsan_scoped_name(id, _dummy_p) \ + __maybe_unused = kcsan_begin_scoped_access( \ + &(var), sizeof(var), KCSAN_ACCESS_SCOPED | (type), \ + &__kcsan_scoped_name(id, _)) + +/** + * ASSERT_EXCLUSIVE_WRITER_SCOPED - assert no concurrent writes to @var in scope + * + * Scoped variant of ASSERT_EXCLUSIVE_WRITER(). + * + * Assert that there are no concurrent writes to @var for the duration of the + * scope in which it is introduced. This provides a better way to fully cover + * the enclosing scope, compared to multiple ASSERT_EXCLUSIVE_WRITER(), and + * increases the likelihood for KCSAN to detect racing accesses. + * + * For example, it allows finding race-condition bugs that only occur due to + * state changes within the scope itself: + * + * .. code-block:: c + * + * void writer(void) { + * spin_lock(&update_foo_lock); + * { + * ASSERT_EXCLUSIVE_WRITER_SCOPED(shared_foo); + * WRITE_ONCE(shared_foo, 42); + * ... + * // shared_foo should still be 42 here! + * } + * spin_unlock(&update_foo_lock); + * } + * void buggy(void) { + * if (READ_ONCE(shared_foo) == 42) + * WRITE_ONCE(shared_foo, 1); // bug! + * } + * + * @var: variable to assert on + */ +#define ASSERT_EXCLUSIVE_WRITER_SCOPED(var) \ + __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_ASSERT, __COUNTER__) + /** * ASSERT_EXCLUSIVE_ACCESS - assert no concurrent accesses to @var * @@ -258,6 +310,9 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * release_for_reuse(obj); * } * + * Note: ASSERT_EXCLUSIVE_ACCESS_SCOPED(), if applicable, performs more thorough + * checking if a clear scope where no concurrent accesses are expected exists. + * * Note: For cases where the object is freed, `KASAN `_ is a better * fit to detect use-after-free bugs. * @@ -266,10 +321,26 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define ASSERT_EXCLUSIVE_ACCESS(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT) +/** + * ASSERT_EXCLUSIVE_ACCESS_SCOPED - assert no concurrent accesses to @var in scope + * + * Scoped variant of ASSERT_EXCLUSIVE_ACCESS(). + * + * Assert that there are no concurrent accesses to @var (no readers nor writers) + * for the entire duration of the scope in which it is introduced. This provides + * a better way to fully cover the enclosing scope, compared to multiple + * ASSERT_EXCLUSIVE_ACCESS(), and increases the likelihood for KCSAN to detect + * racing accesses. + * + * @var: variable to assert on + */ +#define ASSERT_EXCLUSIVE_ACCESS_SCOPED(var) \ + __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT, __COUNTER__) + /** * ASSERT_EXCLUSIVE_BITS - assert no concurrent writes to subset of bits in @var * - * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER(var). + * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER(). * * Assert that there are no concurrent writes to a subset of bits in @var; * concurrent readers are permitted. This assertion captures more detailed diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 72ee188ebc54..1a08664a7fab 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -110,6 +110,7 @@ static noinline void microbenchmark(unsigned long iters) */ static long test_dummy; static long test_flags; +static long test_scoped; static noinline void test_thread(unsigned long iters) { const long CHANGE_BITS = 0xff00ff00ff00ff00L; @@ -120,7 +121,8 @@ static noinline void test_thread(unsigned long iters) memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); - pr_info("test_dummy@%px, test_flags@%px\n", &test_dummy, &test_flags); + pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n", + &test_dummy, &test_flags, &test_scoped); cycles = get_cycles(); while (iters--) { @@ -141,6 +143,18 @@ static noinline void test_thread(unsigned long iters) test_flags ^= CHANGE_BITS; /* generate value-change */ __kcsan_check_write(&test_flags, sizeof(test_flags)); + + BUG_ON(current->kcsan_ctx.scoped_accesses.prev); + { + /* Should generate reports anywhere in this block. */ + ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped); + ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped); + BUG_ON(!current->kcsan_ctx.scoped_accesses.prev); + /* Unrelated accesses. */ + __kcsan_check_access(&cycles, sizeof(cycles), 0); + __kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC); + } + BUG_ON(current->kcsan_ctx.scoped_accesses.prev); } cycles = get_cycles() - cycles; -- cgit v1.2.3 From f770ed10a9ee65529f3ec8d90eb374bbd8b7c238 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 10 Apr 2020 18:44:17 +0200 Subject: kcsan: Fix function matching in report Pass string length as returned by scnprintf() to strnstr(), since strnstr() searches exactly len bytes in haystack, even if it contains a NUL-terminator before haystack+len. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/report.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ddc18f1224a4..cf41d63dd0cd 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -192,11 +192,11 @@ skip_report(enum kcsan_value_change value_change, unsigned long top_frame) * maintainers. */ char buf[64]; + int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame); - snprintf(buf, sizeof(buf), "%ps", (void *)top_frame); - if (!strnstr(buf, "rcu_", sizeof(buf)) && - !strnstr(buf, "_rcu", sizeof(buf)) && - !strnstr(buf, "_srcu", sizeof(buf))) + if (!strnstr(buf, "rcu_", len) && + !strnstr(buf, "_rcu", len) && + !strnstr(buf, "_srcu", len)) return true; } @@ -262,15 +262,15 @@ static const char *get_thread_desc(int task_id) static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) { char buf[64]; + int len; int skip = 0; for (; skip < num_entries; ++skip) { - snprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); - if (!strnstr(buf, "csan_", sizeof(buf)) && - !strnstr(buf, "tsan_", sizeof(buf)) && - !strnstr(buf, "_once_size", sizeof(buf))) { + len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); + if (!strnstr(buf, "csan_", len) && + !strnstr(buf, "tsan_", len) && + !strnstr(buf, "_once_size", len)) break; - } } return skip; } -- cgit v1.2.3 From cdb9b07d8c78be63d72aba9a2686ff161ddd2099 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 10 Apr 2020 18:44:18 +0200 Subject: kcsan: Make reporting aware of KCSAN tests Reporting hides KCSAN runtime functions in the stack trace, with filtering done based on function names. Currently this included all functions (or modules) that would match "kcsan_". Make the filter aware of KCSAN tests, which contain "kcsan_test", and are no longer skipped in the report. This is in preparation for adding a KCSAN test module. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/report.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index cf41d63dd0cd..ac5f8345bae9 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -262,16 +262,32 @@ static const char *get_thread_desc(int task_id) static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) { char buf[64]; - int len; - int skip = 0; + char *cur; + int len, skip; - for (; skip < num_entries; ++skip) { + for (skip = 0; skip < num_entries; ++skip) { len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); - if (!strnstr(buf, "csan_", len) && - !strnstr(buf, "tsan_", len) && - !strnstr(buf, "_once_size", len)) - break; + + /* Never show tsan_* or {read,write}_once_size. */ + if (strnstr(buf, "tsan_", len) || + strnstr(buf, "_once_size", len)) + continue; + + cur = strnstr(buf, "kcsan_", len); + if (cur) { + cur += sizeof("kcsan_") - 1; + if (strncmp(cur, "test", sizeof("test") - 1)) + continue; /* KCSAN runtime function. */ + /* KCSAN related test. */ + } + + /* + * No match for runtime functions -- @skip entries to skip to + * get to first frame of interest. + */ + break; } + return skip; } -- cgit v1.2.3 From 52785b6ae8eded7ac99d65c92d989b702e5b4376 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Apr 2020 02:58:37 +0000 Subject: kcsan: Use GFP_ATOMIC under spin lock A spin lock is held in insert_report_filterlist(), so the krealloc() should use GFP_ATOMIC. This commit therefore makes this change. Reviewed-by: Marco Elver Signed-off-by: Wei Yongjun Signed-off-by: Paul E. McKenney --- kernel/kcsan/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 1a08664a7fab..023e49c58d55 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -230,7 +230,7 @@ static ssize_t insert_report_filterlist(const char *func) /* initial allocation */ report_filterlist.addrs = kmalloc_array(report_filterlist.size, - sizeof(unsigned long), GFP_KERNEL); + sizeof(unsigned long), GFP_ATOMIC); if (report_filterlist.addrs == NULL) { ret = -ENOMEM; goto out; @@ -240,7 +240,7 @@ static ssize_t insert_report_filterlist(const char *func) size_t new_size = report_filterlist.size * 2; unsigned long *new_addrs = krealloc(report_filterlist.addrs, - new_size * sizeof(unsigned long), GFP_KERNEL); + new_size * sizeof(unsigned long), GFP_ATOMIC); if (new_addrs == NULL) { /* leave filterlist itself untouched */ -- cgit v1.2.3 From 19acd03d95dad1f50d06f28179a1866fca431fed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Apr 2020 17:47:29 +0200 Subject: kcsan: Add __kcsan_{enable,disable}_current() variants The __kcsan_{enable,disable}_current() variants only call into KCSAN if KCSAN is enabled for the current compilation unit. Note: This is typically not what we want, as we usually want to ensure that even calls into other functions still have KCSAN disabled. These variants may safely be used in header files that are shared between regular kernel code and code that does not link the KCSAN runtime. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 17 ++++++++++++++--- kernel/kcsan/core.c | 7 +++++++ 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index ef95ddc49182..7b0b9c44f5f3 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -49,6 +49,7 @@ void kcsan_disable_current(void); * Supports nesting. */ void kcsan_enable_current(void); +void kcsan_enable_current_nowarn(void); /* Safe in uaccess regions. */ /** * kcsan_nestable_atomic_begin - begin nestable atomic region @@ -149,6 +150,7 @@ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, static inline void kcsan_disable_current(void) { } static inline void kcsan_enable_current(void) { } +static inline void kcsan_enable_current_nowarn(void) { } static inline void kcsan_nestable_atomic_begin(void) { } static inline void kcsan_nestable_atomic_end(void) { } static inline void kcsan_flat_atomic_begin(void) { } @@ -165,15 +167,24 @@ static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } #endif /* CONFIG_KCSAN */ +#ifdef __SANITIZE_THREAD__ /* - * kcsan_*: Only calls into the runtime when the particular compilation unit has - * KCSAN instrumentation enabled. May be used in header files. + * Only calls into the runtime when the particular compilation unit has KCSAN + * instrumentation enabled. May be used in header files. */ -#ifdef __SANITIZE_THREAD__ #define kcsan_check_access __kcsan_check_access + +/* + * Only use these to disable KCSAN for accesses in the current compilation unit; + * calls into libraries may still perform KCSAN checks. + */ +#define __kcsan_disable_current kcsan_disable_current +#define __kcsan_enable_current kcsan_enable_current_nowarn #else static inline void kcsan_check_access(const volatile void *ptr, size_t size, int type) { } +static inline void __kcsan_enable_current(void) { } +static inline void __kcsan_disable_current(void) { } #endif /** diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index a572aae61b98..a73a66cf79df 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -625,6 +625,13 @@ void kcsan_enable_current(void) } EXPORT_SYMBOL(kcsan_enable_current); +void kcsan_enable_current_nowarn(void) +{ + if (get_ctx()->disable_count-- == 0) + kcsan_disable_current(); +} +EXPORT_SYMBOL(kcsan_enable_current_nowarn); + void kcsan_nestable_atomic_begin(void) { /* -- cgit v1.2.3 From 75d75b7a4d5489cc6a5e91ace306f6c13f376f33 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 May 2020 16:20:39 +0200 Subject: kcsan: Support distinguishing volatile accesses In the kernel, the "volatile" keyword is used in various concurrent contexts, whether in low-level synchronization primitives or for legacy reasons. If supported by the compiler, it will be assumed that aligned volatile accesses up to sizeof(long long) (matching compiletime_assert_rwonce_type()) are atomic. Recent versions of Clang [1] (GCC tentative [2]) can instrument volatile accesses differently. Add the option (required) to enable the instrumentation, and provide the necessary runtime functions. None of the updated compilers are widely available yet (Clang 11 will be the first release to support the feature). [1] https://github.com/llvm/llvm-project/commit/5a2c31116f412c3b6888be361137efd705e05814 [2] https://gcc.gnu.org/pipermail/gcc-patches/2020-April/544452.html This change allows removing of any explicit checks in primitives such as READ_ONCE() and WRITE_ONCE(). [ bp: Massage commit message a bit. ] Signed-off-by: Marco Elver Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200521142047.169334-4-elver@google.com --- kernel/kcsan/core.c | 43 +++++++++++++++++++++++++++++++++++++++++++ scripts/Makefile.kcsan | 5 ++++- 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index a73a66cf79df..15f67949d11e 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -789,6 +789,49 @@ void __tsan_write_range(void *ptr, size_t size) } EXPORT_SYMBOL(__tsan_write_range); +/* + * Use of explicit volatile is generally disallowed [1], however, volatile is + * still used in various concurrent context, whether in low-level + * synchronization primitives or for legacy reasons. + * [1] https://lwn.net/Articles/233479/ + * + * We only consider volatile accesses atomic if they are aligned and would pass + * the size-check of compiletime_assert_rwonce_type(). + */ +#define DEFINE_TSAN_VOLATILE_READ_WRITE(size) \ + void __tsan_volatile_read##size(void *ptr) \ + { \ + const bool is_atomic = size <= sizeof(long long) && \ + IS_ALIGNED((unsigned long)ptr, size); \ + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic) \ + return; \ + check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0); \ + } \ + EXPORT_SYMBOL(__tsan_volatile_read##size); \ + void __tsan_unaligned_volatile_read##size(void *ptr) \ + __alias(__tsan_volatile_read##size); \ + EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size); \ + void __tsan_volatile_write##size(void *ptr) \ + { \ + const bool is_atomic = size <= sizeof(long long) && \ + IS_ALIGNED((unsigned long)ptr, size); \ + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic) \ + return; \ + check_access(ptr, size, \ + KCSAN_ACCESS_WRITE | \ + (is_atomic ? KCSAN_ACCESS_ATOMIC : 0)); \ + } \ + EXPORT_SYMBOL(__tsan_volatile_write##size); \ + void __tsan_unaligned_volatile_write##size(void *ptr) \ + __alias(__tsan_volatile_write##size); \ + EXPORT_SYMBOL(__tsan_unaligned_volatile_write##size) + +DEFINE_TSAN_VOLATILE_READ_WRITE(1); +DEFINE_TSAN_VOLATILE_READ_WRITE(2); +DEFINE_TSAN_VOLATILE_READ_WRITE(4); +DEFINE_TSAN_VOLATILE_READ_WRITE(8); +DEFINE_TSAN_VOLATILE_READ_WRITE(16); + /* * The below are not required by KCSAN, but can still be emitted by the * compiler. diff --git a/scripts/Makefile.kcsan b/scripts/Makefile.kcsan index 20337a7ecf54..75d2942b9437 100644 --- a/scripts/Makefile.kcsan +++ b/scripts/Makefile.kcsan @@ -9,7 +9,10 @@ else cc-param = --param -$(1) endif +# Keep most options here optional, to allow enabling more compilers if absence +# of some options does not break KCSAN nor causes false positive reports. CFLAGS_KCSAN := -fsanitize=thread \ - $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) + $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) \ + $(call cc-param,tsan-distinguish-volatile=1) endif # CONFIG_KCSAN -- cgit v1.2.3