From 71de1e34f1dfc31ab3cb052cdd7038950aae06e7 Mon Sep 17 00:00:00 2001 From: Paul E. McKenney Date: Wed, 20 Apr 2022 08:59:46 -0700 Subject: doc: Document the rcutree.rcu_divisor kernel boot parameter This commit adds kernel-parameters.txt documentation for the rcutree.rcu_divisor kernel boot parameter, which controls the softirq callback-invocation batch limit. Cc: Eric Dumazet Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay --- Documentation/admin-guide/kernel-parameters.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2522b11e593f..bdf431bdbfdc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4666,6 +4666,21 @@ When RCU_NOCB_CPU is set, also adjust the priority of NOCB callback kthreads. + rcutree.rcu_divisor= [KNL] + Set the shift-right count to use to compute + the callback-invocation batch limit bl from + the number of callbacks queued on this CPU. + The result will be bounded below by the value of + the rcutree.blimit kernel parameter. Every bl + callbacks, the softirq handler will exit in + order to allow the CPU to do other work. + + Please note that this callback-invocation batch + limit applies only to non-offloaded callback + invocation. Offloaded callbacks are instead + invoked in the context of an rcuoc kthread, which + scheduler will preempt as it does any other task. + rcutree.rcu_nocb_gp_stride= [KNL] Set the number of NOCB callback kthreads in each group, which defaults to the square root -- cgit v1.2.3 From 89f7f29140da767f4675efbbe7892f38786451ec Mon Sep 17 00:00:00 2001 From: Paul E. McKenney Date: Wed, 27 Apr 2022 09:24:31 -0700 Subject: doc: Document rcutree.nocb_nobypass_lim_per_jiffy kernel parameter This commit provides documentation for the kernel parameter controlling RCU's handling of callback floods on offloaded (rcu_nocbs) CPUs. This parameter might be obscure, but it is always there when you need it. Reported-by: Frederic Weisbecker Reported-by: Uladzislau Rezki Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay --- Documentation/admin-guide/kernel-parameters.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdf431bdbfdc..40a05df05580 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4681,6 +4681,19 @@ invoked in the context of an rcuoc kthread, which scheduler will preempt as it does any other task. + rcutree.nocb_nobypass_lim_per_jiffy= [KNL] + On callback-offloaded (rcu_nocbs) CPUs, + RCU reduces the lock contention that would + otherwise be caused by callback floods through + use of the ->nocb_bypass list. However, in the + common non-flooded case, RCU queues directly to + the main ->cblist in order to avoid the extra + overhead of the ->nocb_bypass list and its lock. + But if there are too many callbacks queued during + a single jiffy, RCU pre-queues the callbacks into + the ->nocb_bypass queue. The definition of "too + many" is supplied by this kernel boot parameter. + rcutree.rcu_nocb_gp_stride= [KNL] Set the number of NOCB callback kthreads in each group, which defaults to the square root -- cgit v1.2.3 From 24a9c54182b3758801b8ca6c8c237cc2ff654732 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:24 +0200 Subject: context_tracking: Split user tracking Kconfig Context tracking is going to be used not only to track user transitions but also idle/IRQs/NMIs. The user tracking part will then become a separate feature. Prepare Kconfig for that. [ frederic: Apply Max Filippov feedback. ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- .../time/context-tracking/arch-support.txt | 6 ++--- arch/Kconfig | 4 +-- arch/arm/Kconfig | 2 +- arch/arm/kernel/entry-common.S | 4 +-- arch/arm/kernel/entry-header.S | 4 +-- arch/arm64/Kconfig | 2 +- arch/csky/Kconfig | 2 +- arch/csky/kernel/entry.S | 4 +-- arch/loongarch/Kconfig | 2 +- arch/mips/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/context_tracking.h | 2 +- arch/riscv/Kconfig | 2 +- arch/riscv/kernel/entry.S | 6 ++--- arch/sparc/Kconfig | 2 +- arch/sparc/kernel/rtrap_64.S | 2 +- arch/x86/Kconfig | 4 +-- arch/xtensa/Kconfig | 2 +- arch/xtensa/kernel/entry.S | 4 +-- include/linux/context_tracking.h | 12 ++++----- include/linux/context_tracking_state.h | 4 +-- init/Kconfig | 4 +-- kernel/context_tracking.c | 6 ++++- kernel/sched/core.c | 2 +- kernel/time/Kconfig | 31 ++++++++++++++-------- 25 files changed, 65 insertions(+), 52 deletions(-) (limited to 'Documentation') diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index c9e0a16290e6..e59071a49090 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -1,7 +1,7 @@ # -# Feature name: context-tracking -# Kconfig: HAVE_CONTEXT_TRACKING -# description: arch supports context tracking for NO_HZ_FULL +# Feature name: user-context-tracking +# Kconfig: HAVE_CONTEXT_TRACKING_USER +# description: arch supports user context tracking for NO_HZ_FULL # ----------------------- | arch |status| diff --git a/arch/Kconfig b/arch/Kconfig index fcf9a41a4ef5..154b7b78da09 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -774,7 +774,7 @@ config HAVE_ARCH_WITHIN_STACK_FRAMES and similar) by implementing an inline arch_within_stack_frames(), which is used by CONFIG_HARDENED_USERCOPY. -config HAVE_CONTEXT_TRACKING +config HAVE_CONTEXT_TRACKING_USER bool help Provide kernel/user boundaries probes necessary for subsystems @@ -785,7 +785,7 @@ config HAVE_CONTEXT_TRACKING protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on irq exit still need to be protected. -config HAVE_CONTEXT_TRACKING_OFFSTACK +config HAVE_CONTEXT_TRACKING_USER_OFFSTACK bool help Architecture neither relies on exception_enter()/exception_exit() diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7630ba9cb6cc..9acc6aac5912 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -84,7 +84,7 @@ config ARM select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7aa3ded4af92..37a0125fc926 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -28,7 +28,7 @@ #include "entry-header.S" saved_psr .req r8 -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) +#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) saved_pc .req r9 #define TRACE(x...) x #else @@ -38,7 +38,7 @@ saved_pc .req lr .section .entry.text,"ax",%progbits .align 5 -#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \ +#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING_USER) || \ IS_ENABLED(CONFIG_DEBUG_RSEQ)) /* * This is the fast syscall return path. We do as little as possible here, diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 95def2b38d1c..99411fa91350 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -366,7 +366,7 @@ ALT_UP_B(.L1_\@) * between user and kernel mode. */ .macro ct_user_exit, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER .if \save stmdb sp!, {r0-r3, ip, lr} bl user_exit_callable @@ -378,7 +378,7 @@ ALT_UP_B(.L1_\@) .endm .macro ct_user_enter, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER .if \save stmdb sp!, {r0-r3, ip, lr} bl user_enter_callable diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..7c5dd2af9ca9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -174,7 +174,7 @@ config ARM64 select HAVE_C_RECORDMCOUNT select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 21d72b078eef..f55ba1745f7b 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -42,7 +42,7 @@ config CSKY select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_SECCOMP_FILTER - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_VIRT_CPU_ACCOUNTING_GEN select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index bc734d17c16f..547b4cd1b24b 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -19,7 +19,7 @@ .endm .macro context_tracking -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER mfcr a0, epsr btsti a0, 31 bt 1f @@ -159,7 +159,7 @@ ret_from_exception: and r10, r9 cmpnei r10, 0 bt exit_work -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER jbsr user_enter_callable #endif 1: diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1920d52653b4..130dc65f3c85 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -76,7 +76,7 @@ config LOONGARCH select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index db09d45d59ec..9457894db237 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -56,7 +56,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_TIF_NOHZ select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c2ce2e60c8f0..874c8d81284a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,7 +202,7 @@ config PPC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS - select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_CONTEXT_TRACKING_USER if PPC64 select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h index f2682b28b050..4b63931c49e0 100644 --- a/arch/powerpc/include/asm/context_tracking.h +++ b/arch/powerpc/include/asm/context_tracking.h @@ -2,7 +2,7 @@ #ifndef _ASM_POWERPC_CONTEXT_TRACKING_H #define _ASM_POWERPC_CONTEXT_TRACKING_H -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER #define SCHEDULE_USER bl schedule_user #else #define SCHEDULE_USER bl schedule diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 32ffef9f6e5b..29b46f217345 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -86,7 +86,7 @@ config RISCV select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_EBPF_JIT if MMU diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 12f6bba57e33..b9eda3fcbd6d 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -111,7 +111,7 @@ _save_context: call __trace_hardirqs_off #endif -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER /* If previous state is in user mode, call user_exit_callable(). */ li a0, SR_PP and a0, s1, a0 @@ -176,7 +176,7 @@ handle_syscall: */ csrs CSR_STATUS, SR_IE #endif -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) +#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) /* Recover a0 - a7 for system calls */ REG_L a0, PT_A0(sp) REG_L a1, PT_A1(sp) @@ -269,7 +269,7 @@ resume_userspace: andi s1, s0, _TIF_WORK_MASK bnez s1, work_pending -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER call user_enter_callable #endif diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ba449c47effd..9232411a8821 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -71,7 +71,7 @@ config SPARC64 select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_TIF_NOHZ select HAVE_DEBUG_KMEMLEAK select IOMMU_HELPER diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index c5fd4b450d9b..eef102765a7e 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -15,7 +15,7 @@ #include #include -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER # define SCHEDULE_USER schedule_user #else # define SCHEDULE_USER schedule diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be0b95e51df6..b0a6dbbb760b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -186,8 +186,8 @@ config X86 select HAVE_ASM_MODVERSIONS select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL - select HAVE_CONTEXT_TRACKING if X86_64 - select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER if X86_64 + select HAVE_CONTEXT_TRACKING_USER_OFFSTACK if HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL select HAVE_BUILDTIME_MCOUNT_SORT diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 0b0f0172cced..7927fed7bc83 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -33,7 +33,7 @@ config XTENSA select HAVE_ARCH_KCSAN select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index d72bcafae90c..fb67d85116e4 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -455,7 +455,7 @@ KABI_W or a3, a3, a2 abi_call trace_hardirqs_off 1: #endif -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER l32i abi_tmp0, a1, PT_PS bbci.l abi_tmp0, PS_UM_BIT, 1f abi_call user_exit_callable @@ -544,7 +544,7 @@ common_exception_return: j .Lrestore_state .Lexit_tif_loop_user: -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER abi_call user_enter_callable #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 63259fece7c7..e35ae66b4794 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,7 +10,7 @@ #include -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER extern void ct_cpu_track_user(int cpu); /* Called with interrupts disabled. */ @@ -52,7 +52,7 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) || + if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) || !context_tracking_enabled()) return 0; @@ -65,7 +65,7 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) && + if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) && context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) ct_user_enter(prev_ctx); @@ -109,14 +109,14 @@ static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } -#endif /* !CONFIG_CONTEXT_TRACKING */ +#endif /* !CONFIG_CONTEXT_TRACKING_USER */ #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) -#ifdef CONFIG_CONTEXT_TRACKING_FORCE +#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE extern void context_tracking_init(void); #else static inline void context_tracking_init(void) { } -#endif /* CONFIG_CONTEXT_TRACKING_FORCE */ +#endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */ #endif diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index edc7b46376a6..2b46afe105a9 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -22,7 +22,7 @@ struct context_tracking { } state; }; -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER extern struct static_key_false context_tracking_key; DECLARE_PER_CPU(struct context_tracking, context_tracking); @@ -45,6 +45,6 @@ static inline bool context_tracking_enabled_this_cpu(void) static __always_inline bool context_tracking_enabled(void) { return false; } static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; } static __always_inline bool context_tracking_enabled_this_cpu(void) { return false; } -#endif /* CONFIG_CONTEXT_TRACKING */ +#endif /* CONFIG_CONTEXT_TRACKING_USER */ #endif diff --git a/init/Kconfig b/init/Kconfig index c7900e8975f1..06454d19e2f0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -494,11 +494,11 @@ config VIRT_CPU_ACCOUNTING_NATIVE config VIRT_CPU_ACCOUNTING_GEN bool "Full dynticks CPU time accounting" - depends on HAVE_CONTEXT_TRACKING + depends on HAVE_CONTEXT_TRACKING_USER depends on HAVE_VIRT_CPU_ACCOUNTING_GEN depends on GENERIC_CLOCKEVENTS select VIRT_CPU_ACCOUNTING - select CONTEXT_TRACKING + select CONTEXT_TRACKING_USER help Select this option to enable task and CPU time accounting on full dynticks systems. This accounting is implemented by watching every diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index d361bd52e4e1..f3dec1be2bf6 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -22,6 +22,8 @@ #include #include +#ifdef CONFIG_CONTEXT_TRACKING_USER + #define CREATE_TRACE_POINTS #include @@ -252,7 +254,7 @@ void __init ct_cpu_track_user(int cpu) initialized = true; } -#ifdef CONFIG_CONTEXT_TRACKING_FORCE +#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE void __init context_tracking_init(void) { int cpu; @@ -261,3 +263,5 @@ void __init context_tracking_init(void) ct_cpu_track_user(cpu); } #endif + +#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index da0bf6fe9ecd..883167a57bf9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6559,7 +6559,7 @@ void __sched schedule_idle(void) } while (need_resched()); } -#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) +#if defined(CONFIG_CONTEXT_TRACKING_USER) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) asmlinkage __visible void __sched schedule_user(void) { /* diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 27b7868b5c30..41f99bcfe9e6 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -73,6 +73,9 @@ config TIME_KUNIT_TEST If unsure, say N. +config CONTEXT_TRACKING + bool + if GENERIC_CLOCKEVENTS menu "Timers subsystem" @@ -111,7 +114,7 @@ config NO_HZ_FULL # NO_HZ_COMMON dependency # We need at least one periodic CPU for timekeeping depends on SMP - depends on HAVE_CONTEXT_TRACKING + depends on HAVE_CONTEXT_TRACKING_USER # VIRT_CPU_ACCOUNTING_GEN dependency depends on HAVE_VIRT_CPU_ACCOUNTING_GEN select NO_HZ_COMMON @@ -137,31 +140,37 @@ config NO_HZ_FULL endchoice -config CONTEXT_TRACKING - bool +config CONTEXT_TRACKING_USER + bool + depends on HAVE_CONTEXT_TRACKING_USER + select CONTEXT_TRACKING + help + Track transitions between kernel and user on behalf of RCU and + tickless cputime accounting. The former case relies on context + tracking to enter/exit RCU extended quiescent states. -config CONTEXT_TRACKING_FORCE - bool "Force context tracking" - depends on CONTEXT_TRACKING +config CONTEXT_TRACKING_USER_FORCE + bool "Force user context tracking" + depends on CONTEXT_TRACKING_USER default y if !NO_HZ_FULL help The major pre-requirement for full dynticks to work is to - support the context tracking subsystem. But there are also + support the user context tracking subsystem. But there are also other dependencies to provide in order to make the full dynticks working. This option stands for testing when an arch implements the - context tracking backend but doesn't yet fulfill all the + user context tracking backend but doesn't yet fulfill all the requirements to make the full dynticks feature working. Without the full dynticks, there is no way to test the support - for context tracking and the subsystems that rely on it: RCU + for user context tracking and the subsystems that rely on it: RCU userspace extended quiescent state and tickless cputime accounting. This option copes with the absence of the full - dynticks subsystem by forcing the context tracking on all + dynticks subsystem by forcing the user context tracking on all CPUs in the system. Say Y only if you're working on the development of an - architecture backend for the context tracking. + architecture backend for the user context tracking. Say N otherwise, this option brings an overhead that you don't want in production. -- cgit v1.2.3 From e67198cc05b8ecbb7b8e2d8ef9fb5c8d26821873 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:25 +0200 Subject: context_tracking: Take idle eqs entrypoints over RCU The RCU dynticks counter is going to be merged into the context tracking subsystem. Start with moving the idle extended quiescent states entrypoints to context tracking. For now those are dumb redirections to existing RCU calls. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- Documentation/RCU/stallwarn.rst | 4 ++-- arch/arm/mach-imx/cpuidle-imx6q.c | 5 +++-- drivers/acpi/processor_idle.c | 5 +++-- drivers/cpuidle/cpuidle.c | 9 +++++---- include/linux/context_tracking.h | 8 ++++++++ include/linux/rcupdate.h | 2 +- kernel/context_tracking.c | 15 +++++++++++++++ kernel/locking/lockdep.c | 2 +- kernel/rcu/Kconfig | 2 ++ kernel/rcu/tree.c | 2 -- kernel/rcu/update.c | 2 +- kernel/sched/idle.c | 10 +++++----- kernel/sched/sched.h | 1 + kernel/time/Kconfig | 6 ++++++ 14 files changed, 53 insertions(+), 20 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index 794837eb519b..b95bda7755fa 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -97,8 +97,8 @@ warnings: which will include additional debugging information. - A low-level kernel issue that either fails to invoke one of the - variants of rcu_user_enter(), rcu_user_exit(), rcu_idle_enter(), - rcu_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one + variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(), + ct_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one hand, or that invokes one of them too many times on the other. Historically, the most frequent issue has been an omission of either irq_enter() or irq_exit(), which in turn invoke diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index 094337dc1bc7..d086cbae09c3 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -3,6 +3,7 @@ * Copyright (C) 2012 Freescale Semiconductor, Inc. */ +#include #include #include #include @@ -24,9 +25,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev, imx6_set_lpm(WAIT_UNCLOCKED); raw_spin_unlock(&cpuidle_lock); - rcu_idle_enter(); + ct_idle_enter(); cpu_do_idle(); - rcu_idle_exit(); + ct_idle_exit(); raw_spin_lock(&cpuidle_lock); if (num_idle_cpus-- == num_online_cpus()) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 6a5572a1a80c..1401d193a2df 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -23,6 +23,7 @@ #include #include #include +#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -647,11 +648,11 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv, raw_spin_unlock(&c3_lock); } - rcu_idle_enter(); + ct_idle_enter(); acpi_idle_do_entry(cx); - rcu_idle_exit(); + ct_idle_exit(); /* Re-enable bus master arbitration */ if (dis_bm) { diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index ef2ea1b12cd8..62dd956025f3 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "cpuidle.h" @@ -150,12 +151,12 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, */ stop_critical_timings(); if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) - rcu_idle_enter(); + ct_idle_enter(); target_state->enter_s2idle(dev, drv, index); if (WARN_ON_ONCE(!irqs_disabled())) local_irq_disable(); if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) - rcu_idle_exit(); + ct_idle_exit(); tick_unfreeze(); start_critical_timings(); @@ -233,10 +234,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, stop_critical_timings(); if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) - rcu_idle_enter(); + ct_idle_enter(); entered_state = target_state->enter(dev, drv, index); if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) - rcu_idle_exit(); + ct_idle_exit(); start_critical_timings(); sched_clock_idle_wakeup_event(); diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e35ae66b4794..01abadb2f993 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -119,4 +119,12 @@ extern void context_tracking_init(void); static inline void context_tracking_init(void) { } #endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */ +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +extern void ct_idle_enter(void); +extern void ct_idle_exit(void); +#else +static inline void ct_idle_enter(void) { } +static inline void ct_idle_exit(void) { } +#endif /* !CONFIG_CONTEXT_TRACKING_IDLE */ + #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a32036c918c..6ebe754501c3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -128,7 +128,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } * @a: Code that RCU needs to pay attention to. * * RCU read-side critical sections are forbidden in the inner idle loop, - * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU + * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU * will happily ignore any such read-side critical sections. However, * things like powertop need tracepoints in the inner idle loop. * diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index f3dec1be2bf6..c0b3798d4e94 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -22,6 +22,21 @@ #include #include + +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +noinstr void ct_idle_enter(void) +{ + rcu_idle_enter(); +} +EXPORT_SYMBOL_GPL(ct_idle_enter); + +void ct_idle_exit(void) +{ + rcu_idle_exit(); +} +EXPORT_SYMBOL_GPL(ct_idle_exit); +#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ + #ifdef CONFIG_CONTEXT_TRACKING_USER #define CREATE_TRACE_POINTS diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f06b91ca6482..5ea690cb4f7a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6570,7 +6570,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) /* * If a CPU is in the RCU-free window in idle (ie: in the section - * between rcu_idle_enter() and rcu_idle_exit(), then RCU + * between ct_idle_enter() and ct_idle_exit(), then RCU * considers that CPU to be in an "extended quiescent state", * which means that RCU will be completely ignoring that CPU. * Therefore, rcu_read_lock() and friends have absolutely no diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 1c630e573548..3fa24e63d6f9 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -8,6 +8,8 @@ menu "RCU Subsystem" config TREE_RCU bool default y if SMP + # Dynticks-idle tracking + select CONTEXT_TRACKING_IDLE help This option selects the RCU implementation that is designed for very large SMP system with hundreds or diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9a5edab5558c..051fed0844b6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -664,7 +664,6 @@ void noinstr rcu_idle_enter(void) WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled()); rcu_eqs_enter(false); } -EXPORT_SYMBOL_GPL(rcu_idle_enter); #ifdef CONFIG_NO_HZ_FULL @@ -904,7 +903,6 @@ void noinstr rcu_idle_exit(void) rcu_eqs_exit(false); raw_local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(rcu_idle_exit); #ifdef CONFIG_NO_HZ_FULL /** diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index fc7fef575606..147214b2cd68 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -85,7 +85,7 @@ module_param(rcu_normal_after_boot, int, 0444); * and while lockdep is disabled. * * Note that if the CPU is in the idle loop from an RCU point of view (ie: - * that we are in the section between rcu_idle_enter() and rcu_idle_exit()) + * that we are in the section between ct_idle_enter() and ct_idle_exit()) * then rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an * rcu_read_lock(). The reason for this is that RCU ignores CPUs that are * in such a section, considering these as in extended quiescent state, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 328cccbee444..f26ab2675f7d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -53,14 +53,14 @@ static noinline int __cpuidle cpu_idle_poll(void) { trace_cpu_idle(0, smp_processor_id()); stop_critical_timings(); - rcu_idle_enter(); + ct_idle_enter(); local_irq_enable(); while (!tif_need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired())) cpu_relax(); - rcu_idle_exit(); + ct_idle_exit(); start_critical_timings(); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); @@ -98,12 +98,12 @@ void __cpuidle default_idle_call(void) * * Trace IRQs enable here, then switch off RCU, and have * arch_cpu_idle() use raw_local_irq_enable(). Note that - * rcu_idle_enter() relies on lockdep IRQ state, so switch that + * ct_idle_enter() relies on lockdep IRQ state, so switch that * last -- this is very similar to the entry code. */ trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); - rcu_idle_enter(); + ct_idle_enter(); lockdep_hardirqs_on(_THIS_IP_); arch_cpu_idle(); @@ -116,7 +116,7 @@ void __cpuidle default_idle_call(void) */ raw_local_irq_disable(); lockdep_hardirqs_off(_THIS_IP_); - rcu_idle_exit(); + ct_idle_exit(); lockdep_hardirqs_on(_THIS_IP_); raw_local_irq_enable(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 47b89a0fc6e5..0cfe2d0af294 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 41f99bcfe9e6..a41753be1a2b 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -76,6 +76,12 @@ config TIME_KUNIT_TEST config CONTEXT_TRACKING bool +config CONTEXT_TRACKING_IDLE + bool + select CONTEXT_TRACKING + help + Tracks idle state on behalf of RCU. + if GENERIC_CLOCKEVENTS menu "Timers subsystem" -- cgit v1.2.3 From 6f0e6c1598b1a3d19fc30db86b6e26d6f881b43d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:26 +0200 Subject: context_tracking: Take IRQ eqs entrypoints over RCU The RCU dynticks counter is going to be merged into the context tracking subsystem. Prepare with moving the IRQ extended quiescent states entrypoints to context tracking. For now those are dumb redirection to existing RCU calls. [ paulmck: Apply Stephen Rothwell feedback from -next. ] [ paulmck: Apply Nathan Chancellor feedback. ] Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- .../RCU/Design/Requirements/Requirements.rst | 10 ++++----- Documentation/RCU/stallwarn.rst | 4 ++-- arch/Kconfig | 2 +- arch/arm64/kernel/entry-common.c | 6 +++--- arch/x86/mm/fault.c | 2 +- drivers/cpuidle/cpuidle-psci.c | 8 ++++---- drivers/cpuidle/cpuidle-riscv-sbi.c | 8 ++++---- include/linux/context_tracking_irq.h | 17 +++++++++++++++ include/linux/context_tracking_state.h | 1 + include/linux/entry-common.h | 10 ++++----- include/linux/rcupdate.h | 5 +++-- include/linux/tracepoint.h | 4 ++-- kernel/cfi.c | 4 ++-- kernel/context_tracking.c | 24 ++++++++++++++++++++-- kernel/cpu_pm.c | 8 ++++---- kernel/entry/common.c | 12 +++++------ kernel/softirq.c | 4 ++-- kernel/trace/trace.c | 6 +++--- 18 files changed, 87 insertions(+), 48 deletions(-) create mode 100644 include/linux/context_tracking_irq.h (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index 04ed8bf27a0e..074810c73936 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -1844,10 +1844,10 @@ that meets this requirement. Furthermore, NMI handlers can be interrupted by what appear to RCU to be normal interrupts. One way that this can happen is for code that -directly invokes rcu_irq_enter() and rcu_irq_exit() to be called +directly invokes ct_irq_enter() and ct_irq_exit() to be called from an NMI handler. This astonishing fact of life prompted the current -code structure, which has rcu_irq_enter() invoking -rcu_nmi_enter() and rcu_irq_exit() invoking rcu_nmi_exit(). +code structure, which has ct_irq_enter() invoking +rcu_nmi_enter() and ct_irq_exit() invoking rcu_nmi_exit(). And yes, I also learned of this requirement the hard way. Loadable Modules @@ -2195,7 +2195,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be: sections, and RCU believes this CPU to be idle, no problem. This sort of thing is used by some architectures for light-weight exception handlers, which can then avoid the overhead of - rcu_irq_enter() and rcu_irq_exit() at exception entry and + ct_irq_enter() and ct_irq_exit() at exception entry and exit, respectively. Some go further and avoid the entireties of irq_enter() and irq_exit(). Just make very sure you are running some of your tests with @@ -2226,7 +2226,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be: +-----------------------------------------------------------------------+ | **Answer**: | +-----------------------------------------------------------------------+ -| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so | +| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so | | often. But given that long-running interrupt handlers can cause other | | problems, not least for response time, shouldn't you work to keep | | your interrupt handler's runtime within reasonable bounds? | diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index b95bda7755fa..ce1f58a9d954 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -98,11 +98,11 @@ warnings: - A low-level kernel issue that either fails to invoke one of the variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(), - ct_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one + ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one hand, or that invokes one of them too many times on the other. Historically, the most frequent issue has been an omission of either irq_enter() or irq_exit(), which in turn invoke - rcu_irq_enter() or rcu_irq_exit(), respectively. Building your + ct_irq_enter() or ct_irq_exit(), respectively. Building your kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types of issues, which sometimes arise in architecture-specific code. diff --git a/arch/Kconfig b/arch/Kconfig index 154b7b78da09..342642be105f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -782,7 +782,7 @@ config HAVE_CONTEXT_TRACKING_USER Syscalls need to be wrapped inside user_exit()-user_enter(), either optimized behind static key or through the slow path using TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs are already - protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal + protected inside ct_irq_enter/ct_irq_exit() but preemption or signal handling on irq exit still need to be protected. config HAVE_CONTEXT_TRACKING_USER_OFFSTACK diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 56cefd33eb8e..8dabe9ec10f1 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -41,7 +41,7 @@ static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { lockdep_hardirqs_off(CALLER_ADDR0); - rcu_irq_enter(); + ct_irq_enter(); trace_hardirqs_off_finish(); regs->exit_rcu = true; @@ -76,7 +76,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) if (regs->exit_rcu) { trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); - rcu_irq_exit(); + ct_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); return; } @@ -84,7 +84,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) trace_hardirqs_on(); } else { if (regs->exit_rcu) - rcu_irq_exit(); + ct_irq_exit(); } } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fad8faa29d04..971977c438fc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1526,7 +1526,7 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) /* * Entry handling for valid #PF from kernel mode is slightly - * different: RCU is already watching and rcu_irq_enter() must not + * different: RCU is already watching and ct_irq_enter() must not * be invoked because a kernel fault on a user space address might * sleep. * diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 540105ca0781..57bc3e3ae391 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -69,12 +69,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, return -1; /* Do runtime PM to manage a hierarchical CPU toplogy. */ - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_suspend(pd_dev); else pm_runtime_put_sync_suspend(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); state = psci_get_domain_state(); if (!state) @@ -82,12 +82,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, ret = psci_cpu_suspend_enter(state) ? -1 : idx; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_resume(pd_dev); else pm_runtime_get_sync(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); cpu_pm_exit(); diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index 1151e5e2ba82..862a2876f1c9 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -116,12 +116,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, return -1; /* Do runtime PM to manage a hierarchical CPU toplogy. */ - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_suspend(pd_dev); else pm_runtime_put_sync_suspend(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); if (sbi_is_domain_state_available()) state = sbi_get_domain_state(); @@ -130,12 +130,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, ret = sbi_suspend(state) ? -1 : idx; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_resume(pd_dev); else pm_runtime_get_sync(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); cpu_pm_exit(); diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h new file mode 100644 index 000000000000..62f62bbd1a50 --- /dev/null +++ b/include/linux/context_tracking_irq.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H +#define _LINUX_CONTEXT_TRACKING_IRQ_H + +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +void ct_irq_enter(void); +void ct_irq_exit(void); +void ct_irq_enter_irqson(void); +void ct_irq_exit_irqson(void); +#else +static inline void ct_irq_enter(void) { } +static inline void ct_irq_exit(void) { } +static inline void ct_irq_enter_irqson(void) { } +static inline void ct_irq_exit_irqson(void) { } +#endif + +#endif diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 2b46afe105a9..9c16a8b2c194 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -4,6 +4,7 @@ #include #include +#include struct context_tracking { /* diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index c92ac75d6556..84a466b176cf 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -357,7 +357,7 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs); /** * struct irqentry_state - Opaque object for exception state storage * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the - * exit path has to invoke rcu_irq_exit(). + * exit path has to invoke ct_irq_exit(). * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that * lockdep state is restored correctly on exit from nmi. * @@ -395,12 +395,12 @@ typedef struct irqentry_state { * * For kernel mode entries RCU handling is done conditional. If RCU is * watching then the only RCU requirement is to check whether the tick has - * to be restarted. If RCU is not watching then rcu_irq_enter() has to be - * invoked on entry and rcu_irq_exit() on exit. + * to be restarted. If RCU is not watching then ct_irq_enter() has to be + * invoked on entry and ct_irq_exit() on exit. * - * Avoiding the rcu_irq_enter/exit() calls is an optimization but also + * Avoiding the ct_irq_enter/exit() calls is an optimization but also * solves the problem of kernel mode pagefaults which can schedule, which - * is not possible after invoking rcu_irq_enter() without undoing it. + * is not possible after invoking ct_irq_enter() without undoing it. * * For user mode entries irqentry_enter_from_user_mode() is invoked to * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ebe754501c3..f1562d91c67d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -29,6 +29,7 @@ #include #include #include +#include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -143,9 +144,9 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ } while (0) /* diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 28031b15f878..55717a2eda08 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -200,13 +200,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ \ if (rcuidle) { \ __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ } \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ if (rcuidle) { \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ } \ \ diff --git a/kernel/cfi.c b/kernel/cfi.c index 08102d19ec15..2046276ee234 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -295,7 +295,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) rcu_idle = !rcu_is_watching(); if (rcu_idle) { local_irq_save(flags); - rcu_irq_enter(); + ct_irq_enter(); } if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) @@ -304,7 +304,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) fn = find_module_check_fn(ptr); if (rcu_idle) { - rcu_irq_exit(); + ct_irq_exit(); local_irq_restore(flags); } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index c0b3798d4e94..72bd71a02c44 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -35,6 +35,26 @@ void ct_idle_exit(void) rcu_idle_exit(); } EXPORT_SYMBOL_GPL(ct_idle_exit); + +noinstr void ct_irq_enter(void) +{ + rcu_irq_enter(); +} + +noinstr void ct_irq_exit(void) +{ + rcu_irq_exit(); +} + +void ct_irq_enter_irqson(void) +{ + rcu_irq_enter_irqson(); +} + +void ct_irq_exit_irqson(void) +{ + rcu_irq_exit_irqson(); +} #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ #ifdef CONFIG_CONTEXT_TRACKING_USER @@ -90,7 +110,7 @@ void noinstr __ct_user_enter(enum ctx_state state) * At this stage, only low level arch entry code remains and * then we'll run in userspace. We can assume there won't be * any RCU read-side critical section until the next call to - * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency + * user_exit() or ct_irq_enter(). Let's remove RCU's dependency * on the tick. */ if (state == CONTEXT_USER) { @@ -136,7 +156,7 @@ void ct_user_enter(enum ctx_state state) /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit() * This would mess up the dyntick_nesting count though. And rcu_irq_*() * helpers are enough to protect RCU uses inside the exception. So * just return immediately if we detect we are in an IRQ. diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c index 246efc74e3f3..ba4ba71facf9 100644 --- a/kernel/cpu_pm.c +++ b/kernel/cpu_pm.c @@ -35,11 +35,11 @@ static int cpu_pm_notify(enum cpu_pm_event event) * disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know * this. */ - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); rcu_read_lock(); ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL); rcu_read_unlock(); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); return notifier_to_errno(ret); } @@ -49,11 +49,11 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev unsigned long flags; int ret; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags); ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL); raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); return notifier_to_errno(ret); } diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 032f164abe7c..667ba5d581ff 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -321,7 +321,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) } /* - * If this entry hit the idle task invoke rcu_irq_enter() whether + * If this entry hit the idle task invoke ct_irq_enter() whether * RCU is watching or not. * * Interrupts can nest when the first interrupt invokes softirq @@ -332,12 +332,12 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * not nested into another interrupt. * * Checking for rcu_is_watching() here would prevent the nesting - * interrupt to invoke rcu_irq_enter(). If that nested interrupt is + * interrupt to invoke ct_irq_enter(). If that nested interrupt is * the tick then rcu_flavor_sched_clock_irq() would wrongfully * assume that it is the first interrupt and eventually claim * quiescent state and end grace periods prematurely. * - * Unconditionally invoke rcu_irq_enter() so RCU state stays + * Unconditionally invoke ct_irq_enter() so RCU state stays * consistent. * * TINY_RCU does not support EQS, so let the compiler eliminate @@ -350,7 +350,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * as in irqentry_enter_from_user_mode(). */ lockdep_hardirqs_off(CALLER_ADDR0); - rcu_irq_enter(); + ct_irq_enter(); instrumentation_begin(); trace_hardirqs_off_finish(); instrumentation_end(); @@ -418,7 +418,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); instrumentation_end(); - rcu_irq_exit(); + ct_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); return; } @@ -436,7 +436,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) * was not watching on entry. */ if (state.exit_rcu) - rcu_irq_exit(); + ct_irq_exit(); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index 9f0aef8aa9ff..c8a6913c067d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -620,7 +620,7 @@ void irq_enter_rcu(void) */ void irq_enter(void) { - rcu_irq_enter(); + ct_irq_enter(); irq_enter_rcu(); } @@ -672,7 +672,7 @@ void irq_exit_rcu(void) void irq_exit(void) { __irq_exit_rcu(); - rcu_irq_exit(); + ct_irq_exit(); /* must be last! */ lockdep_hardirq_exit(); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c95992e2c71..fe78a6818126 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3107,15 +3107,15 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, /* * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), * but if the above rcu_is_watching() failed, then the NMI - * triggered someplace critical, and rcu_irq_enter() should + * triggered someplace critical, and ct_irq_enter() should * not be called from NMI. */ if (unlikely(in_nmi())) return; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); } /** -- cgit v1.2.3 From 493c1822825f00025d6754ec0632990a27edc6f8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:27 +0200 Subject: context_tracking: Take NMI eqs entrypoints over RCU The RCU dynticks counter is going to be merged into the context tracking subsystem. Prepare with moving the NMI extended quiescent states entrypoints to context tracking. For now those are dumb redirection to existing RCU calls. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- Documentation/RCU/Design/Requirements/Requirements.rst | 2 +- arch/Kconfig | 2 +- arch/arm64/kernel/entry-common.c | 8 ++++---- include/linux/context_tracking_irq.h | 4 ++++ include/linux/hardirq.h | 4 ++-- kernel/context_tracking.c | 10 ++++++++++ kernel/entry/common.c | 4 ++-- kernel/extable.c | 4 ++-- kernel/trace/trace.c | 2 +- 9 files changed, 27 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index 074810c73936..a0f8164c8513 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -1847,7 +1847,7 @@ normal interrupts. One way that this can happen is for code that directly invokes ct_irq_enter() and ct_irq_exit() to be called from an NMI handler. This astonishing fact of life prompted the current code structure, which has ct_irq_enter() invoking -rcu_nmi_enter() and ct_irq_exit() invoking rcu_nmi_exit(). +ct_nmi_enter() and ct_irq_exit() invoking ct_nmi_exit(). And yes, I also learned of this requirement the hard way. Loadable Modules diff --git a/arch/Kconfig b/arch/Kconfig index 342642be105f..f56f7c0e924d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -797,7 +797,7 @@ config HAVE_CONTEXT_TRACKING_USER_OFFSTACK - Critical entry code isn't preemptible (or better yet: not interruptible). - - No use of RCU read side critical sections, unless rcu_nmi_enter() + - No use of RCU read side critical sections, unless ct_nmi_enter() got called. - No use of instrumentation, unless instrumentation_begin() got called. diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 8dabe9ec10f1..c75ca36b4a49 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -161,7 +161,7 @@ static void noinstr arm64_enter_nmi(struct pt_regs *regs) __nmi_enter(); lockdep_hardirqs_off(CALLER_ADDR0); lockdep_hardirq_enter(); - rcu_nmi_enter(); + ct_nmi_enter(); trace_hardirqs_off_finish(); ftrace_nmi_enter(); @@ -182,7 +182,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs) lockdep_hardirqs_on_prepare(); } - rcu_nmi_exit(); + ct_nmi_exit(); lockdep_hardirq_exit(); if (restore) lockdep_hardirqs_on(CALLER_ADDR0); @@ -199,7 +199,7 @@ static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); lockdep_hardirqs_off(CALLER_ADDR0); - rcu_nmi_enter(); + ct_nmi_enter(); trace_hardirqs_off_finish(); } @@ -218,7 +218,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) lockdep_hardirqs_on_prepare(); } - rcu_nmi_exit(); + ct_nmi_exit(); if (restore) lockdep_hardirqs_on(CALLER_ADDR0); } diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h index 62f62bbd1a50..c50b5670c4a5 100644 --- a/include/linux/context_tracking_irq.h +++ b/include/linux/context_tracking_irq.h @@ -7,11 +7,15 @@ void ct_irq_enter(void); void ct_irq_exit(void); void ct_irq_enter_irqson(void); void ct_irq_exit_irqson(void); +void ct_nmi_enter(void); +void ct_nmi_exit(void); #else static inline void ct_irq_enter(void) { } static inline void ct_irq_exit(void) { } static inline void ct_irq_enter_irqson(void) { } static inline void ct_irq_exit_irqson(void) { } +static inline void ct_nmi_enter(void) { } +static inline void ct_nmi_exit(void) { } #endif #endif diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 76878b357ffa..345cdbe9c1b7 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -124,7 +124,7 @@ extern void rcu_nmi_exit(void); do { \ __nmi_enter(); \ lockdep_hardirq_enter(); \ - rcu_nmi_enter(); \ + ct_nmi_enter(); \ instrumentation_begin(); \ ftrace_nmi_enter(); \ instrumentation_end(); \ @@ -143,7 +143,7 @@ extern void rcu_nmi_exit(void); instrumentation_begin(); \ ftrace_nmi_exit(); \ instrumentation_end(); \ - rcu_nmi_exit(); \ + ct_nmi_exit(); \ lockdep_hardirq_exit(); \ __nmi_exit(); \ } while (0) diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 72bd71a02c44..b8a731f20778 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -55,6 +55,16 @@ void ct_irq_exit_irqson(void) { rcu_irq_exit_irqson(); } + +noinstr void ct_nmi_enter(void) +{ + rcu_nmi_enter(); +} + +noinstr void ct_nmi_exit(void) +{ + rcu_nmi_exit(); +} #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ #ifdef CONFIG_CONTEXT_TRACKING_USER diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 667ba5d581ff..063068a9ea9b 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -449,7 +449,7 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs) __nmi_enter(); lockdep_hardirqs_off(CALLER_ADDR0); lockdep_hardirq_enter(); - rcu_nmi_enter(); + ct_nmi_enter(); instrumentation_begin(); trace_hardirqs_off_finish(); @@ -469,7 +469,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state) } instrumentation_end(); - rcu_nmi_exit(); + ct_nmi_exit(); lockdep_hardirq_exit(); if (irq_state.lockdep) lockdep_hardirqs_on(CALLER_ADDR0); diff --git a/kernel/extable.c b/kernel/extable.c index bda5e9761541..71f482581cab 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -114,7 +114,7 @@ int kernel_text_address(unsigned long addr) /* Treat this like an NMI as it can happen anywhere */ if (no_rcu) - rcu_nmi_enter(); + ct_nmi_enter(); if (is_module_text_address(addr)) goto out; @@ -127,7 +127,7 @@ int kernel_text_address(unsigned long addr) ret = 0; out: if (no_rcu) - rcu_nmi_exit(); + ct_nmi_exit(); return ret; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fe78a6818126..5fc7f17f5ec7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3105,7 +3105,7 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, } /* - * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), + * When an NMI triggers, RCU is enabled via ct_nmi_enter(), * but if the above rcu_is_watching() failed, then the NMI * triggered someplace critical, and ct_irq_enter() should * not be called from NMI. -- cgit v1.2.3 From c33ef43a359001415032665dfcd433979c462b71 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:34 +0200 Subject: rcu/context-tracking: Remove unused and/or unecessary middle functions Some eqs functions are now only used internally by context tracking, so their public declarations can be removed. Also middle functions such as rcu_user_*() and rcu_idle_*() which now directly call to rcu_eqs_enter() and rcu_eqs_exit() can be wiped out as well. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- Documentation/RCU/stallwarn.rst | 2 +- include/linux/hardirq.h | 8 ---- include/linux/rcupdate.h | 8 ---- include/linux/rcutiny.h | 2 - include/linux/rcutree.h | 2 - kernel/context_tracking.c | 98 ++++++++++++----------------------------- 6 files changed, 28 insertions(+), 92 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index ce1f58a9d954..e38c587067fc 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -97,7 +97,7 @@ warnings: which will include additional debugging information. - A low-level kernel issue that either fails to invoke one of the - variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(), + variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(), ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one hand, or that invokes one of them too many times on the other. Historically, the most frequent issue has been an omission diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 345cdbe9c1b7..d57cab4d4c06 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -92,14 +92,6 @@ void irq_exit_rcu(void); #define arch_nmi_exit() do { } while (0) #endif -#ifdef CONFIG_TINY_RCU -static inline void rcu_nmi_enter(void) { } -static inline void rcu_nmi_exit(void) { } -#else -extern void rcu_nmi_enter(void); -extern void rcu_nmi_exit(void); -#endif - /* * NMI vs Tracing * -------------- diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3717cad983a6..434da1eb88cd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -104,14 +104,6 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#ifdef CONFIG_NO_HZ_FULL -void rcu_user_enter(void); -void rcu_user_exit(void); -#else -static inline void rcu_user_enter(void) { } -static inline void rcu_user_exit(void) { } -#endif /* CONFIG_NO_HZ_FULL */ - #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 591119413cf1..900ba35c3582 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -76,8 +76,6 @@ static inline int rcu_needs_cpu(void) static inline void rcu_virt_note_context_switch(int cpu) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } -static inline void rcu_idle_enter(void) { } -static inline void rcu_idle_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 24db1e41695c..9cca00ed9bc9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,8 +45,6 @@ unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu(unsigned long oldstate); -void rcu_idle_enter(void); -void rcu_idle_exit(void); bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 072c4b6044b3..e485b6b01537 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -189,17 +189,17 @@ static void noinstr rcu_eqs_exit(bool user) } /** - * rcu_nmi_exit - inform RCU of exit from NMI context + * ct_nmi_exit - inform RCU of exit from NMI context * * If we are returning from the outermost NMI handler that interrupted an * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting * to let the RCU grace-period handling know that the CPU is back to * being RCU-idle. * - * If you add or remove a call to rcu_nmi_exit(), be sure to test + * If you add or remove a call to ct_nmi_exit(), be sure to test * with CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_nmi_exit(void) +void noinstr ct_nmi_exit(void) { struct context_tracking *ct = this_cpu_ptr(&context_tracking); @@ -242,7 +242,7 @@ void noinstr rcu_nmi_exit(void) } /** - * rcu_nmi_enter - inform RCU of entry to NMI context + * ct_nmi_enter - inform RCU of entry to NMI context * * If the CPU was idle from RCU's viewpoint, update ct->dynticks and * ct->dynticks_nmi_nesting to let the RCU grace-period handling know @@ -250,10 +250,10 @@ void noinstr rcu_nmi_exit(void) * long as the nesting level does not overflow an int. (You will probably * run out of stack space first.) * - * If you add or remove a call to rcu_nmi_enter(), be sure to test + * If you add or remove a call to ct_nmi_enter(), be sure to test * with CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_nmi_enter(void) +void noinstr ct_nmi_enter(void) { long incby = 2; struct context_tracking *ct = this_cpu_ptr(&context_tracking); @@ -302,32 +302,33 @@ void noinstr rcu_nmi_enter(void) } /** - * rcu_idle_enter - inform RCU that current CPU is entering idle + * ct_idle_enter - inform RCU that current CPU is entering idle * * Enter idle mode, in other words, -leave- the mode in which RCU * read-side critical sections can occur. (Though RCU read-side * critical sections can occur in irq handlers in idle, a possibility * handled by irq_enter() and irq_exit().) * - * If you add or remove a call to rcu_idle_enter(), be sure to test with + * If you add or remove a call to ct_idle_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_idle_enter(void) +void noinstr ct_idle_enter(void) { WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled()); rcu_eqs_enter(false); } +EXPORT_SYMBOL_GPL(ct_idle_enter); /** - * rcu_idle_exit - inform RCU that current CPU is leaving idle + * ct_idle_exit - inform RCU that current CPU is leaving idle * * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * If you add or remove a call to rcu_idle_exit(), be sure to test with + * If you add or remove a call to ct_idle_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_idle_exit(void) +void noinstr ct_idle_exit(void) { unsigned long flags; @@ -335,18 +336,6 @@ void noinstr rcu_idle_exit(void) rcu_eqs_exit(false); raw_local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(rcu_idle_exit); - -noinstr void ct_idle_enter(void) -{ - rcu_idle_enter(); -} -EXPORT_SYMBOL_GPL(ct_idle_enter); - -void ct_idle_exit(void) -{ - rcu_idle_exit(); -} EXPORT_SYMBOL_GPL(ct_idle_exit); /** @@ -431,50 +420,11 @@ void ct_irq_exit_irqson(void) ct_irq_exit(); local_irq_restore(flags); } - -noinstr void ct_nmi_enter(void) -{ - rcu_nmi_enter(); -} - -noinstr void ct_nmi_exit(void) -{ - rcu_nmi_exit(); -} +#else +static __always_inline void rcu_eqs_enter(bool user) { } +static __always_inline void rcu_eqs_exit(bool user) { } #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ -#ifdef CONFIG_NO_HZ_FULL -/** - * rcu_user_enter - inform RCU that we are resuming userspace. - * - * Enter RCU idle mode right before resuming userspace. No use of RCU - * is permitted between this call and rcu_user_exit(). This way the - * CPU doesn't need to maintain the tick for RCU maintenance purposes - * when the CPU runs in userspace. - * - * If you add or remove a call to rcu_user_enter(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -noinstr void rcu_user_enter(void) -{ - rcu_eqs_enter(true); -} - -/** - * rcu_user_exit - inform RCU that we are exiting userspace. - * - * Exit RCU idle mode while entering the kernel because it can - * run a RCU read side critical section anytime. - * - * If you add or remove a call to rcu_user_exit(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -void noinstr rcu_user_exit(void) -{ - rcu_eqs_exit(true); -} -#endif /* #ifdef CONFIG_NO_HZ_FULL */ - #ifdef CONFIG_CONTEXT_TRACKING_USER #define CREATE_TRACE_POINTS @@ -542,7 +492,13 @@ void noinstr __ct_user_enter(enum ctx_state state) * that will fire and reschedule once we resume in user/guest mode. */ rcu_irq_work_resched(); - rcu_user_enter(); + /* + * Enter RCU idle mode right before resuming userspace. No use of RCU + * is permitted between this call and rcu_eqs_exit(). This way the + * CPU doesn't need to maintain the tick for RCU maintenance purposes + * when the CPU runs in userspace. + */ + rcu_eqs_enter(true); } /* * Even if context tracking is disabled on this CPU, because it's outside @@ -579,7 +535,7 @@ void ct_user_enter(enum ctx_state state) /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: - * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit() + * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit() * This would mess up the dyntick_nesting count though. And rcu_irq_*() * helpers are enough to protect RCU uses inside the exception. So * just return immediately if we detect we are in an IRQ. @@ -631,10 +587,10 @@ void noinstr __ct_user_exit(enum ctx_state state) if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { /* - * We are going to run code that may use RCU. Inform - * RCU core about that (ie: we may need the tick again). + * Exit RCU idle mode while entering the kernel because it can + * run a RCU read side critical section anytime. */ - rcu_user_exit(); + rcu_eqs_exit(true); if (state == CONTEXT_USER) { instrumentation_begin(); vtime_user_exit(current); -- cgit v1.2.3 From b37a667c62421b34e96b05613457b9fb0ed66ea1 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 22 Apr 2022 17:52:47 +0000 Subject: rcu/nocb: Add an option to offload all CPUs on boot Systems built with CONFIG_RCU_NOCB_CPU=y but booted without either the rcu_nocbs= or rcu_nohz_full= kernel-boot parameters will not have callback offloading on any of the CPUs, nor can any of the CPUs be switched to enable callback offloading at runtime. Although this is intentional, it would be nice to have a way to offload all the CPUs without having to make random bootloaders specify either the rcu_nocbs= or the rcu_nohz_full= kernel-boot parameters. This commit therefore provides a new CONFIG_RCU_NOCB_CPU_DEFAULT_ALL Kconfig option that switches the default so as to offload callback processing on all of the CPUs. This default can still be overridden using the rcu_nocbs= and rcu_nohz_full= kernel-boot parameters. Reviewed-by: Kalesh Singh Reviewed-by: Uladzislau Rezki (In v4.1, fixed issues with CONFIG maze reported by kernel test robot). Reported-by: kernel test robot Signed-off-by: Joel Fernandes Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ kernel/rcu/Kconfig | 13 +++++++++++++ kernel/rcu/tree_nocb.h | 15 ++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2522b11e593f..34605c275294 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3659,6 +3659,9 @@ just as if they had also been called out in the rcu_nocbs= boot parameter. + Note that this argument takes precedence over + the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option. + noiotrap [SH] Disables trapped I/O port accesses. noirqdebug [X86-32] Disables the code which attempts to detect and @@ -4557,6 +4560,9 @@ no-callback mode from boot but the mode may be toggled at runtime via cpusets. + Note that this argument takes precedence over + the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option. + rcu_nocb_poll [KNL] Rather than requiring that offloaded CPUs (specified by rcu_nocbs= above) explicitly diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 1c630e573548..27aab870ae4c 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -262,6 +262,19 @@ config RCU_NOCB_CPU Say Y here if you need reduced OS jitter, despite added overhead. Say N here if you are unsure. +config RCU_NOCB_CPU_DEFAULT_ALL + bool "Offload RCU callback processing from all CPUs by default" + depends on RCU_NOCB_CPU + default n + help + Use this option to offload callback processing from all CPUs + by default, in the absence of the rcu_nocbs or nohz_full boot + parameter. This also avoids the need to use any boot parameters + to achieve the effect of offloading all CPUs on boot. + + Say Y here if you want offload all CPUs by default on boot. + Say N here if you are unsure. + config TASKS_TRACE_RCU_READ_MB bool "Tasks Trace RCU readers use memory barriers in user and idle" depends on RCU_EXPERT && TASKS_TRACE_RCU diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 4cf9a29bba79..60cc92cc6655 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1197,11 +1197,21 @@ void __init rcu_init_nohz(void) { int cpu; bool need_rcu_nocb_mask = false; + bool offload_all = false; struct rcu_data *rdp; +#if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) + if (!rcu_state.nocb_is_setup) { + need_rcu_nocb_mask = true; + offload_all = true; + } +#endif /* #if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) */ + #if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) + if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) { need_rcu_nocb_mask = true; + offload_all = false; /* NO_HZ_FULL has its own mask. */ + } #endif /* #if defined(CONFIG_NO_HZ_FULL) */ if (need_rcu_nocb_mask) { @@ -1222,6 +1232,9 @@ void __init rcu_init_nohz(void) cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); #endif /* #if defined(CONFIG_NO_HZ_FULL) */ + if (offload_all) + cpumask_setall(rcu_nocb_mask); + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n"); cpumask_and(rcu_nocb_mask, cpu_possible_mask, -- cgit v1.2.3