diff options
author | Linus Torvalds | 2023-10-30 13:12:15 -1000 |
---|---|---|
committer | Linus Torvalds | 2023-10-30 13:12:15 -1000 |
commit | 63ce50fff9240d66cf3b59663f458f55ba6dcfcc (patch) | |
tree | c2613289116d6f80a50f534e9c5e4afc26064574 /arch/x86 | |
parent | 3cf3fabccb9dc821ffaec3ad6bf0cd6b278bd012 (diff) | |
parent | 984ffb6a4366752c949f7b39640aecdce222607f (diff) |
Merge tag 'sched-core-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"Fair scheduler (SCHED_OTHER) improvements:
- Remove the old and now unused SIS_PROP code & option
- Scan cluster before LLC in the wake-up path
- Use candidate prev/recent_used CPU if scanning failed for cluster
wakeup
NUMA scheduling improvements:
- Improve the VMA access-PID code to better skip/scan VMAs
- Extend tracing to cover VMA-skipping decisions
- Improve/fix the recently introduced sched_numa_find_nth_cpu() code
- Generalize numa_map_to_online_node()
Energy scheduling improvements:
- Remove the EM_MAX_COMPLEXITY limit
- Add tracepoints to track energy computation
- Make the behavior of the 'sched_energy_aware' sysctl more
consistent
- Consolidate and clean up access to a CPU's max compute capacity
- Fix uclamp code corner cases
RT scheduling improvements:
- Drive dl_rq->overloaded with dl_rq->pushable_dl_tasks updates
- Drive the ->rto_mask with rt_rq->pushable_tasks updates
Scheduler scalability improvements:
- Rate-limit updates to tg->load_avg
- On x86 disable IBRS when CPU is offline to improve single-threaded
performance
- Micro-optimize in_task() and in_interrupt()
- Micro-optimize the PSI code
- Avoid updating PSI triggers and ->rtpoll_total when there are no
state changes
Core scheduler infrastructure improvements:
- Use saved_state to reduce some spurious freezer wakeups
- Bring in a handful of fast-headers improvements to scheduler
headers
- Make the scheduler UAPI headers more widely usable by user-space
- Simplify the control flow of scheduler syscalls by using lock
guards
- Fix sched_setaffinity() vs. CPU hotplug race
Scheduler debuggability improvements:
- Disallow writing invalid values to sched_rt_period_us
- Fix a race in the rq-clock debugging code triggering warnings
- Fix a warning in the bandwidth distribution code
- Micro-optimize in_atomic_preempt_off() checks
- Enforce that the tasklist_lock is held in for_each_thread()
- Print the TGID in sched_show_task()
- Remove the /proc/sys/kernel/sched_child_runs_first sysctl
... and misc cleanups & fixes"
* tag 'sched-core-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (82 commits)
sched/fair: Remove SIS_PROP
sched/fair: Use candidate prev/recent_used CPU if scanning failed for cluster wakeup
sched/fair: Scan cluster before scanning LLC in wake-up path
sched: Add cpus_share_resources API
sched/core: Fix RQCF_ACT_SKIP leak
sched/fair: Remove unused 'curr' argument from pick_next_entity()
sched/nohz: Update comments about NEWILB_KICK
sched/fair: Remove duplicate #include
sched/psi: Update poll => rtpoll in relevant comments
sched: Make PELT acronym definition searchable
sched: Fix stop_one_cpu_nowait() vs hotplug
sched/psi: Bail out early from irq time accounting
sched/topology: Rename 'DIE' domain to 'PKG'
sched/psi: Delete the 'update_total' function parameter from update_triggers()
sched/psi: Avoid updating PSI triggers and ->rtpoll_total when there are no state changes
sched/headers: Remove comment referring to rq::cpu_load, since this has been removed
sched/numa: Complete scanning of inactive VMAs when there is no alternative
sched/numa: Complete scanning of partial VMAs regardless of PID activity
sched/numa: Move up the access pid reset logic
sched/numa: Trace decisions related to skipping VMAs
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/spec-ctrl.h | 11 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 12 |
2 files changed, 21 insertions, 2 deletions
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index cb0386fc4dc3..c648502e4535 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -4,6 +4,7 @@ #include <linux/thread_info.h> #include <asm/nospec-branch.h> +#include <asm/msr.h> /* * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR @@ -76,6 +77,16 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } +/* + * This can be used in noinstr functions & should only be called in bare + * metal context. + */ +static __always_inline void __update_spec_ctrl(u64 val) +{ + __this_cpu_write(x86_spec_ctrl_current, val); + native_wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + #ifdef CONFIG_SMP extern void speculative_store_bypass_ht_init(void); #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a187c0cbd5b..f7bcd42edaeb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -87,6 +87,7 @@ #include <asm/hw_irq.h> #include <asm/stackprotector.h> #include <asm/sev.h> +#include <asm/spec-ctrl.h> /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); @@ -640,13 +641,13 @@ static void __init build_sched_topology(void) }; #endif /* - * When there is NUMA topology inside the package skip the DIE domain + * When there is NUMA topology inside the package skip the PKG domain * since the NUMA domains will auto-magically create the right spanning * domains based on the SLIT. */ if (!x86_has_numa_in_package) { x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE) + cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(PKG) }; } @@ -1596,8 +1597,15 @@ void __noreturn hlt_play_dead(void) native_halt(); } +/* + * native_play_dead() is essentially a __noreturn function, but it can't + * be marked as such as the compiler may complain about it. + */ void native_play_dead(void) { + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + __update_spec_ctrl(0); + play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); |