diff options
author | Jakub Kicinski | 2024-04-11 14:20:04 -0700 |
---|---|---|
committer | Jakub Kicinski | 2024-04-11 14:23:47 -0700 |
commit | 94426ed2137a948d212302df571445d4245772b9 (patch) | |
tree | 2bf20010e453ba302a818ed70271e073078bc4ba /kernel | |
parent | a55b39e858901986408391c574f414ef889f1c53 (diff) | |
parent | 2ae9a8972ce04046957f8af214509cebfd3bfb9c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
net/unix/garbage.c
47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()")
4090fa373f0e ("af_unix: Replace garbage collection algorithm.")
Adjacent changes:
drivers/net/ethernet/broadcom/bnxt/bnxt.c
faa12ca24558 ("bnxt_en: Reset PTP tx_avail after possible firmware reset")
b3d0083caf9a ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()")
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
7ac10c7d728d ("bnxt_en: Fix possible memory leak in bnxt_rdma_aux_device_init()")
194fad5b2781 ("bnxt_en: Refactor bnxt_rdma_aux_device_init/uninit functions")
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
958f56e48385 ("net/mlx5e: Un-expose functions in en.h")
49e6c9387051 ("net/mlx5e: RSS, Block XOR hash with over 128 channels")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/kprobes.c | 18 |
-rw-r--r-- | kernel/time/tick-sched.c | 18 |
-rw-r--r-- | kernel/time/tick-sched.h | 2 |
-rw-r--r-- | kernel/time/timer.c | 22 |
-rw-r--r-- | kernel/time/timer_migration.c | 32 |
5 files changed, 70 insertions, 22 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9d9095e81792..65adc815fc6e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p, jump_label_lock(); preempt_disable(); - /* Ensure it is not in reserved area nor out of text */ - if (!(core_kernel_text((unsigned long) p->addr) || - is_module_text_address((unsigned long) p->addr)) || - in_gate_area_no_mm((unsigned long) p->addr) || + /* Ensure the address is in a text area, and find a module if exists. */ + *probed_mod = NULL; + if (!core_kernel_text((unsigned long) p->addr)) { + *probed_mod = __module_text_address((unsigned long) p->addr); + if (!(*probed_mod)) { + ret = -EINVAL; + goto out; + } + } + /* Ensure it is not in reserved area. */ + if (in_gate_area_no_mm((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || @@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if 'p' is probing a module. */ - *probed_mod = __module_text_address((unsigned long) p->addr); + /* Get module refcount and reject __init functions for loaded modules. */ if (*probed_mod) { /* * We must hold a refcount of the probed module while updating diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 269e21590df5..1331216a9cae 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -697,6 +697,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu) /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted + * @now: current ktime_t * * Called from interrupt entry when the CPU was idle * @@ -794,7 +795,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * - * This function returns -1 if NOHZ is not enabled. 
+ * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { @@ -820,7 +821,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * - * This function returns -1 if NOHZ is not enabled. + * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu */ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { @@ -1287,6 +1288,8 @@ void tick_nohz_irq_exit(void) /** * tick_nohz_idle_got_tick - Check whether or not the tick handler has run + * + * Return: %true if the tick handler has run, otherwise %false */ bool tick_nohz_idle_got_tick(void) { @@ -1305,6 +1308,8 @@ bool tick_nohz_idle_got_tick(void) * stopped, it returns the next hrtimer. * * Called from power state control code with interrupts disabled + * + * Return: the next expiration time */ ktime_t tick_nohz_get_next_hrtimer(void) { @@ -1320,6 +1325,8 @@ ktime_t tick_nohz_get_next_hrtimer(void) * The return value of this function and/or the value returned by it through the * @delta_next pointer can be negative which must be taken into account by its * callers. + * + * Return: the expected length of the current sleep */ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { @@ -1357,8 +1364,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) /** * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value * for a particular CPU. + * @cpu: target CPU number * * Called from the schedutil frequency scaling governor in scheduler context. + * + * Return: the current idle calls counter value for @cpu */ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) { @@ -1371,6 +1381,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) * tick_nohz_get_idle_calls - return the current idle calls counter value * * Called from the schedutil frequency scaling governor in scheduler context. 
+ * + * Return: the current idle calls counter value for the current CPU */ unsigned long tick_nohz_get_idle_calls(void) { @@ -1559,7 +1571,7 @@ early_param("skew_tick", skew_tick); /** * tick_setup_sched_timer - setup the tick emulation timer - * @mode: tick_nohz_mode to setup for + * @hrtimer: whether to use the hrtimer or not */ void tick_setup_sched_timer(bool hrtimer) { diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index e11c4dc65bcb..b4a7822f495d 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -46,8 +46,8 @@ struct tick_device { * @next_tick: Next tick to be fired when in dynticks mode. * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_waketime: Time when the idle was interrupted + * @idle_sleeptime_seq: sequence counter for data consistency * @idle_entrytime: Time when the idle call was entered - * @nohz_mode: Mode - one state of tick_nohz_mode * @last_jiffies: Base jiffies snapshot when next event was last computed * @timer_expires_base: Base time clock monotonic for @timer_expires * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index dee29f1f5b75..3baf2fbe6848 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64); /* * The timer wheel has LVL_DEPTH array levels. Each level provides an array of - * LVL_SIZE buckets. Each level is driven by its own clock and therefor each + * LVL_SIZE buckets. Each level is driven by its own clock and therefore each * level has a different granularity. * - * The level granularity is: LVL_CLK_DIV ^ lvl + * The level granularity is: LVL_CLK_DIV ^ level * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) * * The array level of a newly armed timer depends on the relative expiry * time. The farther the expiry time is away the higher the array level and - * therefor the granularity becomes. 
+ * therefore the granularity becomes. * * Contrary to the original timer wheel implementation, which aims for 'exact' * expiry of the timers, this implementation removes the need for recascading @@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64); * struct timer_base - Per CPU timer base (number of base depends on config) * @lock: Lock protecting the timer_base * @running_timer: When expiring timers, the lock is dropped. To make - * sure not to race agains deleting/modifying a + * sure not to race against deleting/modifying a * currently running timer, the pointer is set to the * timer, which expires at the moment. If no timer is * running, the pointer is NULL. @@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr) } /* - * fixup_init is called when: + * timer_fixup_init is called when: * - an active object is initialized */ static bool timer_fixup_init(void *addr, enum debug_obj_state state) @@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused) } /* - * fixup_activate is called when: + * timer_fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ @@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) } /* - * fixup_free is called when: + * timer_fixup_free is called when: * - an active object is freed */ static bool timer_fixup_free(void *addr, enum debug_obj_state state) @@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state) } /* - * fixup_assert_init is called when: + * timer_fixup_assert_init is called when: * - an untracked/uninit-ed object is found */ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) @@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer, * @key: lockdep class key of the fake lock used for tracking timer * sync lock dependencies * - * init_timer_key() must be done to a timer prior calling *any* of the + * init_timer_key() must be done to a timer 
prior to calling *any* of the * other timer functions. */ void init_timer_key(struct timer_list *timer, @@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown) * If @shutdown is set then the lock has to be taken whether the * timer is pending or not to protect against a concurrent rearm * which might hit between the lockless pending check and the lock - * aquisition. By taking the lock it is ensured that such a newly + * acquisition. By taking the lock it is ensured that such a newly * enqueued timer is dequeued and cannot end up with * timer->function == NULL in the expiry code. * @@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, /* * When timer base is not set idle, undo the effect of - * tmigr_cpu_deactivate() to prevent inconsitent states - active + * tmigr_cpu_deactivate() to prevent inconsistent states - active * timer base but inactive timer migration hierarchy. * * When timer base was already marked idle, nothing will be diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index c63a0afdcebe..ccba875d2234 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, first_childevt = evt = data->evt; + /* + * Walking the hierarchy is required in any case when a + * remote expiry was done before. This ensures to not lose + * already queued events in non active groups (see section + * "Required event and timerqueue update after a remote + * expiry" in the documentation at the top). + * + * The two call sites which are executed without a remote expiry + * before, are not prevented from propagating changes through + * the hierarchy by the return: + * - When entering this path by tmigr_new_timer(), @evt->ignore + * is never set. + * - tmigr_inactive_up() takes care of the propagation by + * itself and ignores the return value. 
But an immediate + * return is possible if there is a parent, sparing group + * locking at this level, because the upper walking call to + * the parent will take care about removing this event from + * within the group and update next_expiry accordingly. + * + * However if there is no parent, ie: the hierarchy has only a + * single level so @group is the top level group, make sure the + * first event information of the group is updated properly and + * also handled properly, so skip this fast return path. + */ + if (evt->ignore && !remote && group->parent) + return true; + raw_spin_lock(&group->lock); childstate.state = 0; @@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, * queue when the expiry time changed only or when it could be ignored. */ if (timerqueue_node_queued(&evt->nextevt)) { - if ((evt->nextevt.expires == nextexp) && !evt->ignore) + if ((evt->nextevt.expires == nextexp) && !evt->ignore) { + /* Make sure not to miss a new CPU event with the same expiry */ + evt->cpu = first_childevt->cpu; goto check_toplvl; + } if (!timerqueue_del(&group->events, &evt->nextevt)) WRITE_ONCE(group->next_expiry, KTIME_MAX); |