Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/verifier.c       18
-rw-r--r--  kernel/events/core.c        57
-rw-r--r--  kernel/events/internal.h    16
-rw-r--r--  kernel/events/ring_buffer.c 20
-rw-r--r--  kernel/fail_function.c       5
-rw-r--r--  kernel/futex.c               5
-rw-r--r--  kernel/locking/lockdep.c    19
-rw-r--r--  kernel/panic.c               3
-rw-r--r--  kernel/rcu/tree.c            2
-rw-r--r--  kernel/rcu/tree_stall.h     22
-rw-r--r--  kernel/reboot.c             28
-rw-r--r--  kernel/sched/debug.c        12
-rw-r--r--  kernel/sched/fair.c         70
-rw-r--r--  kernel/trace/bpf_trace.c    12
-rw-r--r--  kernel/watchdog.c            4
15 files changed, 170 insertions(+), 123 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fb2943ea715d..ab2d6a02aee0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7884,9 +7884,11 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
+ const bool is_subprog = env->cur_state->frame[0]->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
- if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+ if (!is_subprog &&
+ (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
prog_type == BPF_PROG_TYPE_LSM) &&
!prog->aux->attach_func_proto->type)
return 0;
@@ -7906,6 +7908,16 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EACCES;
}
+ reg = cur_regs(env) + BPF_REG_0;
+ if (is_subprog) {
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+ reg_type_str[reg->type]);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7959,7 +7971,6 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
}
- reg = cur_regs(env) + BPF_REG_0;
if (reg->type != SCALAR_VALUE) {
verbose(env, "At program exit the register R0 is not a known value (%s)\n",
reg_type_str[reg->type]);
@@ -9670,12 +9681,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
struct bpf_insn_aux_data *aux)
{
- u32 datasec_id, type, id = insn->imm;
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
+ u32 type, id = insn->imm;
+ s32 datasec_id;
u64 addr;
int i;
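The datasec_id declaration above is narrowed to s32, presumably because the BTF lookup that fills it can hand back a negative errno; stored in a u32, that error can never be caught by a sign check. A minimal userspace sketch of the pitfall, with find_id() as a made-up stand-in for such a lookup:

/*
 * Illustration only: an ID that may carry a negative error must not be
 * stored in an unsigned type. find_id() is a hypothetical stand-in for
 * a lookup returning either a positive ID or a negative errno.
 */
#include <stdio.h>

static int find_id(int fail)
{
	return fail ? -2 : 42;	/* -2 plays the role of a -ENOENT-style error */
}

int main(void)
{
	unsigned int bad_id = find_id(1);	/* error value wraps to a huge number */
	int good_id = find_id(1);

	if (bad_id > 0)
		printf("u32 id: %u looks \"valid\", the error is lost\n", bad_id);
	if (good_id < 0)
		printf("s32 id: %d is correctly seen as an error\n", good_id);
	return 0;
}
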
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5a29ab09e72d..dc568ca295bd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2312,9 +2312,6 @@ group_sched_out(struct perf_event *group_event,
event_sched_out(event, cpuctx, ctx);
perf_pmu_enable(ctx->pmu);
-
- if (group_event->attr.exclusive)
- cpuctx->exclusive = 0;
}
#define DETACH_GROUP 0x01UL
@@ -2583,11 +2580,8 @@ group_sched_in(struct perf_event *group_event,
pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
- if (event_sched_in(group_event, cpuctx, ctx)) {
- pmu->cancel_txn(pmu);
- perf_mux_hrtimer_restart(cpuctx);
- return -EAGAIN;
- }
+ if (event_sched_in(group_event, cpuctx, ctx))
+ goto error;
/*
* Schedule in siblings as one group (if any):
@@ -2616,10 +2610,8 @@ group_error:
}
event_sched_out(group_event, cpuctx, ctx);
+error:
pmu->cancel_txn(pmu);
-
- perf_mux_hrtimer_restart(cpuctx);
-
return -EAGAIN;
}
@@ -2645,7 +2637,7 @@ static int group_can_go_on(struct perf_event *event,
* If this group is exclusive and there are already
* events on the CPU, it can't go on.
*/
- if (event->attr.exclusive && cpuctx->active_oncpu)
+ if (event->attr.exclusive && !list_empty(get_event_list(event)))
return 0;
/*
* Otherwise, try to add it if all previous groups were able
@@ -3679,6 +3671,7 @@ static int merge_sched_in(struct perf_event *event, void *data)
*can_add_hw = 0;
ctx->rotate_necessary = 1;
+ perf_mux_hrtimer_restart(cpuctx);
}
return 0;
@@ -6374,14 +6367,13 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}
static void perf_sample_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
} else if (!(current->flags & PF_KTHREAD)) {
- perf_get_regs_user(regs_user, regs, regs_user_copy);
+ perf_get_regs_user(regs_user, regs);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
@@ -7083,8 +7075,7 @@ void perf_prepare_sample(struct perf_event_header *header,
}
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
- perf_sample_regs_user(&data->regs_user, regs,
- &data->regs_user_copy);
+ perf_sample_regs_user(&data->regs_user, regs);
if (sample_type & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */
@@ -7186,6 +7177,7 @@ __perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs,
int (*output_begin)(struct perf_output_handle *,
+ struct perf_sample_data *,
struct perf_event *,
unsigned int))
{
@@ -7198,7 +7190,7 @@ __perf_event_output(struct perf_event *event,
perf_prepare_sample(&header, data, event, regs);
- err = output_begin(&handle, event, header.size);
+ err = output_begin(&handle, data, event, header.size);
if (err)
goto exit;
@@ -7264,7 +7256,7 @@ perf_event_read_event(struct perf_event *event,
int ret;
perf_event_header__init_id(&read_event.header, &sample, event);
- ret = perf_output_begin(&handle, event, read_event.header.size);
+ ret = perf_output_begin(&handle, &sample, event, read_event.header.size);
if (ret)
return;
@@ -7533,7 +7525,7 @@ static void perf_event_task_output(struct perf_event *event,
perf_event_header__init_id(&task_event->event_id.header, &sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
task_event->event_id.header.size);
if (ret)
goto out;
@@ -7636,7 +7628,7 @@ static void perf_event_comm_output(struct perf_event *event,
return;
perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
comm_event->event_id.header.size);
if (ret)
@@ -7736,7 +7728,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
namespaces_event->event_id.header.size);
if (ret)
goto out;
@@ -7863,7 +7855,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data)
perf_event_header__init_id(&cgroup_event->event_id.header,
&sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
cgroup_event->event_id.header.size);
if (ret)
goto out;
@@ -7989,7 +7981,7 @@ static void perf_event_mmap_output(struct perf_event *event,
}
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
mmap_event->event_id.header.size);
if (ret)
goto out;
@@ -8299,7 +8291,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
int ret;
perf_event_header__init_id(&rec.header, &sample, event);
- ret = perf_output_begin(&handle, event, rec.header.size);
+ ret = perf_output_begin(&handle, &sample, event, rec.header.size);
if (ret)
return;
@@ -8333,7 +8325,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
perf_event_header__init_id(&lost_samples_event.header, &sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
lost_samples_event.header.size);
if (ret)
return;
@@ -8388,7 +8380,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data)
perf_event_header__init_id(&se->event_id.header, &sample, event);
- ret = perf_output_begin(&handle, event, se->event_id.header.size);
+ ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size);
if (ret)
return;
@@ -8463,7 +8455,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
perf_event_header__init_id(&throttle_event.header, &sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
throttle_event.header.size);
if (ret)
return;
@@ -8506,7 +8498,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data)
perf_event_header__init_id(&ksymbol_event->event_id.header,
&sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, &sample, event,
ksymbol_event->event_id.header.size);
if (ret)
return;
@@ -8596,7 +8588,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
perf_event_header__init_id(&bpf_event->event_id.header,
&sample, event);
- ret = perf_output_begin(&handle, event,
+ ret = perf_output_begin(&handle, data, event,
bpf_event->event_id.header.size);
if (ret)
return;
@@ -8705,7 +8697,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data)
perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
- ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
+ ret = perf_output_begin(&handle, &sample, event,
+ text_poke_event->event_id.header.size);
if (ret)
return;
@@ -8786,7 +8779,7 @@ static void perf_log_itrace_start(struct perf_event *event)
rec.tid = perf_event_tid(event, current);
perf_event_header__init_id(&rec.header, &sample, event);
- ret = perf_output_begin(&handle, event, rec.header.size);
+ ret = perf_output_begin(&handle, &sample, event, rec.header.size);
if (ret)
return;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index fcbf5616a441..228801e20788 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -205,16 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
static inline int get_recursion_context(int *recursion)
{
- int rctx;
-
- if (unlikely(in_nmi()))
- rctx = 3;
- else if (in_irq())
- rctx = 2;
- else if (in_softirq())
- rctx = 1;
- else
- rctx = 0;
+ unsigned int pc = preempt_count();
+ unsigned char rctx = 0;
+
+ rctx += !!(pc & (NMI_MASK));
+ rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+ rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
if (recursion[rctx])
return -1;
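The rewritten get_recursion_context() derives the index purely from preempt_count() bits: each successive mask test adds one, so NMI context yields 3, hard IRQ 2, soft IRQ 1, and plain task context 0, with no branches. A standalone sketch of the same arithmetic; the mask values below are placeholders for illustration, not the kernel's actual preempt_count layout:

/*
 * Branch-free mapping of a preempt_count()-style word to a recursion
 * index, mirroring the hunk above. Mask values are illustrative only.
 */
#include <stdio.h>

#define SOFTIRQ_OFFSET	0x00000100u
#define HARDIRQ_MASK	0x000f0000u
#define NMI_MASK	0x00f00000u

static unsigned int recursion_index(unsigned int pc)
{
	unsigned int rctx = 0;

	rctx += !!(pc & NMI_MASK);					/* +1 if in NMI */
	rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));			/* +1 if in NMI or hard IRQ */
	rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));	/* +1 if in any of the three */
	return rctx;	/* task = 0, softirq = 1, hardirq = 2, NMI = 3 */
}

int main(void)
{
	printf("task:    %u\n", recursion_index(0));
	printf("softirq: %u\n", recursion_index(SOFTIRQ_OFFSET));
	printf("hardirq: %u\n", recursion_index(HARDIRQ_MASK));
	printf("nmi:     %u\n", recursion_index(NMI_MASK | HARDIRQ_MASK));
	return 0;
}
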
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 192b8abc6330..ef91ae75ca56 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
static __always_inline int
__perf_output_begin(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size,
bool backward)
{
@@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle,
handle->size = (1UL << page_shift) - offset;
if (unlikely(have_lost)) {
- struct perf_sample_data sample_data;
-
lost_event.header.size = sizeof(lost_event);
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.id = event->id;
lost_event.lost = local_xchg(&rb->lost, 0);
- perf_event_header__init_id(&lost_event.header,
- &sample_data, event);
+ /* XXX mostly redundant; @data is already fully initialized */
+ perf_event_header__init_id(&lost_event.header, data, event);
perf_output_put(handle, lost_event);
- perf_event__output_id_sample(event, handle, &sample_data);
+ perf_event__output_id_sample(event, handle, data);
}
return 0;
@@ -263,22 +262,25 @@ out:
}
int perf_output_begin_forward(struct perf_output_handle *handle,
- struct perf_event *event, unsigned int size)
+ struct perf_sample_data *data,
+ struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size, false);
+ return __perf_output_begin(handle, data, event, size, false);
}
int perf_output_begin_backward(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size, true);
+ return __perf_output_begin(handle, data, event, size, true);
}
int perf_output_begin(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size,
+ return __perf_output_begin(handle, data, event, size,
unlikely(is_write_backward(event)));
}
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
index 63b349168da7..b0b1ad93fa95 100644
--- a/kernel/fail_function.c
+++ b/kernel/fail_function.c
@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
if (copy_from_user(buf, buffer, count)) {
ret = -EFAULT;
- goto out;
+ goto out_free;
}
buf[count] = '\0';
sym = strstrip(buf);
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
ret = count;
}
out:
- kfree(buf);
mutex_unlock(&fei_lock);
+out_free:
+ kfree(buf);
return ret;
}
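The fei_write() fix is the classic "unwind only what was set up" ordering: the early copy_from_user() failure now jumps past the mutex_unlock() to a label that only frees the buffer, since the lock is not yet held at that point. A small userspace sketch of the same label ordering, using pthreads and names invented for the example:

/*
 * Sketch of the error-unwind ordering used above: labels are placed so
 * that each failure point skips the cleanup it never reached.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;

static int update_config(const char *input)
{
	char *buf;
	int ret = 0;

	buf = malloc(64);
	if (!buf)
		return -1;

	if (strlen(input) >= 64) {	/* fails before the lock is taken */
		ret = -1;
		goto out_free;		/* skip the unlock: we never locked */
	}
	strcpy(buf, input);

	pthread_mutex_lock(&cfg_lock);
	if (buf[0] == '#') {		/* arbitrary failure under the lock */
		ret = -1;
		goto out_unlock;
	}
	printf("applied: %s\n", buf);
out_unlock:
	pthread_mutex_unlock(&cfg_lock);
out_free:
	free(buf);
	return ret;
}

int main(void)
{
	update_config("short");
	update_config("this string is far too long to fit into the sixty-four byte buffer used here");
	return 0;
}
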
diff --git a/kernel/futex.c b/kernel/futex.c
index ac328874f6e5..00259c7e288e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi_state *pi_state)
*/
if (pi_state->owner) {
struct task_struct *owner;
+ unsigned long flags;
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
owner = pi_state->owner;
if (owner) {
raw_spin_lock(&owner->pi_lock);
@@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
raw_spin_unlock(&owner->pi_lock);
}
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache) {
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b71ad8d9f1c9..d9fb9e19d2ed 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2765,7 +2765,9 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
* (Note that this has to be done separately, because the graph cannot
* detect such classes of deadlocks.)
*
- * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
+ * Returns: 0 on deadlock detected, 1 on OK, 2 if another lock with the same
+ * lock class is held but nest_lock is also held, i.e. we rely on the
+ * nest_lock to avoid the deadlock.
*/
static int
check_deadlock(struct task_struct *curr, struct held_lock *next)
@@ -2788,7 +2790,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next)
* lock class (i.e. read_lock(lock)+read_lock(lock)):
*/
if ((next->read == 2) && prev->read)
- return 2;
+ continue;
/*
* We're holding the nest_lock, which serializes this lock's
@@ -3593,15 +3595,12 @@ static int validate_chain(struct task_struct *curr,
if (!ret)
return 0;
/*
- * Mark recursive read, as we jump over it when
- * building dependencies (just like we jump over
- * trylock entries):
- */
- if (ret == 2)
- hlock->read = 2;
- /*
* Add dependency only if this lock is not the head
- * of the chain, and if it's not a secondary read-lock:
+ * of the chain, and if the new lock introduces no more
+ * lock dependency (because we already hold a lock with the
+ * same lock class) nor deadlock (because the nest_lock
+ * serializes nesting locks), see the comments for
+ * check_deadlock().
*/
if (!chain_head && ret != 2) {
if (!check_prevs_add(curr, hlock))
diff --git a/kernel/panic.c b/kernel/panic.c
index 396142ee43fd..332736a72a58 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -605,7 +605,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
panic("panic_on_warn set ...\n");
}
- dump_stack();
+ if (!regs)
+ dump_stack();
print_irqtrace_events(current);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a52f42f64b6..bd04b09b84b3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4077,7 +4077,6 @@ void rcu_cpu_starting(unsigned int cpu)
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}
-#ifdef CONFIG_HOTPLUG_CPU
/*
* The outgoing function has no further need of RCU, so remove it from
* the rcu_node tree's ->qsmaskinitnext bit masks.
@@ -4117,6 +4116,7 @@ void rcu_report_dead(unsigned int cpu)
rdp->cpu_started = false;
}
+#ifdef CONFIG_HOTPLUG_CPU
/*
* The outgoing CPU has just passed through the dying-idle state, and we
* are being invoked from the CPU that was IPIed to continue the offline
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 0fde39b8daab..ca21d28a0f98 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -249,13 +249,16 @@ static bool check_slow_task(struct task_struct *t, void *arg)
/*
* Scan the current list of tasks blocked within RCU read-side critical
- * sections, printing out the tid of each.
+ * sections, printing out the tid of each of the first few of them.
*/
-static int rcu_print_task_stall(struct rcu_node *rnp)
+static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
+ __releases(rnp->lock)
{
+ int i = 0;
int ndetected = 0;
struct rcu_stall_chk_rdr rscr;
struct task_struct *t;
+ struct task_struct *ts[8];
if (!rcu_preempt_blocked_readers_cgp(rnp))
return 0;
@@ -264,6 +267,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
t = list_entry(rnp->gp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+ get_task_struct(t);
+ ts[i++] = t;
+ if (i >= ARRAY_SIZE(ts))
+ break;
+ }
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ for (i--; i; i--) {
+ t = ts[i];
if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
pr_cont(" P%d", t->pid);
else
@@ -273,6 +284,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
".q"[rscr.rs.b.need_qs],
".e"[rscr.rs.b.exp_hint],
".l"[rscr.on_blkd_list]);
+ put_task_struct(t);
ndetected++;
}
pr_cont("\n");
@@ -293,8 +305,9 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
* Because preemptible RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/
-static int rcu_print_task_stall(struct rcu_node *rnp)
+static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
{
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
@@ -472,7 +485,6 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
rcu_for_each_leaf_node(rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
- ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for_each_leaf_node_possible_cpu(rnp, cpu)
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
@@ -480,7 +492,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
ndetected++;
}
}
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
}
for_each_possible_cpu(cpu)
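The tree_stall.h change follows a common shape: take references to the interesting tasks while the rcu_node lock is held, drop the lock, then do the slower printing and the put_task_struct() calls afterwards. A userspace analogue of that "snapshot under the lock, process outside it" pattern, with a toy refcount standing in for get/put_task_struct():

/*
 * Minimal sketch of the pattern above: pin references under the lock,
 * release the lock, then do the slow work and drop the references.
 * Names and the refcounting are purely illustrative.
 */
#include <pthread.h>
#include <stdio.h>

struct task {
	int pid;
	int refs;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct task tasks[] = { { 101, 1 }, { 102, 1 }, { 103, 1 } };

static void get_task(struct task *t) { t->refs++; }
static void put_task(struct task *t) { t->refs--; }

static void report_blocked_tasks(void)
{
	struct task *snap[8];
	size_t n;
	int i = 0;

	pthread_mutex_lock(&list_lock);			/* cheap part: just pin the tasks */
	for (n = 0; n < sizeof(tasks) / sizeof(tasks[0]) && i < 8; n++) {
		get_task(&tasks[n]);
		snap[i++] = &tasks[n];
	}
	pthread_mutex_unlock(&list_lock);

	while (i) {					/* slow part runs without the lock */
		struct task *t = snap[--i];

		printf(" P%d", t->pid);
		put_task(t);
	}
	printf("\n");
}

int main(void)
{
	report_blocked_tasks();
	return 0;
}
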
diff --git a/kernel/reboot.c b/kernel/reboot.c
index e7b78d5ae1ab..af6f23d8bea1 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -551,22 +551,22 @@ static int __init reboot_setup(char *str)
break;
case 's':
- {
- int rc;
-
- if (isdigit(*(str+1))) {
- rc = kstrtoint(str+1, 0, &reboot_cpu);
- if (rc)
- return rc;
- } else if (str[1] == 'm' && str[2] == 'p' &&
- isdigit(*(str+3))) {
- rc = kstrtoint(str+3, 0, &reboot_cpu);
- if (rc)
- return rc;
- } else
+ if (isdigit(*(str+1)))
+ reboot_cpu = simple_strtoul(str+1, NULL, 0);
+ else if (str[1] == 'm' && str[2] == 'p' &&
+ isdigit(*(str+3)))
+ reboot_cpu = simple_strtoul(str+3, NULL, 0);
+ else
*mode = REBOOT_SOFT;
+ if (reboot_cpu >= num_possible_cpus()) {
+ pr_err("Ignoring the CPU number in reboot= option. "
+ "CPU %d exceeds possible cpu number %d\n",
+ reboot_cpu, num_possible_cpus());
+ reboot_cpu = 0;
+ break;
+ }
break;
- }
+
case 'g':
*mode = REBOOT_GPIO;
break;
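The reboot_setup() hunk swaps kstrtoint() for simple_strtoul(), which stops at the first non-digit instead of rejecting trailing characters, and rejects CPU numbers at or above num_possible_cpus(). A hedged userspace sketch of the same "s<N>" / "smp<N>" parsing and bounds check; NR_CPUS here is an illustrative stand-in for the possible-CPU count, and strtoul() plays the role of simple_strtoul():

/*
 * Userspace sketch of the "reboot=s<N>" / "reboot=smp<N>" parsing above.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 8	/* illustrative stand-in for num_possible_cpus() */

static void parse_reboot_cpu(const char *str, int *reboot_cpu, int *soft)
{
	if (isdigit((unsigned char)str[1]))
		*reboot_cpu = strtoul(str + 1, NULL, 0);
	else if (str[1] == 'm' && str[2] == 'p' && isdigit((unsigned char)str[3]))
		*reboot_cpu = strtoul(str + 3, NULL, 0);
	else
		*soft = 1;		/* "s" alone means soft reboot */

	if (*reboot_cpu >= NR_CPUS) {
		fprintf(stderr, "CPU %d exceeds possible cpu number %d, ignoring\n",
			*reboot_cpu, NR_CPUS);
		*reboot_cpu = 0;
	}
}

int main(void)
{
	int cpu = 0, soft = 0;

	parse_reboot_cpu("s2", &cpu, &soft);		/* cpu = 2 */
	printf("cpu=%d soft=%d\n", cpu, soft);
	parse_reboot_cpu("smp512", &cpu, &soft);	/* out of range, falls back to 0 */
	printf("cpu=%d soft=%d\n", cpu, soft);
	return 0;
}
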
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 0655524700d2..2357921580f9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
unsigned long flags = *(unsigned long *)table->data;
size_t data_size = 0;
size_t len = 0;
- char *tmp;
+ char *tmp, *buf;
int idx;
if (write)
@@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
return 0;
}
- tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
- if (!tmp)
+ buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
char *name = sd_flag_debug[idx].name;
- len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
+ len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
}
- tmp += *ppos;
+ tmp = buf + *ppos;
len -= *ppos;
if (len > *lenp)
@@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
*lenp = len;
*ppos += len;
- kfree(tmp);
+ kfree(buf);
return 0;
}
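The sd_ctl_doflags() fix keeps the original allocation in buf and only advances a separate cursor, so the later kfree() receives the pointer that kcalloc() returned. A tiny userspace demonstration of the same rule (free() must see the original pointer, never one offset by *ppos); read_at() is a made-up helper for the example:

/*
 * Illustration of the fix above: advance a cursor, not the pointer you
 * are going to free.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static size_t read_at(char *dst, size_t dst_len, size_t *ppos)
{
	char *buf, *cursor;
	size_t len;

	buf = calloc(64, 1);
	if (!buf)
		return 0;

	len = snprintf(buf, 64, "SD_BALANCE_NEWIDLE SD_BALANCE_EXEC ");

	cursor = buf + *ppos;		/* the old code advanced buf itself ... */
	len -= *ppos;
	if (len > dst_len)
		len = dst_len;
	memcpy(dst, cursor, len);
	*ppos += len;

	free(buf);			/* ... which would have corrupted this free() */
	return len;
}

int main(void)
{
	char out[16];
	size_t pos = 0, n;

	while ((n = read_at(out, sizeof(out) - 1, &pos)) > 0) {
		out[n] = '\0';
		printf("chunk: \"%s\"\n", out);
	}
	return 0;
}
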
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 290f9e38378c..8917d2d715ef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
static int
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
{
- unsigned long best_cap = 0;
+ unsigned long task_util, best_cap = 0;
int cpu, best_cpu = -1;
struct cpumask *cpus;
- sync_entity_load_avg(&p->se);
-
cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+ task_util = uclamp_task_util(p);
+
for_each_cpu_wrap(cpu, cpus, target) {
unsigned long cpu_cap = capacity_of(cpu);
if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
continue;
- if (task_fits_capacity(p, cpu_cap))
+ if (fits_capacity(task_util, cpu_cap))
return cpu;
if (cpu_cap > best_cap) {
@@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
return best_cpu;
}
+static inline bool asym_fits_capacity(int task_util, int cpu)
+{
+ if (static_branch_unlikely(&sched_asym_cpucapacity))
+ return fits_capacity(task_util, capacity_of(cpu));
+
+ return true;
+}
+
/*
* Try and locate an idle core/thread in the LLC cache domain.
*/
static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
+ unsigned long task_util;
int i, recent_used_cpu;
/*
- * For asymmetric CPU capacity systems, our domain of interest is
- * sd_asym_cpucapacity rather than sd_llc.
+ * On asymmetric system, update task utilization because we will check
+ * that the task fits with cpu's capacity.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
- sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
- /*
- * On an asymmetric CPU capacity system where an exclusive
- * cpuset defines a symmetric island (i.e. one unique
- * capacity_orig value through the cpuset), the key will be set
- * but the CPUs within that cpuset will not have a domain with
- * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
- * capacity path.
- */
- if (!sd)
- goto symmetric;
-
- i = select_idle_capacity(p, sd, target);
- return ((unsigned)i < nr_cpumask_bits) ? i : target;
+ sync_entity_load_avg(&p->se);
+ task_util = uclamp_task_util(p);
}
-symmetric:
- if (available_idle_cpu(target) || sched_idle_cpu(target))
+ if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+ asym_fits_capacity(task_util, target))
return target;
/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
if (prev != target && cpus_share_cache(prev, target) &&
- (available_idle_cpu(prev) || sched_idle_cpu(prev)))
+ (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+ asym_fits_capacity(task_util, prev))
return prev;
/*
@@ -6258,7 +6256,8 @@ symmetric:
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
- cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
+ cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+ asym_fits_capacity(task_util, recent_used_cpu)) {
/*
* Replace recent_used_cpu with prev as it is a potential
* candidate for the next wake:
@@ -6267,6 +6266,26 @@ symmetric:
return recent_used_cpu;
}
+ /*
+ * For asymmetric CPU capacity systems, our domain of interest is
+ * sd_asym_cpucapacity rather than sd_llc.
+ */
+ if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+ sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
+ /*
+ * On an asymmetric CPU capacity system where an exclusive
+ * cpuset defines a symmetric island (i.e. one unique
+ * capacity_orig value through the cpuset), the key will be set
+ * but the CPUs within that cpuset will not have a domain with
+ * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
+ * capacity path.
+ */
+ if (sd) {
+ i = select_idle_capacity(p, sd, target);
+ return ((unsigned)i < nr_cpumask_bits) ? i : target;
+ }
+ }
+
sd = rcu_dereference(per_cpu(sd_llc, target));
if (!sd)
return target;
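The new asym_fits_capacity() gates each idle-CPU candidate on whether the task's (uclamped) utilization fits that CPU, and degenerates to "always fits" on symmetric systems where the static key is off. A rough standalone sketch of that shape; the 1280/1024 headroom (~20%) is modeled on fits_capacity() but should be read as illustrative, and the static key is reduced to a plain boolean:

/*
 * Sketch of the capacity-fit gating used above.
 */
#include <stdbool.h>
#include <stdio.h>

static bool sched_asym_cpucapacity;	/* static-key stand-in */

/* utilization fits if it stays roughly 20% below the CPU's capacity */
static bool fits_capacity(unsigned long util, unsigned long cap)
{
	return util * 1280 < cap * 1024;
}

static bool asym_fits_capacity(unsigned long task_util, unsigned long cpu_cap)
{
	if (sched_asym_cpucapacity)
		return fits_capacity(task_util, cpu_cap);

	return true;	/* symmetric system: every CPU has the same capacity */
}

int main(void)
{
	sched_asym_cpucapacity = true;

	/* a 450-unit task on a 512-capacity little CPU vs a 1024-capacity big CPU */
	printf("little: %s\n", asym_fits_capacity(450, 512) ? "fits" : "does not fit");
	printf("big:    %s\n", asym_fits_capacity(450, 1024) ? "fits" : "does not fit");
	return 0;
}
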
@@ -9031,7 +9050,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* emptying busiest.
*/
if (local->group_type == group_has_spare) {
- if (busiest->group_type > group_fully_busy) {
+ if ((busiest->group_type > group_fully_busy) &&
+ !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
/*
* If busiest is overloaded, try to fill spare
* capacity. This might end up creating spare capacity
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 02986c7b90eb..a18fee10e906 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -184,6 +184,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
{
int ret;
+ /*
+ * NB: We rely on strncpy_from_user() not copying junk past the NUL
+ * terminator into `dst`.
+ *
+ * strncpy_from_user() does long-sized strides in the fast path. If the
+ * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
+ * then there could be junk after the NUL in `dst`. If user takes `dst`
+ * and keys a hash map with it, then semantically identical strings can
+ * occupy multiple entries in the map.
+ */
ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
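The comment added above documents why stray bytes past the NUL matter: if the destination buffer is used verbatim as a map key, two reads of the same string can yield keys that compare unequal. A small userspace demonstration of that failure mode, using memcmp() over the whole fixed-size buffer as a stand-in for a BPF hash-map key comparison:

/*
 * Identical strings with different trailing junk compare unequal when
 * the whole buffer is the key. Purely illustrative.
 */
#include <stdio.h>
#include <string.h>

#define KEY_SIZE 16

int main(void)
{
	char key_a[KEY_SIZE], key_b[KEY_SIZE];

	/* simulate two copies of "task" that left different junk behind */
	memset(key_a, 0xAA, sizeof(key_a));
	memset(key_b, 0xBB, sizeof(key_b));
	strcpy(key_a, "task");		/* writes "task\0", junk remains after it */
	strcpy(key_b, "task");

	printf("strcmp: %s\n", strcmp(key_a, key_b) == 0 ? "equal" : "different");
	printf("memcmp: %s\n", memcmp(key_a, key_b, KEY_SIZE) == 0 ?
	       "equal (one map entry)" : "different (duplicate map entries)");

	/* a well-defined tail, as the copy is relied on to provide, restores equality */
	memset(key_a + strlen(key_a), 0, KEY_SIZE - strlen(key_a));
	memset(key_b + strlen(key_b), 0, KEY_SIZE - strlen(key_b));
	printf("after zeroing tails: %s\n",
	       memcmp(key_a, key_b, KEY_SIZE) == 0 ? "equal" : "different");
	return 0;
}
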
@@ -1219,7 +1229,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
*btf = bpf_get_btf_vmlinux();
if (IS_ERR_OR_NULL(*btf))
- return PTR_ERR(*btf);
+ return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
if (ptr->type_id > 0)
*btf_id = ptr->type_id;
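The bpf_btf_printf_prepare() change handles the case where the pointer is NULL rather than an encoded error: PTR_ERR(NULL) evaluates to 0, which a caller would read as success, so the NULL case now maps to -EINVAL explicitly. A userspace mock of the err.h helpers to show the difference; the macro definitions below are simplified stand-ins, not the kernel's exact ones:

/*
 * Simplified stand-ins for IS_ERR()/PTR_ERR(), showing why a bare
 * "return PTR_ERR(p)" is wrong when p may legitimately be NULL.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095
#define IS_ERR(p)	((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)
#define PTR_ERR(p)	((long)(p))
#define IS_ERR_OR_NULL(p) ((p) == NULL || IS_ERR(p))

static long check(void *btf)
{
	if (IS_ERR_OR_NULL(btf))
		return IS_ERR(btf) ? PTR_ERR(btf) : -EINVAL;	/* the fixed form */
	return 0;
}

int main(void)
{
	void *null_btf = NULL;
	void *err_btf = (void *)(long)-12;	/* an ERR_PTR(-ENOMEM)-style value */

	/* the old "return PTR_ERR(btf)" would have yielded 0 (success) for NULL */
	printf("NULL    -> %ld\n", check(null_btf));	/* -EINVAL */
	printf("ERR_PTR -> %ld\n", check(err_btf));	/* -12 */
	return 0;
}
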
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5abb5b22ad13..71109065bd8e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -44,8 +44,6 @@ int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly nmi_watchdog_available;
-static struct cpumask watchdog_allowed_mask __read_mostly;
-
struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -162,6 +160,8 @@ static void lockup_detector_update_enable(void)
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif
+static struct cpumask watchdog_allowed_mask __read_mostly;
+
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;