diff options
author | Paul E. McKenney | 2021-11-29 11:46:33 -0800 |
---|---|---|
committer | Paul E. McKenney | 2021-12-09 10:52:11 -0800 |
commit | 2cee0789b458afa384c422b5969c1a338891fd33 (patch) | |
tree | dea77aeb194de1e612e71dd7e64ab7218abbeca7 /kernel/rcu | |
parent | ab97152f88a4d580b89f0b7cc3028ffac438216f (diff) |
rcu-tasks: Use separate ->percpu_dequeue_lim for callback dequeueing
Decreasing the number of callback queues is a bit tricky because it
is necessary to handle callbacks that were queued before the number of
queues decreased, but which were not ready to invoke until afterwards.
This commit takes a first step in this direction by maintaining a separate
->percpu_dequeue_lim to control callback dequeueing, in addition to the
existing ->percpu_enqueue_lim which now controls only enqueueing.
Reported-by: Martin Lau <kafai@fb.com>
Cc: Neeraj Upadhyay <neeraj.iitr10@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/tasks.h | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d37ab69b9db8..b4a2cab6985a 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -66,7 +66,8 @@ struct rcu_tasks_percpu { * @call_func: This flavor's call_rcu()-equivalent function. * @rtpcpu: This flavor's rcu_tasks_percpu structure. * @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks. - * @percpu_enqueue_lim: Number of per-CPU callback queues in use. + * @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing. + * @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing. * @barrier_q_mutex: Serialize barrier operations. * @barrier_q_count: Number of queues being waited on. * @barrier_q_completion: Barrier wait/wakeup mechanism. @@ -96,6 +97,7 @@ struct rcu_tasks { struct rcu_tasks_percpu __percpu *rtpcpu; int percpu_enqueue_shift; int percpu_enqueue_lim; + int percpu_dequeue_lim; struct mutex barrier_q_mutex; atomic_t barrier_q_count; struct completion barrier_q_completion; @@ -121,6 +123,7 @@ static struct rcu_tasks rt_name = \ .name = n, \ .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \ .percpu_enqueue_lim = 1, \ + .percpu_dequeue_lim = 1, \ .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \ .barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT, \ .kname = #rt_name, \ @@ -223,6 +226,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp) if (lim > nr_cpu_ids) lim = nr_cpu_ids; WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim)); + WRITE_ONCE(rtp->percpu_dequeue_lim, lim); smp_store_release(&rtp->percpu_enqueue_lim, lim); for_each_possible_cpu(cpu) { struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); @@ -290,6 +294,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); } @@ -342,7 +347,7 @@ static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp) init_completion(&rtp->barrier_q_completion); atomic_set(&rtp->barrier_q_count, 2); for_each_possible_cpu(cpu) { - if (cpu >= smp_load_acquire(&rtp->percpu_enqueue_lim)) + if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim)) break; rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb; @@ -366,7 +371,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) unsigned long flags; int needgpcb = 0; - for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_enqueue_lim); cpu++) { + for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) { struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); /* Advance and accelerate any new callbacks. */ @@ -397,11 +402,11 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu cpu = rtpcp->cpu; cpunext = cpu * 2 + 1; - if (cpunext < smp_load_acquire(&rtp->percpu_enqueue_lim)) { + if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) { rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext); queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work); cpunext++; - if (cpunext < smp_load_acquire(&rtp->percpu_enqueue_lim)) { + if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) { rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext); queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work); } |