From bd7bdd43dcb81bb08240b9401b36a104f77dc135 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 12 Jul 2012 14:46:37 -0700 Subject: workqueue: factor out worker_pool from global_cwq Move worklist and all worker management fields from global_cwq into the new struct worker_pool. worker_pool points back to the containing gcwq. worker and cpu_workqueue_struct are updated to point to worker_pool instead of gcwq too. This change is mechanical and doesn't introduce any functional difference other than rearranging of fields and an added level of indirection in some places. This is to prepare for multiple pools per gcwq. v2: Comment typo fixes as suggested by Namhyung. Signed-off-by: Tejun Heo Cc: Namhyung Kim --- include/trace/events/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 4018f5058f27..f28d1b65f178 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -54,7 +54,7 @@ TRACE_EVENT(workqueue_queue_work, __entry->function = work->func; __entry->workqueue = cwq->wq; __entry->req_cpu = req_cpu; - __entry->cpu = cwq->gcwq->cpu; + __entry->cpu = cwq->pool->gcwq->cpu; ), TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", -- cgit v1.2.3 From 6575820221f7a4dd6eadecf7bf83cdd154335eda Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jul 2012 12:39:26 -0700 Subject: workqueue: perform cpu down operations from low priority cpu_notifier() Currently, all workqueue cpu hotplug operations run off CPU_PRI_WORKQUEUE which is higher than normal notifiers. This is to ensure that workqueue is up and running while bringing up a CPU before other notifiers try to use workqueue on the CPU. Per-cpu workqueues are supposed to remain working and bound to the CPU for normal CPU_DOWN_PREPARE notifiers. This holds mostly true even with workqueue offlining running with higher priority because workqueue CPU_DOWN_PREPARE only creates a bound trustee thread which runs the per-cpu workqueue without concurrency management without explicitly detaching the existing workers. However, if the trustee needs to create new workers, it creates unbound workers which may wander off to other CPUs while CPU_DOWN_PREPARE notifiers are in progress. Furthermore, if the CPU down is cancelled, the per-CPU workqueue may end up with workers which aren't bound to the CPU. While reliably reproducible with a convoluted artificial test-case involving scheduling and flushing CPU burning work items from CPU down notifiers, this isn't very likely to happen in the wild, and, even when it happens, the effects are likely to be hidden by the following successful CPU down. Fix it by using different priorities for up and down notifiers - high priority for up operations and low priority for down operations. Workqueue cpu hotplug operations will soon go through further cleanup. Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Acked-by: "Rafael J. Wysocki" --- include/linux/cpu.h | 5 +++-- kernel/workqueue.c | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2e9b9ebbeb78..ce7a074f2519 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -73,8 +73,9 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, - /* prepare workqueues for other notifiers */ - CPU_PRI_WORKQUEUE = 5, + /* bring up workqueues before normal notifiers and down after */ + CPU_PRI_WORKQUEUE_UP = 5, + CPU_PRI_WORKQUEUE_DOWN = -5, }; #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4fa9e3552f1e..f59b7fd26e26 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3644,6 +3644,41 @@ err_destroy: return NOTIFY_BAD; } +/* + * Workqueues should be brought up before normal priority CPU notifiers. + * This will be registered high priority CPU notifier. + */ +static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + case CPU_UP_CANCELED: + case CPU_DOWN_FAILED: + case CPU_ONLINE: + return workqueue_cpu_callback(nfb, action, hcpu); + } + return NOTIFY_OK; +} + +/* + * Workqueues should be brought down after normal priority CPU notifiers. + * This will be registered as low priority CPU notifier. + */ +static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + case CPU_DYING: + case CPU_POST_DEAD: + return workqueue_cpu_callback(nfb, action, hcpu); + } + return NOTIFY_OK; +} + #ifdef CONFIG_SMP struct work_for_cpu { @@ -3839,7 +3874,8 @@ static int __init init_workqueues(void) unsigned int cpu; int i; - cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); + cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); + cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { -- cgit v1.2.3 From 46f3d976213452350f9d10b0c2780c2681f7075b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 19 Jul 2012 13:52:53 -0700 Subject: kthread_worker: reimplement flush_kthread_work() to allow freeing the work item being executed kthread_worker provides minimalistic workqueue-like interface for users which need a dedicated worker thread (e.g. for realtime priority). It has basic queue, flush_work, flush_worker operations which mostly match the workqueue counterparts; however, due to the way flush_work() is implemented, it has a noticeable difference of not allowing work items to be freed while being executed. While the current users of kthread_worker are okay with the current behavior, the restriction does impede some valid use cases. Also, removing this difference isn't difficult and actually makes the code easier to understand. This patch reimplements flush_kthread_work() such that it uses a flush_work item instead of queue/done sequence numbers. Signed-off-by: Tejun Heo --- include/linux/kthread.h | 8 ++------ kernel/kthread.c | 48 +++++++++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 0714b24c0e45..22ccf9dee177 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -49,8 +49,6 @@ extern int tsk_fork_get_node(struct task_struct *tsk); * can be queued and flushed using queue/flush_kthread_work() * respectively. Queued kthread_works are processed by a kthread * running kthread_worker_fn(). - * - * A kthread_work can't be freed while it is executing. */ struct kthread_work; typedef void (*kthread_work_func_t)(struct kthread_work *work); @@ -59,15 +57,14 @@ struct kthread_worker { spinlock_t lock; struct list_head work_list; struct task_struct *task; + struct kthread_work *current_work; }; struct kthread_work { struct list_head node; kthread_work_func_t func; wait_queue_head_t done; - atomic_t flushing; - int queue_seq; - int done_seq; + struct kthread_worker *worker; }; #define KTHREAD_WORKER_INIT(worker) { \ @@ -79,7 +76,6 @@ struct kthread_work { .node = LIST_HEAD_INIT((work).node), \ .func = (fn), \ .done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done), \ - .flushing = ATOMIC_INIT(0), \ } #define DEFINE_KTHREAD_WORKER(worker) \ diff --git a/kernel/kthread.c b/kernel/kthread.c index 4bfbff36d447..b579af57ea10 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -360,16 +360,12 @@ repeat: struct kthread_work, node); list_del_init(&work->node); } + worker->current_work = work; spin_unlock_irq(&worker->lock); if (work) { __set_current_state(TASK_RUNNING); work->func(work); - smp_wmb(); /* wmb worker-b0 paired with flush-b1 */ - work->done_seq = work->queue_seq; - smp_mb(); /* mb worker-b1 paired with flush-b0 */ - if (atomic_read(&work->flushing)) - wake_up_all(&work->done); } else if (!freezing(current)) schedule(); @@ -386,7 +382,7 @@ static void insert_kthread_work(struct kthread_worker *worker, lockdep_assert_held(&worker->lock); list_add_tail(&work->node, pos); - work->queue_seq++; + work->worker = worker; if (likely(worker->task)) wake_up_process(worker->task); } @@ -436,25 +432,35 @@ static void kthread_flush_work_fn(struct kthread_work *work) */ void flush_kthread_work(struct kthread_work *work) { - int seq = work->queue_seq; + struct kthread_flush_work fwork = { + KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), + COMPLETION_INITIALIZER_ONSTACK(fwork.done), + }; + struct kthread_worker *worker; + bool noop = false; + +retry: + worker = work->worker; + if (!worker) + return; - atomic_inc(&work->flushing); + spin_lock_irq(&worker->lock); + if (work->worker != worker) { + spin_unlock_irq(&worker->lock); + goto retry; + } - /* - * mb flush-b0 paired with worker-b1, to make sure either - * worker sees the above increment or we see done_seq update. - */ - smp_mb__after_atomic_inc(); + if (!list_empty(&work->node)) + insert_kthread_work(worker, &fwork.work, work->node.next); + else if (worker->current_work == work) + insert_kthread_work(worker, &fwork.work, worker->work_list.next); + else + noop = true; - /* A - B <= 0 tests whether B is in front of A regardless of overflow */ - wait_event(work->done, seq - work->done_seq <= 0); - atomic_dec(&work->flushing); + spin_unlock_irq(&worker->lock); - /* - * rmb flush-b1 paired with worker-b0, to make sure our caller - * sees every change made by work->func(). - */ - smp_mb__after_atomic_dec(); + if (!noop) + wait_for_completion(&fwork.done); } EXPORT_SYMBOL_GPL(flush_kthread_work); -- cgit v1.2.3