From 0a13c00e9d4502b8e3fd9260ce781758ff2c3970 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:44 -0700
Subject: workqueue: reorder queueing functions so that _on() variants are on
 top

Currently, queue/schedule[_delayed]_work_on() are located below the
counterpart without the _on postifx even though the latter is usually
implemented using the former.  Swap them.

This is cleanup and doesn't cause any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  13 ++---
 kernel/workqueue.c        | 124 +++++++++++++++++++++++-----------------------
 2 files changed, 69 insertions(+), 68 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index af155450cabb..597034276611 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -365,23 +365,24 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
 extern int queue_work_on(int cpu, struct workqueue_struct *wq,
 			struct work_struct *work);
-extern int queue_delayed_work(struct workqueue_struct *wq,
-			struct delayed_work *work, unsigned long delay);
+extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
+extern int queue_delayed_work(struct workqueue_struct *wq,
+			struct delayed_work *work, unsigned long delay);
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
-extern int schedule_work(struct work_struct *work);
 extern int schedule_work_on(int cpu, struct work_struct *work);
-extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
+extern int schedule_work(struct work_struct *work);
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
-					unsigned long delay);
+				    unsigned long delay);
+extern int schedule_delayed_work(struct delayed_work *work,
+				 unsigned long delay);
 extern int schedule_on_each_cpu(work_func_t func);
 extern int keventd_up(void);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 692d97628a10..07d309e7e359 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1052,27 +1052,6 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	spin_unlock_irqrestore(&gcwq->lock, flags);
 }
 
-/**
- * queue_work - queue work on a workqueue
- * @wq: workqueue to use
- * @work: work to queue
- *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
- *
- * We queue the work to the CPU on which it was submitted, but if the CPU dies
- * it can be processed by another CPU.
- */
-int queue_work(struct workqueue_struct *wq, struct work_struct *work)
-{
-	int ret;
-
-	ret = queue_work_on(get_cpu(), wq, work);
-	put_cpu();
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(queue_work);
-
 /**
  * queue_work_on - queue work on specific cpu
  * @cpu: CPU number to execute work on
@@ -1097,31 +1076,34 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work_on);
 
-static void delayed_work_timer_fn(unsigned long __data)
-{
-	struct delayed_work *dwork = (struct delayed_work *)__data;
-	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
-
-	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
-}
-
 /**
- * queue_delayed_work - queue work on a workqueue after delay
+ * queue_work - queue work on a workqueue
  * @wq: workqueue to use
- * @dwork: delayable work to queue
- * @delay: number of jiffies to wait before queueing
+ * @work: work to queue
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to the CPU on which it was submitted, but if the CPU dies
+ * it can be processed by another CPU.
  */
-int queue_delayed_work(struct workqueue_struct *wq,
-			struct delayed_work *dwork, unsigned long delay)
+int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	if (delay == 0)
-		return queue_work(wq, &dwork->work);
+	int ret;
 
-	return queue_delayed_work_on(-1, wq, dwork, delay);
+	ret = queue_work_on(get_cpu(), wq, work);
+	put_cpu();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work);
+
+static void delayed_work_timer_fn(unsigned long __data)
+{
+	struct delayed_work *dwork = (struct delayed_work *)__data;
+	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
+
+	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
 }
-EXPORT_SYMBOL_GPL(queue_delayed_work);
 
 /**
  * queue_delayed_work_on - queue work on specific CPU after delay
@@ -1178,6 +1160,24 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
+/**
+ * queue_delayed_work - queue work on a workqueue after delay
+ * @wq: workqueue to use
+ * @dwork: delayable work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ */
+int queue_delayed_work(struct workqueue_struct *wq,
+			struct delayed_work *dwork, unsigned long delay)
+{
+	if (delay == 0)
+		return queue_work(wq, &dwork->work);
+
+	return queue_delayed_work_on(-1, wq, dwork, delay);
+}
+EXPORT_SYMBOL_GPL(queue_delayed_work);
+
 /**
  * worker_enter_idle - enter idle state
  * @worker: worker which is entering idle state
@@ -2877,6 +2877,19 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
 
+/*
+ * schedule_work_on - put work task on a specific cpu
+ * @cpu: cpu to put the work task on
+ * @work: job to be done
+ *
+ * This puts a job on a specific cpu
+ */
+int schedule_work_on(int cpu, struct work_struct *work)
+{
+	return queue_work_on(cpu, system_wq, work);
+}
+EXPORT_SYMBOL(schedule_work_on);
+
 /**
  * schedule_work - put work task in global workqueue
  * @work: job to be done
@@ -2894,18 +2907,21 @@ int schedule_work(struct work_struct *work)
 }
 EXPORT_SYMBOL(schedule_work);
 
-/*
- * schedule_work_on - put work task on a specific cpu
- * @cpu: cpu to put the work task on
- * @work: job to be done
+/**
+ * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
+ * @cpu: cpu to use
+ * @dwork: job to be done
+ * @delay: number of jiffies to wait
  *
- * This puts a job on a specific cpu
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue on the specified CPU.
  */
-int schedule_work_on(int cpu, struct work_struct *work)
+int schedule_delayed_work_on(int cpu,
+			struct delayed_work *dwork, unsigned long delay)
 {
-	return queue_work_on(cpu, system_wq, work);
+	return queue_delayed_work_on(cpu, system_wq, dwork, delay);
 }
-EXPORT_SYMBOL(schedule_work_on);
+EXPORT_SYMBOL(schedule_delayed_work_on);
 
 /**
  * schedule_delayed_work - put work task in global workqueue after delay
@@ -2922,22 +2938,6 @@ int schedule_delayed_work(struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(schedule_delayed_work);
 
-/**
- * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
- * @cpu: cpu to use
- * @dwork: job to be done
- * @delay: number of jiffies to wait
- *
- * After waiting for a given time this puts a job in the kernel-global
- * workqueue on the specified CPU.
- */
-int schedule_delayed_work_on(int cpu,
-			struct delayed_work *dwork, unsigned long delay)
-{
-	return queue_delayed_work_on(cpu, system_wq, dwork, delay);
-}
-EXPORT_SYMBOL(schedule_delayed_work_on);
-
 /**
  * schedule_on_each_cpu - execute a function synchronously on each online CPU
  * @func: the function to call
-- 
cgit v1.2.3


From d4283e9378619c14dc3826a6b0527eb5d967ffde Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:44 -0700
Subject: workqueue: make queueing functions return bool

All queueing functions return 1 on success, 0 if the work item was
already pending.  Update them to return bool instead.  This signifies
better that they don't return 0 / -errno.

This is cleanup and doesn't cause any functional difference.

While at it, fix comment opening for schedule_work_on().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 20 ++++++++++----------
 kernel/workqueue.c        | 47 +++++++++++++++++++++++------------------------
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 597034276611..278dc5ddb73f 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -365,24 +365,24 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-extern int queue_work_on(int cpu, struct workqueue_struct *wq,
+extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
 			struct work_struct *work);
-extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
-extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+extern bool queue_work(struct workqueue_struct *wq, struct work_struct *work);
+extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
-extern int queue_delayed_work(struct workqueue_struct *wq,
+extern bool queue_delayed_work(struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
-extern int schedule_work_on(int cpu, struct work_struct *work);
-extern int schedule_work(struct work_struct *work);
-extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
-				    unsigned long delay);
-extern int schedule_delayed_work(struct delayed_work *work,
-				 unsigned long delay);
+extern bool schedule_work_on(int cpu, struct work_struct *work);
+extern bool schedule_work(struct work_struct *work);
+extern bool schedule_delayed_work_on(int cpu, struct delayed_work *work,
+				     unsigned long delay);
+extern bool schedule_delayed_work(struct delayed_work *work,
+				  unsigned long delay);
 extern int schedule_on_each_cpu(work_func_t func);
 extern int keventd_up(void);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 07d309e7e359..70f95ab28f3d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1058,19 +1058,19 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
  * @wq: workqueue to use
  * @work: work to queue
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.
  *
  * We queue the work to a specific CPU, the caller must ensure it
  * can't go away.
  */
-int
-queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
+bool queue_work_on(int cpu, struct workqueue_struct *wq,
+		   struct work_struct *work)
 {
-	int ret = 0;
+	bool ret = false;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		__queue_work(cpu, wq, work);
-		ret = 1;
+		ret = true;
 	}
 	return ret;
 }
@@ -1081,14 +1081,14 @@ EXPORT_SYMBOL_GPL(queue_work_on);
  * @wq: workqueue to use
  * @work: work to queue
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.
  *
  * We queue the work to the CPU on which it was submitted, but if the CPU dies
  * it can be processed by another CPU.
  */
-int queue_work(struct workqueue_struct *wq, struct work_struct *work)
+bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	int ret;
+	bool ret;
 
 	ret = queue_work_on(get_cpu(), wq, work);
 	put_cpu();
@@ -1112,14 +1112,14 @@ static void delayed_work_timer_fn(unsigned long __data)
  * @dwork: work to queue
  * @delay: number of jiffies to wait before queueing
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.
  */
-int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
-			struct delayed_work *dwork, unsigned long delay)
+bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+			   struct delayed_work *dwork, unsigned long delay)
 {
-	int ret = 0;
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
+	bool ret = false;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		unsigned int lcpu;
@@ -1154,7 +1154,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			add_timer_on(timer, cpu);
 		else
 			add_timer(timer);
-		ret = 1;
+		ret = true;
 	}
 	return ret;
 }
@@ -1166,9 +1166,9 @@ EXPORT_SYMBOL_GPL(queue_delayed_work_on);
  * @dwork: delayable work to queue
  * @delay: number of jiffies to wait before queueing
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.
  */
-int queue_delayed_work(struct workqueue_struct *wq,
+bool queue_delayed_work(struct workqueue_struct *wq,
 			struct delayed_work *dwork, unsigned long delay)
 {
 	if (delay == 0)
@@ -2877,14 +2877,14 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
 
-/*
+/**
  * schedule_work_on - put work task on a specific cpu
  * @cpu: cpu to put the work task on
  * @work: job to be done
  *
  * This puts a job on a specific cpu
  */
-int schedule_work_on(int cpu, struct work_struct *work)
+bool schedule_work_on(int cpu, struct work_struct *work)
 {
 	return queue_work_on(cpu, system_wq, work);
 }
@@ -2894,14 +2894,14 @@ EXPORT_SYMBOL(schedule_work_on);
  * schedule_work - put work task in global workqueue
  * @work: job to be done
  *
- * Returns zero if @work was already on the kernel-global workqueue and
- * non-zero otherwise.
+ * Returns %false if @work was already on the kernel-global workqueue and
+ * %true otherwise.
  *
  * This puts a job in the kernel-global workqueue if it was not already
  * queued and leaves it in the same position on the kernel-global
  * workqueue otherwise.
  */
-int schedule_work(struct work_struct *work)
+bool schedule_work(struct work_struct *work)
 {
 	return queue_work(system_wq, work);
 }
@@ -2916,8 +2916,8 @@ EXPORT_SYMBOL(schedule_work);
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue on the specified CPU.
  */
-int schedule_delayed_work_on(int cpu,
-			struct delayed_work *dwork, unsigned long delay)
+bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+			      unsigned long delay)
 {
 	return queue_delayed_work_on(cpu, system_wq, dwork, delay);
 }
@@ -2931,8 +2931,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue.
  */
-int schedule_delayed_work(struct delayed_work *dwork,
-					unsigned long delay)
+bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
 {
 	return queue_delayed_work(system_wq, dwork, delay);
 }
-- 
cgit v1.2.3


From 959d1af8cffc8fd38ed53e8be1cf4ab8782f9c00 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:45 -0700
Subject: workqueue: add missing smp_wmb() in process_one_work()

WORK_STRUCT_PENDING is used to claim ownership of a work item and
process_one_work() releases it before starting execution.  When
someone else grabs PENDING, all pre-release updates to the work item
should be visible and all updates made by the new owner should happen
afterwards.

Grabbing PENDING uses test_and_set_bit() and thus has a full barrier;
however, clearing doesn't have a matching wmb.  Given the preceding
spin_unlock and use of clear_bit, I don't believe this can be a
problem on an actual machine and there hasn't been any related report
but it still is theretically possible for clear_pending to permeate
upwards and happen before work->entry update.

Add an explicit smp_wmb() before work_clear_pending().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: stable@vger.kernel.org
---
 kernel/workqueue.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 70f95ab28f3d..5c26d36146b7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1997,7 +1997,9 @@ __acquires(&gcwq->lock)
 
 	spin_unlock_irq(&gcwq->lock);
 
+	smp_wmb();	/* paired with test_and_set_bit(PENDING) */
 	work_clear_pending(work);
+
 	lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
-- 
cgit v1.2.3


From 8930caba3dbdd8b86dd6934a5920bf61b53a931e Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:45 -0700
Subject: workqueue: disable irq while manipulating PENDING

Queueing operations use WORK_STRUCT_PENDING_BIT to synchronize access
to the target work item.  They first try to claim the bit and proceed
with queueing only after that succeeds and there's a window between
PENDING being set and the actual queueing where the task can be
interrupted or preempted.

There's also a similar window in process_one_work() when clearing
PENDING.  A work item is dequeued, gcwq->lock is released and then
PENDING is cleared and the worker might get interrupted or preempted
between releasing gcwq->lock and clearing PENDING.

cancel[_delayed]_work_sync() tries to claim or steal PENDING.  The
function assumes that a work item with PENDING is either queued or in
the process of being [de]queued.  In the latter case, it busy-loops
until either the work item loses PENDING or is queued.  If canceling
coincides with the above described interrupts or preemptions, the
canceling task will busy-loop while the queueing or executing task is
preempted.

This patch keeps irq disabled across claiming PENDING and actual
queueing and moves PENDING clearing in process_one_work() inside
gcwq->lock so that busy looping from PENDING && !queued doesn't wait
for interrupted/preempted tasks.  Note that, in process_one_work(),
setting last CPU and clearing PENDING got merged into single
operation.

This removes possible long busy-loops and will allow using
try_to_grab_pending() from bh and irq contexts.

v2: __queue_work() was testing preempt_count() to ensure that the
    caller has disabled preemption.  This triggers spuriously if
    !CONFIG_PREEMPT_COUNT.  Use preemptible() instead.  Reported by
    Fengguang Wu.

v3: Disable irq instead of preemption.  IRQ will be disabled while
    grabbing gcwq->lock later anyway and this allows using
    try_to_grab_pending() from bh and irq contexts.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
---
 kernel/workqueue.c | 73 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5c26d36146b7..30474c4e107c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -537,9 +537,10 @@ static int work_next_color(int color)
  * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
  * cleared and the work data contains the cpu number it was last on.
  *
- * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
- * cwq, cpu or clear work->data.  These functions should only be
- * called while the work is owned - ie. while the PENDING bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
+ * can be used to set the cwq, cpu or clear work->data.  These functions
+ * should only be called while the work is owned - ie. while the PENDING
+ * bit is set.
  *
  * get_work_[g]cwq() can be used to obtain the gcwq or cwq
  * corresponding to a work.  gcwq is available once the work has been
@@ -561,9 +562,10 @@ static void set_work_cwq(struct work_struct *work,
 		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
-static void set_work_cpu(struct work_struct *work, unsigned int cpu)
+static void set_work_cpu_and_clear_pending(struct work_struct *work,
+					   unsigned int cpu)
 {
-	set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
+	set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
@@ -981,7 +983,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	struct cpu_workqueue_struct *cwq;
 	struct list_head *worklist;
 	unsigned int work_flags;
-	unsigned long flags;
+
+	/*
+	 * While a work item is PENDING && off queue, a task trying to
+	 * steal the PENDING will busy-loop waiting for it to either get
+	 * queued or lose PENDING.  Grabbing PENDING and queueing should
+	 * happen with IRQ disabled.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
 
 	debug_work_activate(work);
 
@@ -1008,7 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 		    (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
 			struct worker *worker;
 
-			spin_lock_irqsave(&last_gcwq->lock, flags);
+			spin_lock(&last_gcwq->lock);
 
 			worker = find_worker_executing_work(last_gcwq, work);
 
@@ -1016,14 +1025,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 				gcwq = last_gcwq;
 			else {
 				/* meh... not running there, queue here */
-				spin_unlock_irqrestore(&last_gcwq->lock, flags);
-				spin_lock_irqsave(&gcwq->lock, flags);
+				spin_unlock(&last_gcwq->lock);
+				spin_lock(&gcwq->lock);
 			}
-		} else
-			spin_lock_irqsave(&gcwq->lock, flags);
+		} else {
+			spin_lock(&gcwq->lock);
+		}
 	} else {
 		gcwq = get_gcwq(WORK_CPU_UNBOUND);
-		spin_lock_irqsave(&gcwq->lock, flags);
+		spin_lock(&gcwq->lock);
 	}
 
 	/* gcwq determined, get cwq and queue */
@@ -1031,7 +1041,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	trace_workqueue_queue_work(cpu, cwq, work);
 
 	if (WARN_ON(!list_empty(&work->entry))) {
-		spin_unlock_irqrestore(&gcwq->lock, flags);
+		spin_unlock(&gcwq->lock);
 		return;
 	}
 
@@ -1049,7 +1059,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
 	insert_work(cwq, work, worklist, work_flags);
 
-	spin_unlock_irqrestore(&gcwq->lock, flags);
+	spin_unlock(&gcwq->lock);
 }
 
 /**
@@ -1067,11 +1077,16 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 		   struct work_struct *work)
 {
 	bool ret = false;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		__queue_work(cpu, wq, work);
 		ret = true;
 	}
+
+	local_irq_restore(flags);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(queue_work_on);
@@ -1102,7 +1117,9 @@ static void delayed_work_timer_fn(unsigned long __data)
 	struct delayed_work *dwork = (struct delayed_work *)__data;
 	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
+	local_irq_disable();
 	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
+	local_irq_enable();
 }
 
 /**
@@ -1120,6 +1137,10 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
 	bool ret = false;
+	unsigned long flags;
+
+	/* read the comment in __queue_work() */
+	local_irq_save(flags);
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		unsigned int lcpu;
@@ -1156,6 +1177,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			add_timer(timer);
 		ret = true;
 	}
+
+	local_irq_restore(flags);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
@@ -1970,15 +1993,13 @@ __acquires(&gcwq->lock)
 		return;
 	}
 
-	/* claim and process */
+	/* claim and dequeue */
 	debug_work_deactivate(work);
 	hlist_add_head(&worker->hentry, bwh);
 	worker->current_work = work;
 	worker->current_cwq = cwq;
 	work_color = get_work_color(work);
 
-	/* record the current cpu number in the work data and dequeue */
-	set_work_cpu(work, gcwq->cpu);
 	list_del_init(&work->entry);
 
 	/*
@@ -1995,10 +2016,18 @@ __acquires(&gcwq->lock)
 	if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
 		wake_up_worker(pool);
 
-	spin_unlock_irq(&gcwq->lock);
+	/*
+	 * Record the last CPU and clear PENDING.  The following wmb is
+	 * paired with the implied mb in test_and_set_bit(PENDING) and
+	 * ensures all updates to @work made here are visible to and
+	 * precede any updates by the next PENDING owner.  Also, clear
+	 * PENDING inside @gcwq->lock so that PENDING and queued state
+	 * changes happen together while IRQ is disabled.
+	 */
+	smp_wmb();
+	set_work_cpu_and_clear_pending(work, gcwq->cpu);
 
-	smp_wmb();	/* paired with test_and_set_bit(PENDING) */
-	work_clear_pending(work);
+	spin_unlock_irq(&gcwq->lock);
 
 	lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
@@ -2836,9 +2865,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  */
 bool flush_delayed_work(struct delayed_work *dwork)
 {
+	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
 		__queue_work(raw_smp_processor_id(),
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
+	local_irq_enable();
 	return flush_work(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work);
@@ -2857,9 +2888,11 @@ EXPORT_SYMBOL(flush_delayed_work);
  */
 bool flush_delayed_work_sync(struct delayed_work *dwork)
 {
+	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
 		__queue_work(raw_smp_processor_id(),
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
+	local_irq_enable();
 	return flush_work_sync(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work_sync);
-- 
cgit v1.2.3


From d8e794dfd51c368ed3f686b7f4172830b60ae47b Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:45 -0700
Subject: workqueue: set delayed_work->timer function on initialization

delayed_work->timer.function is currently initialized during
queue_delayed_work_on().  Export delayed_work_timer_fn() and set
delayed_work timer function during delayed_work initialization
together with other fields.

This ensures the timer function is always valid on an initialized
delayed_work.  This is to help mod_delayed_work() implementation.

To detect delayed_work users which diddle with the internal timer,
trigger WARN if timer function doesn't match on queue.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 13 +++++++++++--
 kernel/workqueue.c        |  7 ++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 278dc5ddb73f..ab95fef38d56 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -16,6 +16,7 @@ struct workqueue_struct;
 
 struct work_struct;
 typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(unsigned long __data);
 
 /*
  * The first word is the work queue pointer and the flags rolled into
@@ -124,12 +125,14 @@ struct execute_work {
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {			\
 	.work = __WORK_INITIALIZER((n).work, (f)),		\
-	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
+	.timer = TIMER_INITIALIZER(delayed_work_timer_fn,	\
+				0, (unsigned long)&(n)),	\
 	}
 
 #define __DEFERRED_WORK_INITIALIZER(n, f) {			\
 	.work = __WORK_INITIALIZER((n).work, (f)),		\
-	.timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0),	\
+	.timer = TIMER_DEFERRED_INITIALIZER(delayed_work_timer_fn, \
+				0, (unsigned long)&(n)),	\
 	}
 
 #define DECLARE_WORK(n, f)					\
@@ -207,18 +210,24 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer(&(_work)->timer);			\
+		(_work)->timer.function = delayed_work_timer_fn;\
+		(_work)->timer.data = (unsigned long)(_work);	\
 	} while (0)
 
 #define INIT_DELAYED_WORK_ONSTACK(_work, _func)			\
 	do {							\
 		INIT_WORK_ONSTACK(&(_work)->work, (_func));	\
 		init_timer_on_stack(&(_work)->timer);		\
+		(_work)->timer.function = delayed_work_timer_fn;\
+		(_work)->timer.data = (unsigned long)(_work);	\
 	} while (0)
 
 #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)		\
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer_deferrable(&(_work)->timer);		\
+		(_work)->timer.function = delayed_work_timer_fn;\
+		(_work)->timer.data = (unsigned long)(_work);	\
 	} while (0)
 
 /**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 30474c4e107c..55392385fe30 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1112,7 +1112,7 @@ bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
-static void delayed_work_timer_fn(unsigned long __data)
+void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
 	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
@@ -1121,6 +1121,7 @@ static void delayed_work_timer_fn(unsigned long __data)
 	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
 	local_irq_enable();
 }
+EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
 /**
  * queue_delayed_work_on - queue work on specific CPU after delay
@@ -1145,6 +1146,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		unsigned int lcpu;
 
+		WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
+			     timer->data != (unsigned long)dwork);
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
@@ -1168,8 +1171,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		set_work_cwq(work, get_cwq(lcpu, wq), 0);
 
 		timer->expires = jiffies + delay;
-		timer->data = (unsigned long)dwork;
-		timer->function = delayed_work_timer_fn;
 
 		if (unlikely(cpu >= 0))
 			add_timer_on(timer, cpu);
-- 
cgit v1.2.3


From 57469821fd5c61f25f783827d7334063cff67d65 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:45 -0700
Subject: workqueue: unify local CPU queueing handling

Queueing functions have been using different methods to determine the
local CPU.

* queue_work() superflously uses get/put_cpu() to acquire and hold the
  local CPU across queue_work_on().

* delayed_work_timer_fn() uses smp_processor_id().

* queue_delayed_work() calls queue_delayed_work_on() with -1 @cpu
  which is interpreted as the local CPU.

* flush_delayed_work[_sync]() were using raw_smp_processor_id().

* __queue_work() interprets %WORK_CPU_UNBOUND as local CPU if the
  target workqueue is bound one but nobody uses this.

This patch converts all functions to uniformly use %WORK_CPU_UNBOUND
to indicate local CPU and use the local binding feature of
__queue_work().  unlikely() is dropped from %WORK_CPU_UNBOUND handling
in __queue_work().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 55392385fe30..ce60bb5d12fb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1003,7 +1003,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	if (!(wq->flags & WQ_UNBOUND)) {
 		struct global_cwq *last_gcwq;
 
-		if (unlikely(cpu == WORK_CPU_UNBOUND))
+		if (cpu == WORK_CPU_UNBOUND)
 			cpu = raw_smp_processor_id();
 
 		/*
@@ -1103,12 +1103,7 @@ EXPORT_SYMBOL_GPL(queue_work_on);
  */
 bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	bool ret;
-
-	ret = queue_work_on(get_cpu(), wq, work);
-	put_cpu();
-
-	return ret;
+	return queue_work_on(WORK_CPU_UNBOUND, wq, work);
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
@@ -1118,7 +1113,7 @@ void delayed_work_timer_fn(unsigned long __data)
 	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
 	local_irq_disable();
-	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
+	__queue_work(WORK_CPU_UNBOUND, cwq->wq, &dwork->work);
 	local_irq_enable();
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
@@ -1172,7 +1167,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 
 		timer->expires = jiffies + delay;
 
-		if (unlikely(cpu >= 0))
+		if (unlikely(cpu != WORK_CPU_UNBOUND))
 			add_timer_on(timer, cpu);
 		else
 			add_timer(timer);
@@ -1198,7 +1193,7 @@ bool queue_delayed_work(struct workqueue_struct *wq,
 	if (delay == 0)
 		return queue_work(wq, &dwork->work);
 
-	return queue_delayed_work_on(-1, wq, dwork, delay);
+	return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work);
 
@@ -2868,7 +2863,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(raw_smp_processor_id(),
+		__queue_work(WORK_CPU_UNBOUND,
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work(&dwork->work);
@@ -2891,7 +2886,7 @@ bool flush_delayed_work_sync(struct delayed_work *dwork)
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(raw_smp_processor_id(),
+		__queue_work(WORK_CPU_UNBOUND,
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work_sync(&dwork->work);
-- 
cgit v1.2.3


From 715f1300802e6eaefa85f6cfc70ae99af3d5d497 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:46 -0700
Subject: workqueue: fix zero @delay handling of queue_delayed_work_on()

If @delay is zero and the dealyed_work is idle, queue_delayed_work()
queues it for immediate execution; however, queue_delayed_work_on()
lacks this logic and always goes through timer regardless of @delay.

This patch moves 0 @delay handling logic from queue_delayed_work() to
queue_delayed_work_on() so that both functions behave the same.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ce60bb5d12fb..6cbdc22f8ec7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1125,7 +1125,9 @@ EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
  * @dwork: work to queue
  * @delay: number of jiffies to wait before queueing
  *
- * Returns %false if @work was already on a queue, %true otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.  If
+ * @delay is zero and @dwork is idle, it will be scheduled for immediate
+ * execution.
  */
 bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			   struct delayed_work *dwork, unsigned long delay)
@@ -1135,6 +1137,9 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	bool ret = false;
 	unsigned long flags;
 
+	if (!delay)
+		return queue_work_on(cpu, wq, &dwork->work);
+
 	/* read the comment in __queue_work() */
 	local_irq_save(flags);
 
@@ -1185,14 +1190,11 @@ EXPORT_SYMBOL_GPL(queue_delayed_work_on);
  * @dwork: delayable work to queue
  * @delay: number of jiffies to wait before queueing
  *
- * Returns %false if @work was already on a queue, %true otherwise.
+ * Equivalent to queue_delayed_work_on() but tries to use the local CPU.
  */
 bool queue_delayed_work(struct workqueue_struct *wq,
 			struct delayed_work *dwork, unsigned long delay)
 {
-	if (delay == 0)
-		return queue_work(wq, &dwork->work);
-
 	return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work);
-- 
cgit v1.2.3


From bf4ede014ea886b71ef71368738da35b316cb7c0 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:46 -0700
Subject: workqueue: move try_to_grab_pending() upwards

try_to_grab_pending() will be used by to-be-implemented
mod_delayed_work[_on]().  Move try_to_grab_pending() and related
functions above queueing functions.

This patch only moves functions around.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 286 ++++++++++++++++++++++++++---------------------------
 1 file changed, 143 insertions(+), 143 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6cbdc22f8ec7..0f50f4078e36 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -903,6 +903,149 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
 					    work);
 }
 
+/**
+ * move_linked_works - move linked works to a list
+ * @work: start of series of works to be scheduled
+ * @head: target list to append @work to
+ * @nextp: out paramter for nested worklist walking
+ *
+ * Schedule linked works starting from @work to @head.  Work series to
+ * be scheduled starts at @work and includes any consecutive work with
+ * WORK_STRUCT_LINKED set in its predecessor.
+ *
+ * If @nextp is not NULL, it's updated to point to the next work of
+ * the last scheduled work.  This allows move_linked_works() to be
+ * nested inside outer list_for_each_entry_safe().
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void move_linked_works(struct work_struct *work, struct list_head *head,
+			      struct work_struct **nextp)
+{
+	struct work_struct *n;
+
+	/*
+	 * Linked worklist will always end before the end of the list,
+	 * use NULL for list head.
+	 */
+	list_for_each_entry_safe_from(work, n, NULL, entry) {
+		list_move_tail(&work->entry, head);
+		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
+			break;
+	}
+
+	/*
+	 * If we're already inside safe list traversal and have moved
+	 * multiple works to the scheduled queue, the next position
+	 * needs to be updated.
+	 */
+	if (nextp)
+		*nextp = n;
+}
+
+static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+{
+	struct work_struct *work = list_first_entry(&cwq->delayed_works,
+						    struct work_struct, entry);
+
+	trace_workqueue_activate_work(work);
+	move_linked_works(work, &cwq->pool->worklist, NULL);
+	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+	cwq->nr_active++;
+}
+
+/**
+ * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
+ * @cwq: cwq of interest
+ * @color: color of work which left the queue
+ * @delayed: for a delayed work
+ *
+ * A work either has completed or is removed from pending queue,
+ * decrement nr_in_flight of its cwq and handle workqueue flushing.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
+				 bool delayed)
+{
+	/* ignore uncolored works */
+	if (color == WORK_NO_COLOR)
+		return;
+
+	cwq->nr_in_flight[color]--;
+
+	if (!delayed) {
+		cwq->nr_active--;
+		if (!list_empty(&cwq->delayed_works)) {
+			/* one down, submit a delayed one */
+			if (cwq->nr_active < cwq->max_active)
+				cwq_activate_first_delayed(cwq);
+		}
+	}
+
+	/* is flush in progress and are we at the flushing tip? */
+	if (likely(cwq->flush_color != color))
+		return;
+
+	/* are there still in-flight works? */
+	if (cwq->nr_in_flight[color])
+		return;
+
+	/* this cwq is done, clear flush_color */
+	cwq->flush_color = -1;
+
+	/*
+	 * If this was the last cwq, wake up the first flusher.  It
+	 * will handle the rest.
+	 */
+	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
+		complete(&cwq->wq->first_flusher->done);
+}
+
+/*
+ * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
+ * so this work can't be re-armed in any way.
+ */
+static int try_to_grab_pending(struct work_struct *work)
+{
+	struct global_cwq *gcwq;
+	int ret = -1;
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
+		return 0;
+
+	/*
+	 * The queueing is in progress, or it is already queued. Try to
+	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
+	 */
+	gcwq = get_work_gcwq(work);
+	if (!gcwq)
+		return ret;
+
+	spin_lock_irq(&gcwq->lock);
+	if (!list_empty(&work->entry)) {
+		/*
+		 * This work is queued, but perhaps we locked the wrong gcwq.
+		 * In that case we must see the new value after rmb(), see
+		 * insert_work()->wmb().
+		 */
+		smp_rmb();
+		if (gcwq == get_work_gcwq(work)) {
+			debug_work_deactivate(work);
+			list_del_init(&work->entry);
+			cwq_dec_nr_in_flight(get_work_cwq(work),
+				get_work_color(work),
+				*work_data_bits(work) & WORK_STRUCT_DELAYED);
+			ret = 1;
+		}
+	}
+	spin_unlock_irq(&gcwq->lock);
+
+	return ret;
+}
+
 /**
  * insert_work - insert a work into gcwq
  * @cwq: cwq @work belongs to
@@ -1831,107 +1974,6 @@ static bool manage_workers(struct worker *worker)
 	return ret;
 }
 
-/**
- * move_linked_works - move linked works to a list
- * @work: start of series of works to be scheduled
- * @head: target list to append @work to
- * @nextp: out paramter for nested worklist walking
- *
- * Schedule linked works starting from @work to @head.  Work series to
- * be scheduled starts at @work and includes any consecutive work with
- * WORK_STRUCT_LINKED set in its predecessor.
- *
- * If @nextp is not NULL, it's updated to point to the next work of
- * the last scheduled work.  This allows move_linked_works() to be
- * nested inside outer list_for_each_entry_safe().
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- */
-static void move_linked_works(struct work_struct *work, struct list_head *head,
-			      struct work_struct **nextp)
-{
-	struct work_struct *n;
-
-	/*
-	 * Linked worklist will always end before the end of the list,
-	 * use NULL for list head.
-	 */
-	list_for_each_entry_safe_from(work, n, NULL, entry) {
-		list_move_tail(&work->entry, head);
-		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
-			break;
-	}
-
-	/*
-	 * If we're already inside safe list traversal and have moved
-	 * multiple works to the scheduled queue, the next position
-	 * needs to be updated.
-	 */
-	if (nextp)
-		*nextp = n;
-}
-
-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
-{
-	struct work_struct *work = list_first_entry(&cwq->delayed_works,
-						    struct work_struct, entry);
-
-	trace_workqueue_activate_work(work);
-	move_linked_works(work, &cwq->pool->worklist, NULL);
-	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
-	cwq->nr_active++;
-}
-
-/**
- * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
- * @cwq: cwq of interest
- * @color: color of work which left the queue
- * @delayed: for a delayed work
- *
- * A work either has completed or is removed from pending queue,
- * decrement nr_in_flight of its cwq and handle workqueue flushing.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- */
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
-				 bool delayed)
-{
-	/* ignore uncolored works */
-	if (color == WORK_NO_COLOR)
-		return;
-
-	cwq->nr_in_flight[color]--;
-
-	if (!delayed) {
-		cwq->nr_active--;
-		if (!list_empty(&cwq->delayed_works)) {
-			/* one down, submit a delayed one */
-			if (cwq->nr_active < cwq->max_active)
-				cwq_activate_first_delayed(cwq);
-		}
-	}
-
-	/* is flush in progress and are we at the flushing tip? */
-	if (likely(cwq->flush_color != color))
-		return;
-
-	/* are there still in-flight works? */
-	if (cwq->nr_in_flight[color])
-		return;
-
-	/* this cwq is done, clear flush_color */
-	cwq->flush_color = -1;
-
-	/*
-	 * If this was the last cwq, wake up the first flusher.  It
-	 * will handle the rest.
-	 */
-	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
-		complete(&cwq->wq->first_flusher->done);
-}
-
 /**
  * process_one_work - process single work
  * @worker: self
@@ -2767,48 +2809,6 @@ bool flush_work_sync(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work_sync);
 
-/*
- * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
- * so this work can't be re-armed in any way.
- */
-static int try_to_grab_pending(struct work_struct *work)
-{
-	struct global_cwq *gcwq;
-	int ret = -1;
-
-	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
-		return 0;
-
-	/*
-	 * The queueing is in progress, or it is already queued. Try to
-	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
-	 */
-	gcwq = get_work_gcwq(work);
-	if (!gcwq)
-		return ret;
-
-	spin_lock_irq(&gcwq->lock);
-	if (!list_empty(&work->entry)) {
-		/*
-		 * This work is queued, but perhaps we locked the wrong gcwq.
-		 * In that case we must see the new value after rmb(), see
-		 * insert_work()->wmb().
-		 */
-		smp_rmb();
-		if (gcwq == get_work_gcwq(work)) {
-			debug_work_deactivate(work);
-			list_del_init(&work->entry);
-			cwq_dec_nr_in_flight(get_work_cwq(work),
-				get_work_color(work),
-				*work_data_bits(work) & WORK_STRUCT_DELAYED);
-			ret = 1;
-		}
-	}
-	spin_unlock_irq(&gcwq->lock);
-
-	return ret;
-}
-
 static bool __cancel_work_timer(struct work_struct *work,
 				struct timer_list* timer)
 {
-- 
cgit v1.2.3


From b5490077274482efde57a50b060b99bc839acd45 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:46 -0700
Subject: workqueue: introduce WORK_OFFQ_FLAG_*

Low WORK_STRUCT_FLAG_BITS bits of work_struct->data contain
WORK_STRUCT_FLAG_* and flush color.  If the work item is queued, the
rest point to the cpu_workqueue with WORK_STRUCT_CWQ set; otherwise,
WORK_STRUCT_CWQ is clear and the bits contain the last CPU number -
either a real CPU number or one of WORK_CPU_*.

Scheduled addition of mod_delayed_work[_on]() requires an additional
flag, which is used only while a work item is off queue.  There are
more than enough bits to represent off-queue CPU number on both 32 and
64bits.  This patch introduces WORK_OFFQ_FLAG_* which occupy the lower
part of the @work->data high bits while off queue.  This patch doesn't
define any actual OFFQ flag yet.

Off-queue CPU number is now shifted by WORK_OFFQ_CPU_SHIFT, which adds
the number of bits used by OFFQ flags to WORK_STRUCT_FLAG_SHIFT, to
make room for OFFQ flags.

To avoid shift width warning with large WORK_OFFQ_FLAG_BITS, ulong
cast is added to WORK_STRUCT_NO_CPU and, just in case, BUILD_BUG_ON()
to check that there are enough bits to accomodate off-queue CPU number
is added.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  8 +++++++-
 kernel/workqueue.c        | 14 +++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ab95fef38d56..f562674db404 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -68,9 +68,15 @@ enum {
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
 				  WORK_STRUCT_COLOR_BITS,
 
+	/* data contains off-queue information when !WORK_STRUCT_CWQ */
+	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_FLAG_BITS,
+	WORK_OFFQ_FLAG_BITS	= 0,
+	WORK_OFFQ_CPU_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+
+	/* convenience constants */
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
-	WORK_STRUCT_NO_CPU	= WORK_CPU_NONE << WORK_STRUCT_FLAG_BITS,
+	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
 
 	/* bit mask for work_busy() return values */
 	WORK_BUSY_PENDING	= 1 << 0,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0f50f4078e36..eeae77079483 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -533,9 +533,9 @@ static int work_next_color(int color)
 }
 
 /*
- * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
- * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
- * cleared and the work data contains the cpu number it was last on.
+ * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data
+ * contain the pointer to the queued cwq.  Once execution starts, the flag
+ * is cleared and the high bits contain OFFQ flags and CPU number.
  *
  * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
  * can be used to set the cwq, cpu or clear work->data.  These functions
@@ -565,7 +565,7 @@ static void set_work_cwq(struct work_struct *work,
 static void set_work_cpu_and_clear_pending(struct work_struct *work,
 					   unsigned int cpu)
 {
-	set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0);
+	set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
@@ -592,7 +592,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 		return ((struct cpu_workqueue_struct *)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
 
-	cpu = data >> WORK_STRUCT_FLAG_BITS;
+	cpu = data >> WORK_OFFQ_CPU_SHIFT;
 	if (cpu == WORK_CPU_NONE)
 		return NULL;
 
@@ -3724,6 +3724,10 @@ static int __init init_workqueues(void)
 	unsigned int cpu;
 	int i;
 
+	/* make sure we have enough bits for OFFQ CPU number */
+	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
+		     WORK_CPU_LAST);
+
 	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
 	cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
 
-- 
cgit v1.2.3


From 7beb2edf44b4dea820c733046ad7666d092bb4b6 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:46 -0700
Subject: workqueue: factor out __queue_delayed_work() from
 queue_delayed_work_on()

This is to prepare for mod_delayed_work[_on]() and doesn't cause any
functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 74 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 33 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index eeae77079483..d7f1b7e2bbaa 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1261,6 +1261,46 @@ void delayed_work_timer_fn(unsigned long __data)
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
+static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
+				struct delayed_work *dwork, unsigned long delay)
+{
+	struct timer_list *timer = &dwork->timer;
+	struct work_struct *work = &dwork->work;
+	unsigned int lcpu;
+
+	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
+		     timer->data != (unsigned long)dwork);
+	BUG_ON(timer_pending(timer));
+	BUG_ON(!list_empty(&work->entry));
+
+	timer_stats_timer_set_start_info(&dwork->timer);
+
+	/*
+	 * This stores cwq for the moment, for the timer_fn.  Note that the
+	 * work's gcwq is preserved to allow reentrance detection for
+	 * delayed works.
+	 */
+	if (!(wq->flags & WQ_UNBOUND)) {
+		struct global_cwq *gcwq = get_work_gcwq(work);
+
+		if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
+			lcpu = gcwq->cpu;
+		else
+			lcpu = raw_smp_processor_id();
+	} else {
+		lcpu = WORK_CPU_UNBOUND;
+	}
+
+	set_work_cwq(work, get_cwq(lcpu, wq), 0);
+
+	timer->expires = jiffies + delay;
+
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
+}
+
 /**
  * queue_delayed_work_on - queue work on specific CPU after delay
  * @cpu: CPU number to execute work on
@@ -1275,7 +1315,6 @@ EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			   struct delayed_work *dwork, unsigned long delay)
 {
-	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
 	bool ret = false;
 	unsigned long flags;
@@ -1287,38 +1326,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	local_irq_save(flags);
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
-		unsigned int lcpu;
-
-		WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
-			     timer->data != (unsigned long)dwork);
-		BUG_ON(timer_pending(timer));
-		BUG_ON(!list_empty(&work->entry));
-
-		timer_stats_timer_set_start_info(&dwork->timer);
-
-		/*
-		 * This stores cwq for the moment, for the timer_fn.
-		 * Note that the work's gcwq is preserved to allow
-		 * reentrance detection for delayed works.
-		 */
-		if (!(wq->flags & WQ_UNBOUND)) {
-			struct global_cwq *gcwq = get_work_gcwq(work);
-
-			if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
-				lcpu = gcwq->cpu;
-			else
-				lcpu = raw_smp_processor_id();
-		} else
-			lcpu = WORK_CPU_UNBOUND;
-
-		set_work_cwq(work, get_cwq(lcpu, wq), 0);
-
-		timer->expires = jiffies + delay;
-
-		if (unlikely(cpu != WORK_CPU_UNBOUND))
-			add_timer_on(timer, cpu);
-		else
-			add_timer(timer);
+		__queue_delayed_work(cpu, wq, dwork, delay);
 		ret = true;
 	}
 
-- 
cgit v1.2.3


From 36e227d242f9ec7cb4a8e968561b3b26e3d8b5d1 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:46 -0700
Subject: workqueue: reorganize try_to_grab_pending() and __cancel_timer_work()

* Use bool @is_dwork instead of @timer and let try_to_grab_pending()
  use to_delayed_work() to determine the delayed_work address.

* Move timer handling from __cancel_work_timer() to
  try_to_grab_pending().

* Make try_to_grab_pending() use -EAGAIN instead of -1 for
  busy-looping and drop the ret local variable.

* Add proper function comment to try_to_grab_pending().

This makes the code a bit easier to understand and will ease further
changes.  This patch doesn't make any functional change.

v2: Use @is_dwork instead of @timer.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 47 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d7f1b7e2bbaa..4b3663b1c677 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1004,15 +1004,33 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
 		complete(&cwq->wq->first_flusher->done);
 }
 
-/*
- * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
- * so this work can't be re-armed in any way.
+/**
+ * try_to_grab_pending - steal work item from worklist
+ * @work: work item to steal
+ * @is_dwork: @work is a delayed_work
+ *
+ * Try to grab PENDING bit of @work.  This function can handle @work in any
+ * stable state - idle, on timer or on worklist.  Return values are
+ *
+ *  1		if @work was pending and we successfully stole PENDING
+ *  0		if @work was idle and we claimed PENDING
+ *  -EAGAIN	if PENDING couldn't be grabbed at the moment, safe to busy-retry
+ *
+ * On >= 0 return, the caller owns @work's PENDING bit.
  */
-static int try_to_grab_pending(struct work_struct *work)
+static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
 {
 	struct global_cwq *gcwq;
-	int ret = -1;
 
+	/* try to steal the timer if it exists */
+	if (is_dwork) {
+		struct delayed_work *dwork = to_delayed_work(work);
+
+		if (likely(del_timer(&dwork->timer)))
+			return 1;
+	}
+
+	/* try to claim PENDING the normal way */
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
 		return 0;
 
@@ -1022,7 +1040,7 @@ static int try_to_grab_pending(struct work_struct *work)
 	 */
 	gcwq = get_work_gcwq(work);
 	if (!gcwq)
-		return ret;
+		return -EAGAIN;
 
 	spin_lock_irq(&gcwq->lock);
 	if (!list_empty(&work->entry)) {
@@ -1038,12 +1056,14 @@ static int try_to_grab_pending(struct work_struct *work)
 			cwq_dec_nr_in_flight(get_work_cwq(work),
 				get_work_color(work),
 				*work_data_bits(work) & WORK_STRUCT_DELAYED);
-			ret = 1;
+
+			spin_unlock_irq(&gcwq->lock);
+			return 1;
 		}
 	}
 	spin_unlock_irq(&gcwq->lock);
 
-	return ret;
+	return -EAGAIN;
 }
 
 /**
@@ -2817,15 +2837,12 @@ bool flush_work_sync(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work_sync);
 
-static bool __cancel_work_timer(struct work_struct *work,
-				struct timer_list* timer)
+static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
 	int ret;
 
 	do {
-		ret = (timer && likely(del_timer(timer)));
-		if (!ret)
-			ret = try_to_grab_pending(work);
+		ret = try_to_grab_pending(work, is_dwork);
 		wait_on_work(work);
 	} while (unlikely(ret < 0));
 
@@ -2853,7 +2870,7 @@ static bool __cancel_work_timer(struct work_struct *work,
  */
 bool cancel_work_sync(struct work_struct *work)
 {
-	return __cancel_work_timer(work, NULL);
+	return __cancel_work_timer(work, false);
 }
 EXPORT_SYMBOL_GPL(cancel_work_sync);
 
@@ -2914,7 +2931,7 @@ EXPORT_SYMBOL(flush_delayed_work_sync);
  */
 bool cancel_delayed_work_sync(struct delayed_work *dwork)
 {
-	return __cancel_work_timer(&dwork->work, &dwork->timer);
+	return __cancel_work_timer(&dwork->work, true);
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
 
-- 
cgit v1.2.3


From bbb68dfaba73e8338fe0f1dc711cc1d261daec87 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:46 -0700
Subject: workqueue: mark a work item being canceled as such

There can be two reasons try_to_grab_pending() can fail with -EAGAIN.
One is when someone else is queueing or deqeueing the work item.  With
the previous patches, it is guaranteed that PENDING and queued state
will soon agree making it safe to busy-retry in this case.

The other is if multiple __cancel_work_timer() invocations are racing
one another.  __cancel_work_timer() grabs PENDING and then waits for
running instances of the target work item on all CPUs while holding
PENDING and !queued.  try_to_grab_pending() invoked from another task
will keep returning -EAGAIN while the current owner is waiting.

Not distinguishing the two cases is okay because __cancel_work_timer()
is the only user of try_to_grab_pending() and it invokes
wait_on_work() whenever grabbing fails.  For the first case, busy
looping should be fine but wait_on_work() doesn't cause any critical
problem.  For the latter case, the new contender usually waits for the
same condition as the current owner, so no unnecessarily extended
busy-looping happens.  Combined, these make __cancel_work_timer()
technically correct even without irq protection while grabbing PENDING
or distinguishing the two different cases.

While the current code is technically correct, not distinguishing the
two cases makes it difficult to use try_to_grab_pending() for other
purposes than canceling because it's impossible to tell whether it's
safe to busy-retry grabbing.

This patch adds a mechanism to mark a work item being canceled.
try_to_grab_pending() now disables irq on success and returns -EAGAIN
to indicate that grabbing failed but PENDING and queued states are
gonna agree soon and it's safe to busy-loop.  It returns -ENOENT if
the work item is being canceled and it may stay PENDING && !queued for
arbitrary amount of time.

__cancel_work_timer() is modified to mark the work canceling with
WORK_OFFQ_CANCELING after grabbing PENDING, thus making
try_to_grab_pending() fail with -ENOENT instead of -EAGAIN.  Also, it
invokes wait_on_work() iff grabbing failed with -ENOENT.  This isn't
necessary for correctness but makes it consistent with other future
users of try_to_grab_pending().

v2: try_to_grab_pending() was testing preempt_count() to ensure that
    the caller has disabled preemption.  This triggers spuriously if
    !CONFIG_PREEMPT_COUNT.  Use preemptible() instead.  Reported by
    Fengguang Wu.

v3: Updated so that try_to_grab_pending() disables irq on success
    rather than requiring preemption disabled by the caller.  This
    makes busy-looping easier and will allow try_to_grap_pending() to
    be used from bh/irq contexts.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
---
 include/linux/workqueue.h |  5 ++-
 kernel/workqueue.c        | 90 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f562674db404..5f4aeaa9f3e6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -70,7 +70,10 @@ enum {
 
 	/* data contains off-queue information when !WORK_STRUCT_CWQ */
 	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_FLAG_BITS,
-	WORK_OFFQ_FLAG_BITS	= 0,
+
+	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
+
+	WORK_OFFQ_FLAG_BITS	= 1,
 	WORK_OFFQ_CPU_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
 
 	/* convenience constants */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4b3663b1c677..b4a4e05c89e1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -537,15 +537,20 @@ static int work_next_color(int color)
  * contain the pointer to the queued cwq.  Once execution starts, the flag
  * is cleared and the high bits contain OFFQ flags and CPU number.
  *
- * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
- * can be used to set the cwq, cpu or clear work->data.  These functions
- * should only be called while the work is owned - ie. while the PENDING
- * bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the cwq, cpu or clear
+ * work->data.  These functions should only be called while the work is
+ * owned - ie. while the PENDING bit is set.
  *
- * get_work_[g]cwq() can be used to obtain the gcwq or cwq
- * corresponding to a work.  gcwq is available once the work has been
- * queued anywhere after initialization.  cwq is available only from
- * queueing until execution starts.
+ * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
+ * a work.  gcwq is available once the work has been queued anywhere after
+ * initialization until it is sync canceled.  cwq is available only while
+ * the work item is queued.
+ *
+ * %WORK_OFFQ_CANCELING is used to mark a work item which is being
+ * canceled.  While being canceled, a work item may have its PENDING set
+ * but stay off timer and worklist for arbitrarily long and nobody should
+ * try to steal the PENDING bit.
  */
 static inline void set_work_data(struct work_struct *work, unsigned long data,
 				 unsigned long flags)
@@ -600,6 +605,22 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 	return get_gcwq(cpu);
 }
 
+static void mark_work_canceling(struct work_struct *work)
+{
+	struct global_cwq *gcwq = get_work_gcwq(work);
+	unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
+
+	set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
+		      WORK_STRUCT_PENDING);
+}
+
+static bool work_is_canceling(struct work_struct *work)
+{
+	unsigned long data = atomic_long_read(&work->data);
+
+	return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
+}
+
 /*
  * Policy functions.  These define the policies on how the global worker
  * pools are managed.  Unless noted otherwise, these functions assume that
@@ -1005,9 +1026,10 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
 }
 
 /**
- * try_to_grab_pending - steal work item from worklist
+ * try_to_grab_pending - steal work item from worklist and disable irq
  * @work: work item to steal
  * @is_dwork: @work is a delayed_work
+ * @flags: place to store irq state
  *
  * Try to grab PENDING bit of @work.  This function can handle @work in any
  * stable state - idle, on timer or on worklist.  Return values are
@@ -1015,13 +1037,30 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
  *  1		if @work was pending and we successfully stole PENDING
  *  0		if @work was idle and we claimed PENDING
  *  -EAGAIN	if PENDING couldn't be grabbed at the moment, safe to busy-retry
+ *  -ENOENT	if someone else is canceling @work, this state may persist
+ *		for arbitrarily long
  *
- * On >= 0 return, the caller owns @work's PENDING bit.
+ * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
+ * preempted while holding PENDING and @work off queue, preemption must be
+ * disabled on entry.  This ensures that we don't return -EAGAIN while
+ * another task is preempted in this function.
+ *
+ * On successful return, >= 0, irq is disabled and the caller is
+ * responsible for releasing it using local_irq_restore(*@flags).
+ *
+ * This function is safe to call from any context other than IRQ handler.
+ * An IRQ handler may run on top of delayed_work_timer_fn() which can make
+ * this function return -EAGAIN perpetually.
  */
-static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
+static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
+			       unsigned long *flags)
 {
 	struct global_cwq *gcwq;
 
+	WARN_ON_ONCE(in_irq());
+
+	local_irq_save(*flags);
+
 	/* try to steal the timer if it exists */
 	if (is_dwork) {
 		struct delayed_work *dwork = to_delayed_work(work);
@@ -1040,9 +1079,9 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
 	 */
 	gcwq = get_work_gcwq(work);
 	if (!gcwq)
-		return -EAGAIN;
+		goto fail;
 
-	spin_lock_irq(&gcwq->lock);
+	spin_lock(&gcwq->lock);
 	if (!list_empty(&work->entry)) {
 		/*
 		 * This work is queued, but perhaps we locked the wrong gcwq.
@@ -1057,12 +1096,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
 				get_work_color(work),
 				*work_data_bits(work) & WORK_STRUCT_DELAYED);
 
-			spin_unlock_irq(&gcwq->lock);
+			spin_unlock(&gcwq->lock);
 			return 1;
 		}
 	}
-	spin_unlock_irq(&gcwq->lock);
-
+	spin_unlock(&gcwq->lock);
+fail:
+	local_irq_restore(*flags);
+	if (work_is_canceling(work))
+		return -ENOENT;
+	cpu_relax();
 	return -EAGAIN;
 }
 
@@ -2839,13 +2882,24 @@ EXPORT_SYMBOL_GPL(flush_work_sync);
 
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+	unsigned long flags;
 	int ret;
 
 	do {
-		ret = try_to_grab_pending(work, is_dwork);
-		wait_on_work(work);
+		ret = try_to_grab_pending(work, is_dwork, &flags);
+		/*
+		 * If someone else is canceling, wait for the same event it
+		 * would be waiting for before retrying.
+		 */
+		if (unlikely(ret == -ENOENT))
+			wait_on_work(work);
 	} while (unlikely(ret < 0));
 
+	/* tell other tasks trying to grab @work to back off */
+	mark_work_canceling(work);
+	local_irq_restore(flags);
+
+	wait_on_work(work);
 	clear_work_data(work);
 	return ret;
 }
-- 
cgit v1.2.3


From 8376fe22c7e79c7e90857d39f82aeae6cad6c4b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:47 -0700
Subject: workqueue: implement mod_delayed_work[_on]()

Workqueue was lacking a mechanism to modify the timeout of an already
pending delayed_work.  delayed_work users have been working around
this using several methods - using an explicit timer + work item,
messing directly with delayed_work->timer, and canceling before
re-queueing, all of which are error-prone and/or ugly.

This patch implements mod_delayed_work[_on]() which behaves similarly
to mod_timer() - if the delayed_work is idle, it's queued with the
given delay; otherwise, its timeout is modified to the new value.
Zero @delay guarantees immediate execution.

v2: Updated to reflect try_to_grab_pending() changes.  Now safe to be
    called from bh context.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
---
 include/linux/workqueue.h |  4 ++++
 kernel/workqueue.c        | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 5f4aeaa9f3e6..20000305a8a6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -390,6 +390,10 @@ extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 extern bool queue_delayed_work(struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
+extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
+			struct delayed_work *dwork, unsigned long delay);
+extern bool mod_delayed_work(struct workqueue_struct *wq,
+			struct delayed_work *dwork, unsigned long delay);
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b4a4e05c89e1..41ae2c0979fe 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1413,6 +1413,59 @@ bool queue_delayed_work(struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work);
 
+/**
+ * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
+ * modify @dwork's timer so that it expires after @delay.  If @delay is
+ * zero, @work is guaranteed to be scheduled immediately regardless of its
+ * current state.
+ *
+ * Returns %false if @dwork was idle and queued, %true if @dwork was
+ * pending and its timer was modified.
+ *
+ * This function is safe to call from any context other than IRQ handler.
+ * See try_to_grab_pending() for details.
+ */
+bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
+			 struct delayed_work *dwork, unsigned long delay)
+{
+	unsigned long flags;
+	int ret;
+
+	do {
+		ret = try_to_grab_pending(&dwork->work, true, &flags);
+	} while (unlikely(ret == -EAGAIN));
+
+	if (likely(ret >= 0)) {
+		__queue_delayed_work(cpu, wq, dwork, delay);
+		local_irq_restore(flags);
+	}
+
+	/* -ENOENT from try_to_grab_pending() becomes %true */
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mod_delayed_work_on);
+
+/**
+ * mod_delayed_work - modify delay of or queue a delayed work
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * mod_delayed_work_on() on local CPU.
+ */
+bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
+		      unsigned long delay)
+{
+	return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
+}
+EXPORT_SYMBOL_GPL(mod_delayed_work);
+
 /**
  * worker_enter_idle - enter idle state
  * @worker: worker which is entering idle state
-- 
cgit v1.2.3


From 41f63c5359d14ca995172b8f6eaffd93f60fec54 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 3 Aug 2012 10:30:47 -0700
Subject: workqueue: use mod_delayed_work() instead of cancel + queue

Convert delayed_work users doing cancel_delayed_work() followed by
queue_delayed_work() to mod_delayed_work().

Most conversions are straight-forward.  Ones worth mentioning are,

* drivers/edac/edac_mc.c: edac_mc_workq_setup() converted to always
  use mod_delayed_work() and cancel loop in
  edac_mc_reset_delay_period() is dropped.

* drivers/platform/x86/thinkpad_acpi.c: No need to remember whether
  watchdog is active or not.  @fan_watchdog_active and related code
  dropped.

* drivers/power/charger-manager.c: Seemingly a lot of
  delayed_work_pending() abuse going on here.
  [delayed_]work_pending() are unsynchronized and racy when used like
  this.  I converted one instance in fullbatt_handler().  Please
  conver the rest so that it invokes workqueue APIs for the intended
  target state rather than trying to game work item pending state
  transitions.  e.g. if timer should be modified - call
  mod_delayed_work(), canceled - call cancel_delayed_work[_sync]().

* drivers/thermal/thermal_sys.c: thermal_zone_device_set_polling()
  simplified.  Note that round_jiffies() calls in this function are
  meaningless.  round_jiffies() work on absolute jiffies not delta
  delay used by delayed_work.

v2: Tomi pointed out that __cancel_delayed_work() users can't be
    safely converted to mod_delayed_work().  They could be calling it
    from irq context and if that happens while delayed_work_timer_fn()
    is running, it could deadlock.  __cancel_delayed_work() users are
    dropped.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Doug Thompson <dougthompson@xmission.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Roland Dreier <roland@kernel.org>
Cc: "John W. Linville" <linville@tuxdriver.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
---
 block/genhd.c                          |  6 ++----
 drivers/edac/edac_mc.c                 | 17 +----------------
 drivers/infiniband/core/addr.c         |  4 +---
 drivers/infiniband/hw/nes/nes_hw.c     |  6 ++----
 drivers/infiniband/hw/nes/nes_nic.c    |  5 ++---
 drivers/net/wireless/ipw2x00/ipw2100.c |  8 +++-----
 drivers/net/wireless/zd1211rw/zd_usb.c |  3 +--
 drivers/platform/x86/thinkpad_acpi.c   | 20 +++++---------------
 drivers/power/charger-manager.c        |  9 +++------
 drivers/power/ds2760_battery.c         |  9 +++------
 drivers/power/jz4740-battery.c         |  6 ++----
 drivers/thermal/thermal_sys.c          | 15 ++++++---------
 fs/afs/callback.c                      |  4 +---
 fs/afs/server.c                        | 10 ++--------
 fs/afs/vlocation.c                     | 14 +++-----------
 fs/nfs/nfs4renewd.c                    |  3 +--
 net/core/dst.c                         |  4 ++--
 net/rfkill/input.c                     |  3 +--
 18 files changed, 41 insertions(+), 105 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index cac7366957c3..5d8b44a6442b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1534,10 +1534,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
 
 	spin_lock_irq(&ev->lock);
 	ev->clearing |= mask;
-	if (!ev->block) {
-		cancel_delayed_work(&ev->dwork);
-		queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
-	}
+	if (!ev->block)
+		mod_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
 	spin_unlock_irq(&ev->lock);
 }
 
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 616d90bcb3a4..7c0df4af9ef7 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -538,7 +538,7 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 		return;
 
 	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
-	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
+	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 }
 
 /*
@@ -578,21 +578,6 @@ void edac_mc_reset_delay_period(int value)
 
 	mutex_lock(&mem_ctls_mutex);
 
-	/* scan the list and turn off all workq timers, doing so under lock
-	 */
-	list_for_each(item, &mc_devices) {
-		mci = list_entry(item, struct mem_ctl_info, link);
-
-		if (mci->op_state == OP_RUNNING_POLL)
-			cancel_delayed_work(&mci->work);
-	}
-
-	mutex_unlock(&mem_ctls_mutex);
-
-
-	/* re-walk the list, and reset the poll delay */
-	mutex_lock(&mem_ctls_mutex);
-
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
 
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 28058ae33d38..eaec8d7a3b73 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -152,13 +152,11 @@ static void set_timeout(unsigned long time)
 {
 	unsigned long delay;
 
-	cancel_delayed_work(&work);
-
 	delay = time - jiffies;
 	if ((long)delay <= 0)
 		delay = 1;
 
-	queue_delayed_work(addr_wq, &work, delay);
+	mod_delayed_work(addr_wq, &work, delay);
 }
 
 static void queue_req(struct addr_req *req)
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index d42c9f435b1b..9e0895b45eb8 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2679,11 +2679,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 			}
 		}
 		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
-			if (nesdev->link_recheck)
-				cancel_delayed_work(&nesdev->work);
 			nesdev->link_recheck = 1;
-			schedule_delayed_work(&nesdev->work,
-					      NES_LINK_RECHECK_DELAY);
+			mod_delayed_work(system_wq, &nesdev->work,
+					 NES_LINK_RECHECK_DELAY);
 		}
 	}
 
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index f3a3ecf8d09e..e43f6e41a6bd 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -243,10 +243,9 @@ static int nes_netdev_open(struct net_device *netdev)
 
 	spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
 	if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
-		if (nesdev->link_recheck)
-			cancel_delayed_work(&nesdev->work);
 		nesdev->link_recheck = 1;
-		schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
+		mod_delayed_work(system_wq, &nesdev->work,
+				 NES_LINK_RECHECK_DELAY);
 	}
 	spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
 
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 95aa8e1683ec..8a3420257eeb 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -2180,8 +2180,7 @@ static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
 
 	/* Make sure the RF Kill check timer is running */
 	priv->stop_rf_kill = 0;
-	cancel_delayed_work(&priv->rf_kill);
-	schedule_delayed_work(&priv->rf_kill, round_jiffies_relative(HZ));
+	mod_delayed_work(system_wq, &priv->rf_kill, round_jiffies_relative(HZ));
 }
 
 static void send_scan_event(void *data)
@@ -4321,9 +4320,8 @@ static int ipw_radio_kill_sw(struct ipw2100_priv *priv, int disable_radio)
 					  "disabled by HW switch\n");
 			/* Make sure the RF_KILL check timer is running */
 			priv->stop_rf_kill = 0;
-			cancel_delayed_work(&priv->rf_kill);
-			schedule_delayed_work(&priv->rf_kill,
-					      round_jiffies_relative(HZ));
+			mod_delayed_work(system_wq, &priv->rf_kill,
+					 round_jiffies_relative(HZ));
 		} else
 			schedule_reset(priv);
 	}
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index af83c43bcdb1..ef2b171e3514 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -1164,8 +1164,7 @@ void zd_usb_reset_rx_idle_timer(struct zd_usb *usb)
 {
 	struct zd_usb_rx *rx = &usb->rx;
 
-	cancel_delayed_work(&rx->idle_work);
-	queue_delayed_work(zd_workqueue, &rx->idle_work, ZD_RX_IDLE_INTERVAL);
+	mod_delayed_work(zd_workqueue, &rx->idle_work, ZD_RX_IDLE_INTERVAL);
 }
 
 static inline void init_usb_interrupt(struct zd_usb *usb)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index e7f73287636c..06d2502ffb37 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -7682,25 +7682,15 @@ static int fan_set_speed(int speed)
 
 static void fan_watchdog_reset(void)
 {
-	static int fan_watchdog_active;
-
 	if (fan_control_access_mode == TPACPI_FAN_WR_NONE)
 		return;
 
-	if (fan_watchdog_active)
-		cancel_delayed_work(&fan_watchdog_task);
-
 	if (fan_watchdog_maxinterval > 0 &&
-	    tpacpi_lifecycle != TPACPI_LIFE_EXITING) {
-		fan_watchdog_active = 1;
-		if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task,
-				msecs_to_jiffies(fan_watchdog_maxinterval
-						 * 1000))) {
-			pr_err("failed to queue the fan watchdog, "
-			       "watchdog will not trigger\n");
-		}
-	} else
-		fan_watchdog_active = 0;
+	    tpacpi_lifecycle != TPACPI_LIFE_EXITING)
+		mod_delayed_work(tpacpi_wq, &fan_watchdog_task,
+			msecs_to_jiffies(fan_watchdog_maxinterval * 1000));
+	else
+		cancel_delayed_work(&fan_watchdog_task);
 }
 
 static void fan_watchdog_fire(struct work_struct *ignored)
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 526e5c931294..7ff83cf43c8c 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -509,9 +509,8 @@ static void _setup_polling(struct work_struct *work)
 	if (!delayed_work_pending(&cm_monitor_work) ||
 	    (delayed_work_pending(&cm_monitor_work) &&
 	     time_after(next_polling, _next_polling))) {
-		cancel_delayed_work_sync(&cm_monitor_work);
 		next_polling = jiffies + polling_jiffy;
-		queue_delayed_work(cm_wq, &cm_monitor_work, polling_jiffy);
+		mod_delayed_work(cm_wq, &cm_monitor_work, polling_jiffy);
 	}
 
 out:
@@ -546,10 +545,8 @@ static void fullbatt_handler(struct charger_manager *cm)
 	if (cm_suspended)
 		device_set_wakeup_capable(cm->dev, true);
 
-	if (delayed_work_pending(&cm->fullbatt_vchk_work))
-		cancel_delayed_work(&cm->fullbatt_vchk_work);
-	queue_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
-			   msecs_to_jiffies(desc->fullbatt_vchkdrop_ms));
+	mod_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
+			 msecs_to_jiffies(desc->fullbatt_vchkdrop_ms));
 	cm->fullbatt_vchk_jiffies_at = jiffies + msecs_to_jiffies(
 				       desc->fullbatt_vchkdrop_ms);
 
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 076e211a40b7..704e652072be 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -355,8 +355,7 @@ static void ds2760_battery_external_power_changed(struct power_supply *psy)
 
 	dev_dbg(di->dev, "%s\n", __func__);
 
-	cancel_delayed_work(&di->monitor_work);
-	queue_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ/10);
+	mod_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ/10);
 }
 
 
@@ -401,8 +400,7 @@ static void ds2760_battery_set_charged(struct power_supply *psy)
 
 	/* postpone the actual work by 20 secs. This is for debouncing GPIO
 	 * signals and to let the current value settle. See AN4188. */
-	cancel_delayed_work(&di->set_charged_work);
-	queue_delayed_work(di->monitor_wqueue, &di->set_charged_work, HZ * 20);
+	mod_delayed_work(di->monitor_wqueue, &di->set_charged_work, HZ * 20);
 }
 
 static int ds2760_battery_get_property(struct power_supply *psy,
@@ -616,8 +614,7 @@ static int ds2760_battery_resume(struct platform_device *pdev)
 	di->charge_status = POWER_SUPPLY_STATUS_UNKNOWN;
 	power_supply_changed(&di->bat);
 
-	cancel_delayed_work(&di->monitor_work);
-	queue_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ);
+	mod_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ);
 
 	return 0;
 }
diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c
index 8dbc7bfaab14..ffbed5e5b945 100644
--- a/drivers/power/jz4740-battery.c
+++ b/drivers/power/jz4740-battery.c
@@ -173,16 +173,14 @@ static void jz_battery_external_power_changed(struct power_supply *psy)
 {
 	struct jz_battery *jz_battery = psy_to_jz_battery(psy);
 
-	cancel_delayed_work(&jz_battery->work);
-	schedule_delayed_work(&jz_battery->work, 0);
+	mod_delayed_work(system_wq, &jz_battery->work, 0);
 }
 
 static irqreturn_t jz_battery_charge_irq(int irq, void *data)
 {
 	struct jz_battery *jz_battery = data;
 
-	cancel_delayed_work(&jz_battery->work);
-	schedule_delayed_work(&jz_battery->work, 0);
+	mod_delayed_work(system_wq, &jz_battery->work, 0);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 2ab31e4f02cc..67789b8345d2 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -694,17 +694,14 @@ thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
 static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
 					    int delay)
 {
-	cancel_delayed_work(&(tz->poll_queue));
-
-	if (!delay)
-		return;
-
 	if (delay > 1000)
-		queue_delayed_work(system_freezable_wq, &(tz->poll_queue),
-				      round_jiffies(msecs_to_jiffies(delay)));
+		mod_delayed_work(system_freezable_wq, &tz->poll_queue,
+				 round_jiffies(msecs_to_jiffies(delay)));
+	else if (delay)
+		mod_delayed_work(system_freezable_wq, &tz->poll_queue,
+				 msecs_to_jiffies(delay));
 	else
-		queue_delayed_work(system_freezable_wq, &(tz->poll_queue),
-				      msecs_to_jiffies(delay));
+		cancel_delayed_work(&tz->poll_queue);
 }
 
 static void thermal_zone_device_passive(struct thermal_zone_device *tz,
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 587ef5123cd8..7ef637d7f3a5 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -351,9 +351,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work)
  */
 void afs_flush_callback_breaks(struct afs_server *server)
 {
-	cancel_delayed_work(&server->cb_break_work);
-	queue_delayed_work(afs_callback_update_worker,
-			   &server->cb_break_work, 0);
+	mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
 }
 
 #if 0
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d59b7516e943..f342acf3547d 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -285,12 +285,7 @@ static void afs_reap_server(struct work_struct *work)
 		expiry = server->time_of_death + afs_server_timeout;
 		if (expiry > now) {
 			delay = (expiry - now) * HZ;
-			if (!queue_delayed_work(afs_wq, &afs_server_reaper,
-						delay)) {
-				cancel_delayed_work(&afs_server_reaper);
-				queue_delayed_work(afs_wq, &afs_server_reaper,
-						   delay);
-			}
+			mod_delayed_work(afs_wq, &afs_server_reaper, delay);
 			break;
 		}
 
@@ -323,6 +318,5 @@ static void afs_reap_server(struct work_struct *work)
 void __exit afs_purge_servers(void)
 {
 	afs_server_timeout = 0;
-	cancel_delayed_work(&afs_server_reaper);
-	queue_delayed_work(afs_wq, &afs_server_reaper, 0);
+	mod_delayed_work(afs_wq, &afs_server_reaper, 0);
 }
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 431984d2e372..57bcb1596530 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -561,12 +561,7 @@ static void afs_vlocation_reaper(struct work_struct *work)
 		if (expiry > now) {
 			delay = (expiry - now) * HZ;
 			_debug("delay %lu", delay);
-			if (!queue_delayed_work(afs_wq, &afs_vlocation_reap,
-						delay)) {
-				cancel_delayed_work(&afs_vlocation_reap);
-				queue_delayed_work(afs_wq, &afs_vlocation_reap,
-						   delay);
-			}
+			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
 			break;
 		}
 
@@ -614,13 +609,10 @@ void afs_vlocation_purge(void)
 	spin_lock(&afs_vlocation_updates_lock);
 	list_del_init(&afs_vlocation_updates);
 	spin_unlock(&afs_vlocation_updates_lock);
-	cancel_delayed_work(&afs_vlocation_update);
-	queue_delayed_work(afs_vlocation_update_worker,
-			   &afs_vlocation_update, 0);
+	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
 	destroy_workqueue(afs_vlocation_update_worker);
 
-	cancel_delayed_work(&afs_vlocation_reap);
-	queue_delayed_work(afs_wq, &afs_vlocation_reap, 0);
+	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
 }
 
 /*
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 6930bec91bca..1720d32ffa54 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 		timeout = 5 * HZ;
 	dprintk("%s: requeueing work. Lease period = %ld\n",
 			__func__, (timeout + HZ - 1) / HZ);
-	cancel_delayed_work(&clp->cl_renewd);
-	schedule_delayed_work(&clp->cl_renewd, timeout);
+	mod_delayed_work(system_wq, &clp->cl_renewd, timeout);
 	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }
diff --git a/net/core/dst.c b/net/core/dst.c
index 069d51d29414..ed5a0b409aa3 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -214,8 +214,8 @@ void __dst_free(struct dst_entry *dst)
 	if (dst_garbage.timer_inc > DST_GC_INC) {
 		dst_garbage.timer_inc = DST_GC_INC;
 		dst_garbage.timer_expires = DST_GC_MIN;
-		cancel_delayed_work(&dst_gc_work);
-		schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
+		mod_delayed_work(system_wq, &dst_gc_work,
+				 dst_garbage.timer_expires);
 	}
 	spin_unlock_bh(&dst_garbage.lock);
 }
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 24c55c53e6a2..c9d931e7ffec 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -164,8 +164,7 @@ static void rfkill_schedule_global_op(enum rfkill_sched_op op)
 	rfkill_op_pending = true;
 	if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) {
 		/* bypass the limiter for EPO */
-		cancel_delayed_work(&rfkill_op_work);
-		schedule_delayed_work(&rfkill_op_work, 0);
+		mod_delayed_work(system_wq, &rfkill_op_work, 0);
 		rfkill_last_scheduled = jiffies;
 	} else
 		rfkill_schedule_ratelimited();
-- 
cgit v1.2.3


From 1265057fa02c7bed3b6d9ddc8a2048065a370364 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 8 Aug 2012 09:38:42 -0700
Subject: workqueue: fix CPU binding of flush_delayed_work[_sync]()

delayed_work encodes the workqueue to use and the last CPU in
delayed_work->work.data while it's on timer.  The target CPU is
implicitly recorded as the CPU the timer is queued on and
delayed_work_timer_fn() queues delayed_work->work to the CPU it is
running on.

Unfortunately, this leaves flush_delayed_work[_sync]() no way to find
out which CPU the delayed_work was queued for when they try to
re-queue after killing the timer.  Currently, it chooses the local CPU
flush is running on.  This can unexpectedly move a delayed_work queued
on a specific CPU to another CPU and lead to subtle errors.

There isn't much point in trying to save several bytes in struct
delayed_work, which is already close to a hundred bytes on 64bit with
all debug options turned off.  This patch adds delayed_work->cpu to
remember the CPU it's queued for.

Note that if the timer is migrated during CPU down, the work item
could be queued to the downed global_cwq after this change.  As a
detached global_cwq behaves like an unbound one, this doesn't change
much for the delayed_work.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/workqueue.h | 1 +
 kernel/workqueue.c        | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 20000305a8a6..b14d5d59af7c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -102,6 +102,7 @@ struct work_struct {
 struct delayed_work {
 	struct work_struct work;
 	struct timer_list timer;
+	int cpu;
 };
 
 static inline struct delayed_work *to_delayed_work(struct work_struct *work)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 41ae2c0979fe..11723c5b2b20 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1319,7 +1319,7 @@ void delayed_work_timer_fn(unsigned long __data)
 	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
 	local_irq_disable();
-	__queue_work(WORK_CPU_UNBOUND, cwq->wq, &dwork->work);
+	__queue_work(dwork->cpu, cwq->wq, &dwork->work);
 	local_irq_enable();
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
@@ -1356,6 +1356,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 
 	set_work_cwq(work, get_cwq(lcpu, wq), 0);
 
+	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
 	if (unlikely(cpu != WORK_CPU_UNBOUND))
@@ -2997,7 +2998,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(WORK_CPU_UNBOUND,
+		__queue_work(dwork->cpu,
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work(&dwork->work);
@@ -3020,7 +3021,7 @@ bool flush_delayed_work_sync(struct delayed_work *dwork)
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(WORK_CPU_UNBOUND,
+		__queue_work(dwork->cpu,
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work_sync(&dwork->work);
-- 
cgit v1.2.3


From 23657bb192f14b789e4c478def8f11ecc95b4f6c Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 13 Aug 2012 17:08:19 -0700
Subject: workqueue: add missing wmb() in clear_work_data()

Any operation which clears PENDING should be preceded by a wmb to
guarantee that the next PENDING owner sees all the changes made before
PENDING release.

There are only two places where PENDING is cleared -
set_work_cpu_and_clear_pending() and clear_work_data().  The caller of
the former already does smp_wmb() but the latter doesn't have any.

Move the wmb above set_work_cpu_and_clear_pending() into it and add
one to clear_work_data().

There hasn't been any report related to this issue, and, given how
clear_work_data() is used, it is extremely unlikely to have caused any
actual problems on any architecture.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 kernel/workqueue.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 11723c5b2b20..4fef9527a620 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -570,11 +570,19 @@ static void set_work_cwq(struct work_struct *work,
 static void set_work_cpu_and_clear_pending(struct work_struct *work,
 					   unsigned int cpu)
 {
+	/*
+	 * The following wmb is paired with the implied mb in
+	 * test_and_set_bit(PENDING) and ensures all updates to @work made
+	 * here are visible to and precede any updates by the next PENDING
+	 * owner.
+	 */
+	smp_wmb();
 	set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
 {
+	smp_wmb();	/* see set_work_cpu_and_clear_pending() */
 	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
 }
 
@@ -2182,14 +2190,11 @@ __acquires(&gcwq->lock)
 		wake_up_worker(pool);
 
 	/*
-	 * Record the last CPU and clear PENDING.  The following wmb is
-	 * paired with the implied mb in test_and_set_bit(PENDING) and
-	 * ensures all updates to @work made here are visible to and
-	 * precede any updates by the next PENDING owner.  Also, clear
-	 * PENDING inside @gcwq->lock so that PENDING and queued state
-	 * changes happen together while IRQ is disabled.
+	 * Record the last CPU and clear PENDING which should be the last
+	 * update to @work.  Also, do this inside @gcwq->lock so that
+	 * PENDING and queued state changes happen together while IRQ is
+	 * disabled.
 	 */
-	smp_wmb();
 	set_work_cpu_and_clear_pending(work, gcwq->cpu);
 
 	spin_unlock_irq(&gcwq->lock);
-- 
cgit v1.2.3


From 330dad5b9c9555632578c00e94e85c122561c5c7 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 15 Aug 2012 23:25:36 +0900
Subject: workqueue: use enum value to set array size of pools in gcwq

Commit 3270476a6c0ce322354df8679652f060d66526dc ('workqueue: reimplement
WQ_HIGHPRI using a separate worker_pool') introduce separate worker_pool
for HIGHPRI. Although there is NR_WORKER_POOLS enum value which represent
size of pools, definition of worker_pool in gcwq doesn't use it.
Using it makes code robust and prevent future mistakes.
So change code to use this enum value.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4fef9527a620..49d8f4a0110d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -183,7 +183,8 @@ struct global_cwq {
 	struct hlist_head	busy_hash[BUSY_WORKER_HASH_SIZE];
 						/* L: hash of busy workers */
 
-	struct worker_pool	pools[2];	/* normal and highpri pools */
+	struct worker_pool	pools[NR_WORKER_POOLS];
+						/* normal and highpri pools */
 
 	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
 } ____cacheline_aligned_in_smp;
-- 
cgit v1.2.3


From b75cac9368fa91636e17d0f7950b35d837154e14 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 15 Aug 2012 23:25:37 +0900
Subject: workqueue: correct req_cpu in trace_workqueue_queue_work()

When we do tracing workqueue_queue_work(), it records requested cpu.
But, if !(@wq->flag & WQ_UNBOUND) and @cpu is WORK_CPU_UNBOUND,
requested cpu is changed as local cpu.
In case of @wq->flag & WQ_UNBOUND, above change is not occured,
therefore it is reasonable to correct it.

Use temporary local variable for storing requested cpu.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 49d8f4a0110d..c29f2dc0f4fc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1198,6 +1198,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	struct cpu_workqueue_struct *cwq;
 	struct list_head *worklist;
 	unsigned int work_flags;
+	unsigned int req_cpu = cpu;
 
 	/*
 	 * While a work item is PENDING && off queue, a task trying to
@@ -1253,7 +1254,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
 	/* gcwq determined, get cwq and queue */
 	cwq = get_cwq(gcwq->cpu, wq);
-	trace_workqueue_queue_work(cpu, cwq, work);
+	trace_workqueue_queue_work(req_cpu, cwq, work);
 
 	if (WARN_ON(!list_empty(&work->entry))) {
 		spin_unlock(&gcwq->lock);
-- 
cgit v1.2.3


From e42986de481238204f6e0b0f4434da428895c20b Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 15 Aug 2012 23:25:38 +0900
Subject: workqueue: change value of lcpu in __queue_delayed_work_on()

We assign cpu id into work struct's data field in __queue_delayed_work_on().
In current implementation, when work is come in first time,
current running cpu id is assigned.
If we do __queue_delayed_work_on() with CPU A on CPU B,
__queue_work() invoked in delayed_work_timer_fn() go into
the following sub-optimal path in case of WQ_NON_REENTRANT.

	gcwq = get_gcwq(cpu);
	if (wq->flags & WQ_NON_REENTRANT &&
		(last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {

Change lcpu to @cpu and rechange lcpu to local cpu if lcpu is WORK_CPU_UNBOUND.
It is sufficient to prevent to go into sub-optimal path.

tj: Slightly rephrased the comment.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c29f2dc0f4fc..99ee9b939264 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1356,9 +1356,15 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	if (!(wq->flags & WQ_UNBOUND)) {
 		struct global_cwq *gcwq = get_work_gcwq(work);
 
-		if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
+		/*
+		 * If we cannot get the last gcwq from @work directly,
+		 * select the last CPU such that it avoids unnecessarily
+		 * triggering non-reentrancy check in __queue_work().
+		 */
+		lcpu = cpu;
+		if (gcwq)
 			lcpu = gcwq->cpu;
-		else
+		if (lcpu == WORK_CPU_UNBOUND)
 			lcpu = raw_smp_processor_id();
 	} else {
 		lcpu = WORK_CPU_UNBOUND;
-- 
cgit v1.2.3


From 1aabe902ca3638d862bf0dad5a697d3a8e046b0a Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 15 Aug 2012 23:25:39 +0900
Subject: workqueue: introduce system_highpri_wq

Commit 3270476a6c0ce322354df8679652f060d66526dc ('workqueue: reimplement
WQ_HIGHPRI using a separate worker_pool') introduce separate worker pool
for HIGHPRI. When we handle busyworkers for gcwq, it can be normal worker
or highpri worker. But, we don't consider this difference in rebind_workers(),
we use just system_wq for highpri worker. It makes mismatch between
cwq->pool and worker->pool.

It doesn't make error in current implementation, but possible in the future.
Now, we introduce system_highpri_wq to use proper cwq for highpri workers
in rebind_workers(). Following patch fix this issue properly.

tj: Even apart from rebinding, having system_highpri_wq generally
    makes sense.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 99ee9b939264..329c404b68c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -269,12 +269,14 @@ struct workqueue_struct {
 };
 
 struct workqueue_struct *system_wq __read_mostly;
+struct workqueue_struct *system_highpri_wq __read_mostly;
 struct workqueue_struct *system_long_wq __read_mostly;
 struct workqueue_struct *system_nrt_wq __read_mostly;
 struct workqueue_struct *system_unbound_wq __read_mostly;
 struct workqueue_struct *system_freezable_wq __read_mostly;
 struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
+EXPORT_SYMBOL_GPL(system_highpri_wq);
 EXPORT_SYMBOL_GPL(system_long_wq);
 EXPORT_SYMBOL_GPL(system_nrt_wq);
 EXPORT_SYMBOL_GPL(system_unbound_wq);
@@ -3928,6 +3930,7 @@ static int __init init_workqueues(void)
 	}
 
 	system_wq = alloc_workqueue("events", 0, 0);
+	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
 	system_long_wq = alloc_workqueue("events_long", 0, 0);
 	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
 	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
@@ -3936,9 +3939,9 @@ static int __init init_workqueues(void)
 					      WQ_FREEZABLE, 0);
 	system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
 			WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
-	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
-	       !system_unbound_wq || !system_freezable_wq ||
-		!system_nrt_freezable_wq);
+	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
+	       !system_nrt_wq || !system_unbound_wq || !system_freezable_wq ||
+	       !system_nrt_freezable_wq);
 	return 0;
 }
 early_initcall(init_workqueues);
-- 
cgit v1.2.3


From e2b6a6d570f070aa90ac00d2d10b1488512f8520 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 15 Aug 2012 23:25:40 +0900
Subject: workqueue: use system_highpri_wq for highpri workers in
 rebind_workers()

In rebind_workers(), we do inserting a work to rebind to cpu for busy workers.
Currently, in this case, we use only system_wq. This makes a possible
error situation as there is mismatch between cwq->pool and worker->pool.

To prevent this, we should use system_highpri_wq for highpri worker
to match theses. This implements it.

tj: Rephrased comment a bit.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 329c404b68c2..8936761b814a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1741,6 +1741,7 @@ retry:
 	/* rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
 		struct work_struct *rebind_work = &worker->rebind_work;
+		struct workqueue_struct *wq;
 
 		/* morph UNBOUND to REBIND */
 		worker->flags &= ~WORKER_UNBOUND;
@@ -1750,11 +1751,20 @@ retry:
 				     work_data_bits(rebind_work)))
 			continue;
 
-		/* wq doesn't matter, use the default one */
 		debug_work_activate(rebind_work);
-		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
+
+		/*
+		 * wq doesn't really matter but let's keep @worker->pool
+		 * and @cwq->pool consistent for sanity.
+		 */
+		if (worker_pool_pri(worker->pool))
+			wq = system_highpri_wq;
+		else
+			wq = system_wq;
+
+		insert_work(get_cwq(gcwq->cpu, wq), rebind_work,
+			worker->scheduled.next,
+			work_color_to_flags(WORK_NO_COLOR));
 	}
 }
 
-- 
cgit v1.2.3


From 7635d2fd7f0fa63b6ec03050614c314d7139f14a Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 15 Aug 2012 23:25:41 +0900
Subject: workqueue: use system_highpri_wq for unbind_work

To speed cpu down processing up, use system_highpri_wq.
As scheduling priority of workers on it is higher than system_wq and
it is not contended by other normal works on this cpu, work on it
is processed faster than system_wq.

tj: CPU up/downs care quite a bit about latency these days.  This
    shouldn't hurt anything and makes sense.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8936761b814a..7da24711038f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3680,7 +3680,7 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 	case CPU_DOWN_PREPARE:
 		/* unbinding should happen on the local CPU */
 		INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn);
-		schedule_work_on(cpu, &unbind_work);
+		queue_work_on(cpu, system_highpri_wq, &unbind_work);
 		flush_work(&unbind_work);
 		break;
 	}
-- 
cgit v1.2.3


From 044c782ce3a901fbd17cbe701c592f582381174d Mon Sep 17 00:00:00 2001
From: Valentin Ilie
Date: Sun, 19 Aug 2012 00:52:42 +0300
Subject: workqueue: fix checkpatch issues

Fixed some checkpatch warnings.

tj: adapted to wq/for-3.7 and massaged pr_xxx() format strings a bit.

Signed-off-by: Valentin Ilie <valentin.ilie@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <1345326762-21747-1-git-send-email-valentin.ilie@gmail.com>
---
 kernel/workqueue.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7da24711038f..de429ba000ee 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -269,18 +269,18 @@ struct workqueue_struct {
 };
 
 struct workqueue_struct *system_wq __read_mostly;
-struct workqueue_struct *system_highpri_wq __read_mostly;
-struct workqueue_struct *system_long_wq __read_mostly;
-struct workqueue_struct *system_nrt_wq __read_mostly;
-struct workqueue_struct *system_unbound_wq __read_mostly;
-struct workqueue_struct *system_freezable_wq __read_mostly;
-struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
+struct workqueue_struct *system_highpri_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_highpri_wq);
+struct workqueue_struct *system_long_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_long_wq);
+struct workqueue_struct *system_nrt_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_nrt_wq);
+struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 
 #define CREATE_TRACE_POINTS
@@ -2232,11 +2232,9 @@ __acquires(&gcwq->lock)
 	lock_map_release(&cwq->wq->lockdep_map);
 
 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
-		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
-		       "%s/0x%08x/%d\n",
-		       current->comm, preempt_count(), task_pid_nr(current));
-		printk(KERN_ERR "    last function: ");
-		print_symbol("%s\n", (unsigned long)f);
+		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
+		       "     last function: %pf\n",
+		       current->comm, preempt_count(), task_pid_nr(current), f);
 		debug_show_held_locks(current);
 		dump_stack();
 	}
@@ -2790,8 +2788,8 @@ reflush:
 
 		if (++flush_cnt == 10 ||
 		    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
-			pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n",
-				   wq->name, flush_cnt);
+			pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n",
+				wq->name, flush_cnt);
 		goto reflush;
 	}
 
@@ -3275,9 +3273,8 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
 	int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
 
 	if (max_active < 1 || max_active > lim)
-		printk(KERN_WARNING "workqueue: max_active %d requested for %s "
-		       "is out of range, clamping between %d and %d\n",
-		       max_active, name, 1, lim);
+		pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
+			max_active, name, 1, lim);
 
 	return clamp_val(max_active, 1, lim);
 }
-- 
cgit v1.2.3


From dbf2576e37da0fcc7aacbfbb9fd5d3de7888a3c1 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 20 Aug 2012 14:51:23 -0700
Subject: workqueue: make all workqueues non-reentrant

By default, each per-cpu part of a bound workqueue operates separately
and a work item may be executing concurrently on different CPUs.  The
behavior avoids some cross-cpu traffic but leads to subtle weirdities
and not-so-subtle contortions in the API.

* There's no sane usefulness in allowing a single work item to be
  executed concurrently on multiple CPUs.  People just get the
  behavior unintentionally and get surprised after learning about it.
  Most either explicitly synchronize or use non-reentrant/ordered
  workqueue but this is error-prone.

* flush_work() can't wait for multiple instances of the same work item
  on different CPUs.  If a work item is executing on cpu0 and then
  queued on cpu1, flush_work() can only wait for the one on cpu1.

  Unfortunately, work items can easily cross CPU boundaries
  unintentionally when the queueing thread gets migrated.  This means
  that if multiple queuers compete, flush_work() can't even guarantee
  that the instance queued right before it is finished before
  returning.

* flush_work_sync() was added to work around some of the deficiencies
  of flush_work().  In addition to the usual flushing, it ensures that
  all currently executing instances are finished before returning.
  This operation is expensive as it has to walk all CPUs and at the
  same time fails to address competing queuer case.

  Incorrectly using flush_work() when flush_work_sync() is necessary
  is an easy error to make and can lead to bugs which are difficult to
  reproduce.

* Similar problems exist for flush_delayed_work[_sync]().

Other than the cross-cpu access concern, there's no benefit in
allowing parallel execution and it's plain silly to have this level of
contortion for workqueue which is widely used from core code to
extremely obscure drivers.

This patch makes all workqueues non-reentrant.  If a work item is
executing on a different CPU when queueing is requested, it is always
queued to that CPU.  This guarantees that any given work item can be
executing on one CPU at maximum and if a work item is queued and
executing, both are on the same CPU.

The only behavior change which may affect workqueue users negatively
is that non-reentrancy overrides the affinity specified by
queue_work_on().  On a reentrant workqueue, the affinity specified by
queue_work_on() is always followed.  Now, if the work item is
executing on one of the CPUs, the work item will be queued there
regardless of the requested affinity.  I've reviewed all workqueue
users which request explicit affinity, and, fortunately, none seems to
be crazy enough to exploit parallel execution of the same work item.

This adds an additional busy_hash lookup if the work item was
previously queued on a different CPU.  This shouldn't be noticeable
under any sane workload.  Work item queueing isn't a very
high-frequency operation and they don't jump across CPUs all the time.
In a micro benchmark to exaggerate this difference - measuring the
time it takes for two work items to repeatedly jump between two CPUs a
number (10M) of times with busy_hash table densely populated, the
difference was around 3%.

While the overhead is measureable, it is only visible in pathological
cases and the difference isn't huge.  This change brings much needed
sanity to workqueue and makes its behavior consistent with timer.  I
think this is the right tradeoff to make.

This enables significant simplification of workqueue API.
Simplification patches will follow.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index de429ba000ee..c4feef9798ea 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1225,14 +1225,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			cpu = raw_smp_processor_id();
 
 		/*
-		 * It's multi cpu.  If @wq is non-reentrant and @work
-		 * was previously on a different cpu, it might still
-		 * be running there, in which case the work needs to
-		 * be queued on that cpu to guarantee non-reentrance.
+		 * It's multi cpu.  If @work was previously on a different
+		 * cpu, it might still be running there, in which case the
+		 * work needs to be queued on that cpu to guarantee
+		 * non-reentrancy.
 		 */
 		gcwq = get_gcwq(cpu);
-		if (wq->flags & WQ_NON_REENTRANT &&
-		    (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
+		last_gcwq = get_work_gcwq(work);
+
+		if (last_gcwq && last_gcwq != gcwq) {
 			struct worker *worker;
 
 			spin_lock(&last_gcwq->lock);
-- 
cgit v1.2.3


From 606a5020b9bdceb20b4f43e11db0054afa349028 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 20 Aug 2012 14:51:23 -0700
Subject: workqueue: gut flush[_delayed]_work_sync()

Now that all workqueues are non-reentrant, flush[_delayed]_work_sync()
are equivalent to flush[_delayed]_work().  Drop the separate
implementation and make them thin wrappers around
flush[_delayed]_work().

* start_flush_work() no longer takes @wait_executing as the only left
  user - flush_work() - always sets it to %true.

* __cancel_work_timer() uses flush_work() instead of wait_on_work().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  14 +++++-
 kernel/workqueue.c        | 122 ++++------------------------------------------
 2 files changed, 22 insertions(+), 114 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b14d5d59af7c..4f9d3bc161a2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -412,11 +412,9 @@ extern int keventd_up(void);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 extern bool flush_work(struct work_struct *work);
-extern bool flush_work_sync(struct work_struct *work);
 extern bool cancel_work_sync(struct work_struct *work);
 
 extern bool flush_delayed_work(struct delayed_work *dwork);
-extern bool flush_delayed_work_sync(struct delayed_work *work);
 extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
 
 extern void workqueue_set_max_active(struct workqueue_struct *wq,
@@ -456,6 +454,18 @@ static inline bool __cancel_delayed_work(struct delayed_work *work)
 	return ret;
 }
 
+/* used to be different but now identical to flush_work(), deprecated */
+static inline bool flush_work_sync(struct work_struct *work)
+{
+	return flush_work(work);
+}
+
+/* used to be different but now identical to flush_delayed_work(), deprecated */
+static inline bool flush_delayed_work_sync(struct delayed_work *dwork)
+{
+	return flush_delayed_work(dwork);
+}
+
 #ifndef CONFIG_SMP
 static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c4feef9798ea..5f13a9a2c792 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2801,8 +2801,7 @@ reflush:
 }
 EXPORT_SYMBOL_GPL(drain_workqueue);
 
-static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
-			     bool wait_executing)
+static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 {
 	struct worker *worker = NULL;
 	struct global_cwq *gcwq;
@@ -2824,13 +2823,12 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 		cwq = get_work_cwq(work);
 		if (unlikely(!cwq || gcwq != cwq->pool->gcwq))
 			goto already_gone;
-	} else if (wait_executing) {
+	} else {
 		worker = find_worker_executing_work(gcwq, work);
 		if (!worker)
 			goto already_gone;
 		cwq = worker->current_cwq;
-	} else
-		goto already_gone;
+	}
 
 	insert_wq_barrier(cwq, barr, work, worker);
 	spin_unlock_irq(&gcwq->lock);
@@ -2857,15 +2855,8 @@ already_gone:
  * flush_work - wait for a work to finish executing the last queueing instance
  * @work: the work to flush
  *
- * Wait until @work has finished execution.  This function considers
- * only the last queueing instance of @work.  If @work has been
- * enqueued across different CPUs on a non-reentrant workqueue or on
- * multiple workqueues, @work might still be executing on return on
- * some of the CPUs from earlier queueing.
- *
- * If @work was queued only on a non-reentrant, ordered or unbound
- * workqueue, @work is guaranteed to be idle on return if it hasn't
- * been requeued since flush started.
+ * Wait until @work has finished execution.  @work is guaranteed to be idle
+ * on return if it hasn't been requeued since flush started.
  *
  * RETURNS:
  * %true if flush_work() waited for the work to finish execution,
@@ -2878,85 +2869,15 @@ bool flush_work(struct work_struct *work)
 	lock_map_acquire(&work->lockdep_map);
 	lock_map_release(&work->lockdep_map);
 
-	if (start_flush_work(work, &barr, true)) {
+	if (start_flush_work(work, &barr)) {
 		wait_for_completion(&barr.done);
 		destroy_work_on_stack(&barr.work);
 		return true;
-	} else
-		return false;
-}
-EXPORT_SYMBOL_GPL(flush_work);
-
-static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work)
-{
-	struct wq_barrier barr;
-	struct worker *worker;
-
-	spin_lock_irq(&gcwq->lock);
-
-	worker = find_worker_executing_work(gcwq, work);
-	if (unlikely(worker))
-		insert_wq_barrier(worker->current_cwq, &barr, work, worker);
-
-	spin_unlock_irq(&gcwq->lock);
-
-	if (unlikely(worker)) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
-		return true;
-	} else
+	} else {
 		return false;
-}
-
-static bool wait_on_work(struct work_struct *work)
-{
-	bool ret = false;
-	int cpu;
-
-	might_sleep();
-
-	lock_map_acquire(&work->lockdep_map);
-	lock_map_release(&work->lockdep_map);
-
-	for_each_gcwq_cpu(cpu)
-		ret |= wait_on_cpu_work(get_gcwq(cpu), work);
-	return ret;
-}
-
-/**
- * flush_work_sync - wait until a work has finished execution
- * @work: the work to flush
- *
- * Wait until @work has finished execution.  On return, it's
- * guaranteed that all queueing instances of @work which happened
- * before this function is called are finished.  In other words, if
- * @work hasn't been requeued since this function was called, @work is
- * guaranteed to be idle on return.
- *
- * RETURNS:
- * %true if flush_work_sync() waited for the work to finish execution,
- * %false if it was already idle.
- */
-bool flush_work_sync(struct work_struct *work)
-{
-	struct wq_barrier barr;
-	bool pending, waited;
-
-	/* we'll wait for executions separately, queue barr only if pending */
-	pending = start_flush_work(work, &barr, false);
-
-	/* wait for executions to finish */
-	waited = wait_on_work(work);
-
-	/* wait for the pending one */
-	if (pending) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
 	}
-
-	return pending || waited;
 }
-EXPORT_SYMBOL_GPL(flush_work_sync);
+EXPORT_SYMBOL_GPL(flush_work);
 
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
@@ -2970,14 +2891,14 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 		 * would be waiting for before retrying.
 		 */
 		if (unlikely(ret == -ENOENT))
-			wait_on_work(work);
+			flush_work(work);
 	} while (unlikely(ret < 0));
 
 	/* tell other tasks trying to grab @work to back off */
 	mark_work_canceling(work);
 	local_irq_restore(flags);
 
-	wait_on_work(work);
+	flush_work(work);
 	clear_work_data(work);
 	return ret;
 }
@@ -3029,29 +2950,6 @@ bool flush_delayed_work(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(flush_delayed_work);
 
-/**
- * flush_delayed_work_sync - wait for a dwork to finish
- * @dwork: the delayed work to flush
- *
- * Delayed timer is cancelled and the pending work is queued for
- * execution immediately.  Other than timer handling, its behavior
- * is identical to flush_work_sync().
- *
- * RETURNS:
- * %true if flush_work_sync() waited for the work to finish execution,
- * %false if it was already idle.
- */
-bool flush_delayed_work_sync(struct delayed_work *dwork)
-{
-	local_irq_disable();
-	if (del_timer_sync(&dwork->timer))
-		__queue_work(dwork->cpu,
-			     get_work_cwq(&dwork->work)->wq, &dwork->work);
-	local_irq_enable();
-	return flush_work_sync(&dwork->work);
-}
-EXPORT_SYMBOL(flush_delayed_work_sync);
-
 /**
  * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
  * @dwork: the delayed work cancel
-- 
cgit v1.2.3


From ae930e0f4e66fd540c6fbad9f1e2a7743d8b9afe Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 20 Aug 2012 14:51:23 -0700
Subject: workqueue: gut system_nrt[_freezable]_wq()

Now that all workqueues are non-reentrant, system[_freezable]_wq() are
equivalent to system_nrt[_freezable]_wq().  Replace the latter with
wrappers around system[_freezable]_wq().  The wrapping goes through
inline functions so that __deprecated can be added easily.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 23 ++++++++++++++---------
 kernel/workqueue.c        | 10 +---------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4f9d3bc161a2..855fcdaa2d72 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -297,10 +297,6 @@ enum {
  * system_long_wq is similar to system_wq but may host long running
  * works.  Queue flushing might take relatively long.
  *
- * system_nrt_wq is non-reentrant and guarantees that any given work
- * item is never executed in parallel by multiple CPUs.  Queue
- * flushing might take relatively long.
- *
  * system_unbound_wq is unbound workqueue.  Workers are not bound to
  * any specific CPU, not concurrency managed, and all queued works are
  * executed immediately as long as max_active limit is not reached and
@@ -308,16 +304,25 @@ enum {
  *
  * system_freezable_wq is equivalent to system_wq except that it's
  * freezable.
- *
- * system_nrt_freezable_wq is equivalent to system_nrt_wq except that
- * it's freezable.
  */
 extern struct workqueue_struct *system_wq;
 extern struct workqueue_struct *system_long_wq;
-extern struct workqueue_struct *system_nrt_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
-extern struct workqueue_struct *system_nrt_freezable_wq;
+
+static inline struct workqueue_struct *__system_nrt_wq(void)
+{
+	return system_wq;
+}
+
+static inline struct workqueue_struct *__system_nrt_freezable_wq(void)
+{
+	return system_freezable_wq;
+}
+
+/* equivlalent to system_wq and system_freezable_wq, deprecated */
+#define system_nrt_wq			__system_nrt_wq()
+#define system_nrt_freezable_wq		__system_nrt_freezable_wq()
 
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5f13a9a2c792..85bd3409b9f5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -274,14 +274,10 @@ struct workqueue_struct *system_highpri_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_highpri_wq);
 struct workqueue_struct *system_long_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_long_wq);
-struct workqueue_struct *system_nrt_wq __read_mostly;
-EXPORT_SYMBOL_GPL(system_nrt_wq);
 struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
 struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
-struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
-EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -3838,16 +3834,12 @@ static int __init init_workqueues(void)
 	system_wq = alloc_workqueue("events", 0, 0);
 	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
 	system_long_wq = alloc_workqueue("events_long", 0, 0);
-	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
 	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
 					    WQ_UNBOUND_MAX_ACTIVE);
 	system_freezable_wq = alloc_workqueue("events_freezable",
 					      WQ_FREEZABLE, 0);
-	system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
-			WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
 	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
-	       !system_nrt_wq || !system_unbound_wq || !system_freezable_wq ||
-	       !system_nrt_freezable_wq);
+	       !system_unbound_wq || !system_freezable_wq);
 	return 0;
 }
 early_initcall(init_workqueues);
-- 
cgit v1.2.3


From 43829731dd372d04d6706c51052b9dabab9ca356 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 20 Aug 2012 14:51:24 -0700
Subject: workqueue: deprecate flush[_delayed]_work_sync()

flush[_delayed]_work_sync() are now spurious.  Mark them deprecated
and convert all users to flush[_delayed]_work().

If you're cc'd and wondering what's going on: Now all workqueues are
non-reentrant and the regular flushes guarantee that the work item is
not pending or running on any CPU on return, so there's no reason to
use the sync flushes at all and they're going away.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mattia Dongili <malattia@linux.it>
Cc: Kent Yoder <key@linux.vnet.ibm.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Karsten Keil <isdn@linux-pingi.de>
Cc: Bryan Wu <bryan.wu@canonical.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-wireless@vger.kernel.org
Cc: Anton Vorontsov <cbou@mail.ru>
Cc: Sangbeom Kim <sbkim73@samsung.com>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Petr Vandrovec <petr@vandrovec.name>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Avi Kivity <avi@redhat.com> 
---
 arch/arm/mach-pxa/sharpsl_pm.c                  |  4 ++--
 arch/arm/plat-omap/mailbox.c                    |  2 +-
 arch/sh/drivers/push-switch.c                   |  2 +-
 drivers/block/xen-blkfront.c                    |  4 ++--
 drivers/cdrom/gdrom.c                           |  2 +-
 drivers/char/sonypi.c                           |  2 +-
 drivers/char/tpm/tpm.c                          |  4 ++--
 drivers/gpu/drm/exynos/exynos_drm_g2d.c         |  2 +-
 drivers/gpu/drm/nouveau/nouveau_gpio.c          |  2 +-
 drivers/gpu/drm/radeon/radeon_irq_kms.c         |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c              |  2 +-
 drivers/hid/hid-picolcd.c                       |  2 +-
 drivers/input/touchscreen/wm831x-ts.c           |  2 +-
 drivers/isdn/mISDN/hwchannel.c                  |  4 ++--
 drivers/leds/leds-lm3533.c                      |  6 +++---
 drivers/leds/leds-lp8788.c                      |  2 +-
 drivers/leds/leds-wm8350.c                      |  2 +-
 drivers/macintosh/ams/ams-core.c                |  2 +-
 drivers/md/dm-mpath.c                           |  2 +-
 drivers/md/dm-raid1.c                           |  2 +-
 drivers/md/dm-stripe.c                          |  2 +-
 drivers/media/dvb/dvb-core/dvb_net.c            |  4 ++--
 drivers/media/dvb/mantis/mantis_evm.c           |  2 +-
 drivers/media/dvb/mantis/mantis_uart.c          |  2 +-
 drivers/media/video/bt8xx/bttv-driver.c         |  2 +-
 drivers/media/video/cx18/cx18-driver.c          |  2 +-
 drivers/media/video/cx231xx/cx231xx-cards.c     |  2 +-
 drivers/media/video/cx23885/cx23885-input.c     |  6 +++---
 drivers/media/video/cx88/cx88-mpeg.c            |  2 +-
 drivers/media/video/em28xx/em28xx-cards.c       |  2 +-
 drivers/media/video/omap24xxcam.c               |  6 +++---
 drivers/media/video/saa7134/saa7134-core.c      |  2 +-
 drivers/media/video/saa7134/saa7134-empress.c   |  2 +-
 drivers/media/video/tm6000/tm6000-cards.c       |  2 +-
 drivers/mfd/menelaus.c                          |  4 ++--
 drivers/misc/ioc4.c                             |  2 +-
 drivers/mtd/mtdoops.c                           |  4 ++--
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c |  2 +-
 drivers/net/ethernet/neterion/vxge/vxge-main.c  |  2 +-
 drivers/net/ethernet/sun/cassini.c              |  2 +-
 drivers/net/ethernet/sun/niu.c                  |  2 +-
 drivers/net/wireless/hostap/hostap_ap.c         |  4 ++--
 drivers/net/wireless/hostap/hostap_hw.c         | 10 +++++-----
 drivers/power/collie_battery.c                  |  2 +-
 drivers/power/tosa_battery.c                    |  2 +-
 drivers/power/wm97xx_battery.c                  |  2 +-
 drivers/power/z2_battery.c                      |  2 +-
 drivers/regulator/core.c                        |  2 +-
 drivers/scsi/arcmsr/arcmsr_hba.c                |  4 ++--
 drivers/scsi/ipr.c                              |  2 +-
 drivers/scsi/pmcraid.c                          |  2 +-
 drivers/scsi/qla2xxx/qla_target.c               |  2 +-
 drivers/tty/hvc/hvsi.c                          |  2 +-
 drivers/tty/ipwireless/hardware.c               |  2 +-
 drivers/tty/ipwireless/network.c                |  4 ++--
 drivers/tty/serial/kgdboc.c                     |  2 +-
 drivers/tty/serial/omap-serial.c                |  2 +-
 drivers/tty/tty_ldisc.c                         |  6 +++---
 drivers/usb/atm/speedtch.c                      |  2 +-
 drivers/usb/atm/ueagle-atm.c                    |  2 +-
 drivers/usb/gadget/u_ether.c                    |  2 +-
 drivers/usb/host/ohci-hcd.c                     |  2 +-
 drivers/usb/otg/isp1301_omap.c                  |  2 +-
 fs/affs/super.c                                 |  2 +-
 fs/gfs2/lock_dlm.c                              |  2 +-
 fs/gfs2/super.c                                 |  2 +-
 fs/hfs/inode.c                                  |  2 +-
 fs/ncpfs/inode.c                                |  6 +++---
 fs/ocfs2/cluster/quorum.c                       |  2 +-
 fs/xfs/xfs_super.c                              |  2 +-
 fs/xfs/xfs_sync.c                               |  2 +-
 include/linux/workqueue.h                       |  4 ++--
 net/9p/trans_fd.c                               |  2 +-
 net/dsa/dsa.c                                   |  2 +-
 sound/i2c/other/ak4113.c                        |  2 +-
 sound/i2c/other/ak4114.c                        |  2 +-
 sound/pci/oxygen/oxygen_lib.c                   |  8 ++++----
 sound/soc/codecs/wm8350.c                       |  2 +-
 sound/soc/codecs/wm8753.c                       |  2 +-
 sound/soc/soc-core.c                            |  6 +++---
 virt/kvm/eventfd.c                              |  2 +-
 81 files changed, 111 insertions(+), 111 deletions(-)

diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index bdf4cb88ca0a..7875ad6456bd 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -579,8 +579,8 @@ static int sharpsl_ac_check(void)
 static int sharpsl_pm_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	sharpsl_pm.flags |= SHARPSL_SUSPENDED;
-	flush_delayed_work_sync(&toggle_charger);
-	flush_delayed_work_sync(&sharpsl_bat);
+	flush_delayed_work(&toggle_charger);
+	flush_delayed_work(&sharpsl_bat);
 
 	if (sharpsl_pm.charge_mode == CHRG_ON)
 		sharpsl_pm.flags |= SHARPSL_DO_OFFLINE_CHRG;
diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 5e13c3884aa4..42377ef9ea3d 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -310,7 +310,7 @@ static void omap_mbox_fini(struct omap_mbox *mbox)
 		omap_mbox_disable_irq(mbox, IRQ_RX);
 		free_irq(mbox->irq, mbox);
 		tasklet_kill(&mbox->txq->tasklet);
-		flush_work_sync(&mbox->rxq->work);
+		flush_work(&mbox->rxq->work);
 		mbox_queue_free(mbox->txq);
 		mbox_queue_free(mbox->rxq);
 	}
diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c
index 637b79b09657..5bfb341cc5c4 100644
--- a/arch/sh/drivers/push-switch.c
+++ b/arch/sh/drivers/push-switch.c
@@ -107,7 +107,7 @@ static int switch_drv_remove(struct platform_device *pdev)
 		device_remove_file(&pdev->dev, &dev_attr_switch);
 
 	platform_set_drvdata(pdev, NULL);
-	flush_work_sync(&psw->work);
+	flush_work(&psw->work);
 	del_timer_sync(&psw->debounce);
 	free_irq(irq, pdev);
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2c2d2e5c1597..007db8986e84 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -670,7 +670,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 	spin_unlock_irqrestore(&info->io_lock, flags);
 
 	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_work_sync(&info->work);
+	flush_work(&info->work);
 
 	del_gendisk(info->gd);
 
@@ -719,7 +719,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	spin_unlock_irq(&info->io_lock);
 
 	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_work_sync(&info->work);
+	flush_work(&info->work);
 
 	/* Free resources associated with old device channel. */
 	if (info->ring_ref != GRANT_INVALID_REF) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 3ceaf006e7f0..75d485afe56c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -840,7 +840,7 @@ probe_fail_no_mem:
 
 static int __devexit remove_gdrom(struct platform_device *devptr)
 {
-	flush_work_sync(&work);
+	flush_work(&work);
 	blk_cleanup_queue(gd.gdrom_rq);
 	free_irq(HW_EVENT_GDROM_CMD, &gd);
 	free_irq(HW_EVENT_GDROM_DMA, &gd);
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index f87780502b41..320debbe32fa 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1433,7 +1433,7 @@ static int __devexit sonypi_remove(struct platform_device *dev)
 	sonypi_disable();
 
 	synchronize_irq(sonypi_device.irq);
-	flush_work_sync(&sonypi_device.input_work);
+	flush_work(&sonypi_device.input_work);
 
 	if (useinput) {
 		input_unregister_device(sonypi_device.input_key_dev);
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 817f0ee202b6..3af9f4d1a23f 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -1172,7 +1172,7 @@ int tpm_release(struct inode *inode, struct file *file)
 	struct tpm_chip *chip = file->private_data;
 
 	del_singleshot_timer_sync(&chip->user_read_timer);
-	flush_work_sync(&chip->work);
+	flush_work(&chip->work);
 	file->private_data = NULL;
 	atomic_set(&chip->data_pending, 0);
 	kfree(chip->data_buffer);
@@ -1225,7 +1225,7 @@ ssize_t tpm_read(struct file *file, char __user *buf,
 	int rc;
 
 	del_singleshot_timer_sync(&chip->user_read_timer);
-	flush_work_sync(&chip->work);
+	flush_work(&chip->work);
 	ret_size = atomic_read(&chip->data_pending);
 	atomic_set(&chip->data_pending, 0);
 	if (ret_size > 0) {	/* relay data */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index d2d88f22a037..a40808e51338 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -908,7 +908,7 @@ static int g2d_suspend(struct device *dev)
 		/* FIXME: good range? */
 		usleep_range(500, 1000);
 
-	flush_work_sync(&g2d->runqueue_work);
+	flush_work(&g2d->runqueue_work);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_gpio.c b/drivers/gpu/drm/nouveau/nouveau_gpio.c
index 82c19e82ff02..0fe4e17c461d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gpio.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gpio.c
@@ -302,7 +302,7 @@ nouveau_gpio_isr_del(struct drm_device *dev, int idx, u8 tag, u8 line,
 		spin_unlock_irqrestore(&pgpio->lock, flags);
 
 		list_for_each_entry_safe(isr, tmp, &tofree, head) {
-			flush_work_sync(&isr->work);
+			flush_work(&isr->work);
 			kfree(isr);
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index afaa1727abd2..50b596ec7b7e 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -277,7 +277,7 @@ void radeon_irq_kms_fini(struct radeon_device *rdev)
 		if (rdev->msi_enabled)
 			pci_disable_msi(rdev->pdev);
 	}
-	flush_work_sync(&rdev->hotplug_work);
+	flush_work(&rdev->hotplug_work);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 3c447bf317cb..a32f2e96dd02 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -594,7 +594,7 @@ int vmw_fb_off(struct vmw_private *vmw_priv)
 	par->dirty.active = false;
 	spin_unlock_irqrestore(&par->dirty.lock, flags);
 
-	flush_delayed_work_sync(&info->deferred_work);
+	flush_delayed_work(&info->deferred_work);
 
 	par->bo_ptr = NULL;
 	ttm_bo_kunmap(&par->map);
diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c
index 27c8ebdfad01..3e90dee30ff6 100644
--- a/drivers/hid/hid-picolcd.c
+++ b/drivers/hid/hid-picolcd.c
@@ -2737,7 +2737,7 @@ static void __exit picolcd_exit(void)
 {
 	hid_unregister_driver(&picolcd_driver);
 #ifdef CONFIG_HID_PICOLCD_FB
-	flush_work_sync(&picolcd_fb_cleanup);
+	flush_work(&picolcd_fb_cleanup);
 	WARN_ON(fb_pending);
 #endif
 }
diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c
index e83410721e38..52abb98a8ae5 100644
--- a/drivers/input/touchscreen/wm831x-ts.c
+++ b/drivers/input/touchscreen/wm831x-ts.c
@@ -221,7 +221,7 @@ static void wm831x_ts_input_close(struct input_dev *idev)
 	synchronize_irq(wm831x_ts->pd_irq);
 
 	/* Make sure the IRQ completion work is quiesced */
-	flush_work_sync(&wm831x_ts->pd_data_work);
+	flush_work(&wm831x_ts->pd_data_work);
 
 	/* If we ended up with the pen down then make sure we revert back
 	 * to pen detection state for the next time we start up.
diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c
index ef34fd40867c..4b85f7279f18 100644
--- a/drivers/isdn/mISDN/hwchannel.c
+++ b/drivers/isdn/mISDN/hwchannel.c
@@ -116,7 +116,7 @@ mISDN_freedchannel(struct dchannel *ch)
 	}
 	skb_queue_purge(&ch->squeue);
 	skb_queue_purge(&ch->rqueue);
-	flush_work_sync(&ch->workq);
+	flush_work(&ch->workq);
 	return 0;
 }
 EXPORT_SYMBOL(mISDN_freedchannel);
@@ -157,7 +157,7 @@ mISDN_freebchannel(struct bchannel *ch)
 	mISDN_clear_bchannel(ch);
 	skb_queue_purge(&ch->rqueue);
 	ch->rcount = 0;
-	flush_work_sync(&ch->workq);
+	flush_work(&ch->workq);
 	return 0;
 }
 EXPORT_SYMBOL(mISDN_freebchannel);
diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c
index f56b6e7ffdac..f6837b99908c 100644
--- a/drivers/leds/leds-lm3533.c
+++ b/drivers/leds/leds-lm3533.c
@@ -737,7 +737,7 @@ err_sysfs_remove:
 	sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group);
 err_unregister:
 	led_classdev_unregister(&led->cdev);
-	flush_work_sync(&led->work);
+	flush_work(&led->work);
 
 	return ret;
 }
@@ -751,7 +751,7 @@ static int __devexit lm3533_led_remove(struct platform_device *pdev)
 	lm3533_ctrlbank_disable(&led->cb);
 	sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group);
 	led_classdev_unregister(&led->cdev);
-	flush_work_sync(&led->work);
+	flush_work(&led->work);
 
 	return 0;
 }
@@ -765,7 +765,7 @@ static void lm3533_led_shutdown(struct platform_device *pdev)
 
 	lm3533_ctrlbank_disable(&led->cb);
 	lm3533_led_set(&led->cdev, LED_OFF);		/* disable blink */
-	flush_work_sync(&led->work);
+	flush_work(&led->work);
 }
 
 static struct platform_driver lm3533_led_driver = {
diff --git a/drivers/leds/leds-lp8788.c b/drivers/leds/leds-lp8788.c
index 53bd136f1ef0..cb76bc46a021 100644
--- a/drivers/leds/leds-lp8788.c
+++ b/drivers/leds/leds-lp8788.c
@@ -172,7 +172,7 @@ static int __devexit lp8788_led_remove(struct platform_device *pdev)
 	struct lp8788_led *led = platform_get_drvdata(pdev);
 
 	led_classdev_unregister(&led->led_dev);
-	flush_work_sync(&led->work);
+	flush_work(&led->work);
 
 	return 0;
 }
diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c
index 918d4baff1c7..4c62113f7a77 100644
--- a/drivers/leds/leds-wm8350.c
+++ b/drivers/leds/leds-wm8350.c
@@ -275,7 +275,7 @@ static int wm8350_led_remove(struct platform_device *pdev)
 	struct wm8350_led *led = platform_get_drvdata(pdev);
 
 	led_classdev_unregister(&led->cdev);
-	flush_work_sync(&led->work);
+	flush_work(&led->work);
 	wm8350_led_disable(led);
 	regulator_put(led->dcdc);
 	regulator_put(led->isink);
diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c
index 5c6a2d876562..36a4fdddd64a 100644
--- a/drivers/macintosh/ams/ams-core.c
+++ b/drivers/macintosh/ams/ams-core.c
@@ -226,7 +226,7 @@ void ams_sensor_detach(void)
 	 * We do this after ams_info.exit(), because an interrupt might
 	 * have arrived before disabling them.
 	 */
-	flush_work_sync(&ams_info.worker);
+	flush_work(&ams_info.worker);
 
 	/* Remove device */
 	of_device_unregister(ams_info.of_dev);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d8abb90a6c2f..bb18eafcc85a 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -944,7 +944,7 @@ static void flush_multipath_work(struct multipath *m)
 	flush_workqueue(kmpath_handlerd);
 	multipath_wait_for_pg_init_completion(m);
 	flush_workqueue(kmultipathd);
-	flush_work_sync(&m->trigger_event);
+	flush_work(&m->trigger_event);
 }
 
 static void multipath_dtr(struct dm_target *ti)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index bc5ddba8045b..fd61f98ee1f6 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1146,7 +1146,7 @@ static void mirror_dtr(struct dm_target *ti)
 
 	del_timer_sync(&ms->timer);
 	flush_workqueue(ms->kmirrord_wq);
-	flush_work_sync(&ms->trigger_event);
+	flush_work(&ms->trigger_event);
 	dm_kcopyd_client_destroy(ms->kcopyd_client);
 	destroy_workqueue(ms->kmirrord_wq);
 	free_context(ms, ti, ms->nr_mirrors);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index a087bf2a8d66..e2f876539743 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -199,7 +199,7 @@ static void stripe_dtr(struct dm_target *ti)
 	for (i = 0; i < sc->stripes; i++)
 		dm_put_device(ti, sc->stripe[i].dev);
 
-	flush_work_sync(&sc->trigger_event);
+	flush_work(&sc->trigger_event);
 	kfree(sc);
 }
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 8766ce8c354d..c2117688aa23 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -1329,8 +1329,8 @@ static int dvb_net_remove_if(struct dvb_net *dvbnet, unsigned long num)
 		return -EBUSY;
 
 	dvb_net_stop(net);
-	flush_work_sync(&priv->set_multicast_list_wq);
-	flush_work_sync(&priv->restart_net_feed_wq);
+	flush_work(&priv->set_multicast_list_wq);
+	flush_work(&priv->restart_net_feed_wq);
 	printk("dvb_net: removed network interface %s\n", net->name);
 	unregister_netdev(net);
 	dvbnet->state[num]=0;
diff --git a/drivers/media/dvb/mantis/mantis_evm.c b/drivers/media/dvb/mantis/mantis_evm.c
index 71ce52875c38..909ff54868a3 100644
--- a/drivers/media/dvb/mantis/mantis_evm.c
+++ b/drivers/media/dvb/mantis/mantis_evm.c
@@ -111,7 +111,7 @@ void mantis_evmgr_exit(struct mantis_ca *ca)
 	struct mantis_pci *mantis = ca->ca_priv;
 
 	dprintk(MANTIS_DEBUG, 1, "Mantis Host I/F Event manager exiting");
-	flush_work_sync(&ca->hif_evm_work);
+	flush_work(&ca->hif_evm_work);
 	mantis_hif_exit(ca);
 	mantis_pcmcia_exit(ca);
 }
diff --git a/drivers/media/dvb/mantis/mantis_uart.c b/drivers/media/dvb/mantis/mantis_uart.c
index 18340dafa426..85e977861b4a 100644
--- a/drivers/media/dvb/mantis/mantis_uart.c
+++ b/drivers/media/dvb/mantis/mantis_uart.c
@@ -183,6 +183,6 @@ void mantis_uart_exit(struct mantis_pci *mantis)
 {
 	/* disable interrupt */
 	mmwrite(mmread(MANTIS_UART_CTL) & 0xffef, MANTIS_UART_CTL);
-	flush_work_sync(&mantis->uart_work);
+	flush_work(&mantis->uart_work);
 }
 EXPORT_SYMBOL_GPL(mantis_uart_exit);
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index b58ff87db771..2ce7179a3864 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -196,7 +196,7 @@ static void request_modules(struct bttv *dev)
 
 static void flush_request_modules(struct bttv *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 #else
 #define request_modules(dev)
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 7e5ffd6f5178..75c890907920 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -272,7 +272,7 @@ static void request_modules(struct cx18 *dev)
 
 static void flush_request_modules(struct cx18 *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 #else
 #define request_modules(dev)
diff --git a/drivers/media/video/cx231xx/cx231xx-cards.c b/drivers/media/video/cx231xx/cx231xx-cards.c
index 02d4d36735d3..b84ebc54d91b 100644
--- a/drivers/media/video/cx231xx/cx231xx-cards.c
+++ b/drivers/media/video/cx231xx/cx231xx-cards.c
@@ -1002,7 +1002,7 @@ static void request_modules(struct cx231xx *dev)
 
 static void flush_request_modules(struct cx231xx *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 #else
 #define request_modules(dev)
diff --git a/drivers/media/video/cx23885/cx23885-input.c b/drivers/media/video/cx23885/cx23885-input.c
index ce765e3f77bd..bcbf7faf1bab 100644
--- a/drivers/media/video/cx23885/cx23885-input.c
+++ b/drivers/media/video/cx23885/cx23885-input.c
@@ -231,9 +231,9 @@ static void cx23885_input_ir_stop(struct cx23885_dev *dev)
 		v4l2_subdev_call(dev->sd_ir, ir, rx_s_parameters, &params);
 		v4l2_subdev_call(dev->sd_ir, ir, rx_g_parameters, &params);
 	}
-	flush_work_sync(&dev->cx25840_work);
-	flush_work_sync(&dev->ir_rx_work);
-	flush_work_sync(&dev->ir_tx_work);
+	flush_work(&dev->cx25840_work);
+	flush_work(&dev->ir_rx_work);
+	flush_work(&dev->ir_tx_work);
 }
 
 static void cx23885_input_ir_close(struct rc_dev *rc)
diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c
index cd5386ee210c..c04fb618e10b 100644
--- a/drivers/media/video/cx88/cx88-mpeg.c
+++ b/drivers/media/video/cx88/cx88-mpeg.c
@@ -70,7 +70,7 @@ static void request_modules(struct cx8802_dev *dev)
 
 static void flush_request_modules(struct cx8802_dev *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 #else
 #define request_modules(dev)
diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index ca62b9981380..f7831e73f077 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c
@@ -2900,7 +2900,7 @@ static void request_modules(struct em28xx *dev)
 
 static void flush_request_modules(struct em28xx *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 #else
 #define request_modules(dev)
diff --git a/drivers/media/video/omap24xxcam.c b/drivers/media/video/omap24xxcam.c
index e5015b0d5508..8d7283bbd431 100644
--- a/drivers/media/video/omap24xxcam.c
+++ b/drivers/media/video/omap24xxcam.c
@@ -1198,7 +1198,7 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i)
 
 	atomic_inc(&cam->reset_disable);
 
-	flush_work_sync(&cam->sensor_reset_work);
+	flush_work(&cam->sensor_reset_work);
 
 	rval = videobuf_streamoff(q);
 	if (!rval) {
@@ -1512,7 +1512,7 @@ static int omap24xxcam_release(struct file *file)
 
 	atomic_inc(&cam->reset_disable);
 
-	flush_work_sync(&cam->sensor_reset_work);
+	flush_work(&cam->sensor_reset_work);
 
 	/* stop streaming capture */
 	videobuf_streamoff(&fh->vbq);
@@ -1536,7 +1536,7 @@ static int omap24xxcam_release(struct file *file)
 	 * not be scheduled anymore since streaming is already
 	 * disabled.)
 	 */
-	flush_work_sync(&cam->sensor_reset_work);
+	flush_work(&cam->sensor_reset_work);
 
 	mutex_lock(&cam->mutex);
 	if (atomic_dec_return(&cam->users) == 0) {
diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c
index 5fbb4e49495c..f2b37e05b964 100644
--- a/drivers/media/video/saa7134/saa7134-core.c
+++ b/drivers/media/video/saa7134/saa7134-core.c
@@ -170,7 +170,7 @@ static void request_submodules(struct saa7134_dev *dev)
 
 static void flush_request_submodules(struct saa7134_dev *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 
 #else
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index dde361a9194e..4df79c656909 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -556,7 +556,7 @@ static int empress_fini(struct saa7134_dev *dev)
 
 	if (NULL == dev->empress_dev)
 		return 0;
-	flush_work_sync(&dev->empress_workqueue);
+	flush_work(&dev->empress_workqueue);
 	video_unregister_device(dev->empress_dev);
 	dev->empress_dev = NULL;
 	return 0;
diff --git a/drivers/media/video/tm6000/tm6000-cards.c b/drivers/media/video/tm6000/tm6000-cards.c
index 034659b13174..307d8c5fb7cd 100644
--- a/drivers/media/video/tm6000/tm6000-cards.c
+++ b/drivers/media/video/tm6000/tm6000-cards.c
@@ -1074,7 +1074,7 @@ static void request_modules(struct tm6000_core *dev)
 
 static void flush_request_modules(struct tm6000_core *dev)
 {
-	flush_work_sync(&dev->request_module_wk);
+	flush_work(&dev->request_module_wk);
 }
 #else
 #define request_modules(dev)
diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c
index cb4910ac4d12..55d589981412 100644
--- a/drivers/mfd/menelaus.c
+++ b/drivers/mfd/menelaus.c
@@ -1259,7 +1259,7 @@ static int menelaus_probe(struct i2c_client *client,
 	return 0;
 fail2:
 	free_irq(client->irq, menelaus);
-	flush_work_sync(&menelaus->work);
+	flush_work(&menelaus->work);
 fail1:
 	kfree(menelaus);
 	return err;
@@ -1270,7 +1270,7 @@ static int __exit menelaus_remove(struct i2c_client *client)
 	struct menelaus_chip	*menelaus = i2c_get_clientdata(client);
 
 	free_irq(client->irq, menelaus);
-	flush_work_sync(&menelaus->work);
+	flush_work(&menelaus->work);
 	kfree(menelaus);
 	the_menelaus = NULL;
 	return 0;
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
index df03dd3bd0e2..6a7710603a90 100644
--- a/drivers/misc/ioc4.c
+++ b/drivers/misc/ioc4.c
@@ -487,7 +487,7 @@ static void __exit
 ioc4_exit(void)
 {
 	/* Ensure ioc4_load_modules() has completed before exiting */
-	flush_work_sync(&ioc4_load_modules_work);
+	flush_work(&ioc4_load_modules_work);
 	pci_unregister_driver(&ioc4_driver);
 }
 
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 551e316e4454..438737a1f59a 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -387,8 +387,8 @@ static void mtdoops_notify_remove(struct mtd_info *mtd)
 		printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n");
 
 	cxt->mtd = NULL;
-	flush_work_sync(&cxt->work_erase);
-	flush_work_sync(&cxt->work_write);
+	flush_work(&cxt->work_erase);
+	flush_work(&cxt->work_write);
 }
 
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 6505070abcfa..089ed134369b 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -1394,7 +1394,7 @@ static int offload_close(struct t3cdev *tdev)
 	sysfs_remove_group(&tdev->lldev->dev.kobj, &offload_attr_group);
 
 	/* Flush work scheduled while releasing TIDs */
-	flush_work_sync(&td->tid_release_task);
+	flush_work(&td->tid_release_task);
 
 	tdev->lldev = NULL;
 	cxgb3_set_dummy_ops(tdev);
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index de2190443510..30ce10bc9149 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3521,7 +3521,7 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev)
 
 	strncpy(buf, dev->name, IFNAMSIZ);
 
-	flush_work_sync(&vdev->reset_task);
+	flush_work(&vdev->reset_task);
 
 	/* in 2.6 will call stop() if device is up */
 	unregister_netdev(dev);
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index ce4df61b4b56..c8251be104d6 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -3890,7 +3890,7 @@ static int cas_change_mtu(struct net_device *dev, int new_mtu)
 	schedule_work(&cp->reset_task);
 #endif
 
-	flush_work_sync(&cp->reset_task);
+	flush_work(&cp->reset_task);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index c2a0fe393267..710f35395195 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9932,7 +9932,7 @@ static int niu_suspend(struct pci_dev *pdev, pm_message_t state)
 	if (!netif_running(dev))
 		return 0;
 
-	flush_work_sync(&np->reset_task);
+	flush_work(&np->reset_task);
 	niu_netif_stop(np);
 
 	del_timer_sync(&np->timer);
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index e1f410277242..c6ea995750db 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -860,10 +860,10 @@ void hostap_free_data(struct ap_data *ap)
 		return;
 	}
 
-	flush_work_sync(&ap->add_sta_proc_queue);
+	flush_work(&ap->add_sta_proc_queue);
 
 #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
-	flush_work_sync(&ap->wds_oper_queue);
+	flush_work(&ap->wds_oper_queue);
 	if (ap->crypt)
 		ap->crypt->deinit(ap->crypt_priv);
 	ap->crypt = ap->crypt_priv = NULL;
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 50f87b60b0bd..8e7000fd4414 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3311,13 +3311,13 @@ static void prism2_free_local_data(struct net_device *dev)
 
 	unregister_netdev(local->dev);
 
-	flush_work_sync(&local->reset_queue);
-	flush_work_sync(&local->set_multicast_list_queue);
-	flush_work_sync(&local->set_tim_queue);
+	flush_work(&local->reset_queue);
+	flush_work(&local->set_multicast_list_queue);
+	flush_work(&local->set_tim_queue);
 #ifndef PRISM2_NO_STATION_MODES
-	flush_work_sync(&local->info_queue);
+	flush_work(&local->info_queue);
 #endif
-	flush_work_sync(&local->comms_qual_update);
+	flush_work(&local->comms_qual_update);
 
 	lib80211_crypt_info_free(&local->crypt_info);
 
diff --git a/drivers/power/collie_battery.c b/drivers/power/collie_battery.c
index 74c6b23aeabf..b19bfe400f8c 100644
--- a/drivers/power/collie_battery.c
+++ b/drivers/power/collie_battery.c
@@ -290,7 +290,7 @@ static struct gpio collie_batt_gpios[] = {
 static int collie_bat_suspend(struct ucb1x00_dev *dev, pm_message_t state)
 {
 	/* flush all pending status updates */
-	flush_work_sync(&bat_work);
+	flush_work(&bat_work);
 	return 0;
 }
 
diff --git a/drivers/power/tosa_battery.c b/drivers/power/tosa_battery.c
index 28bbe7e094e3..51199b5ce221 100644
--- a/drivers/power/tosa_battery.c
+++ b/drivers/power/tosa_battery.c
@@ -327,7 +327,7 @@ static struct gpio tosa_bat_gpios[] = {
 static int tosa_bat_suspend(struct platform_device *dev, pm_message_t state)
 {
 	/* flush all pending status updates */
-	flush_work_sync(&bat_work);
+	flush_work(&bat_work);
 	return 0;
 }
 
diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index d2d4c08c681c..1245fe1f48c3 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -146,7 +146,7 @@ static irqreturn_t wm97xx_chrg_irq(int irq, void *data)
 #ifdef CONFIG_PM
 static int wm97xx_bat_suspend(struct device *dev)
 {
-	flush_work_sync(&bat_work);
+	flush_work(&bat_work);
 	return 0;
 }
 
diff --git a/drivers/power/z2_battery.c b/drivers/power/z2_battery.c
index 8c9a607ea77a..5757d0d6782f 100644
--- a/drivers/power/z2_battery.c
+++ b/drivers/power/z2_battery.c
@@ -276,7 +276,7 @@ static int z2_batt_suspend(struct device *dev)
 	struct i2c_client *client = to_i2c_client(dev);
 	struct z2_charger *charger = i2c_get_clientdata(client);
 
-	flush_work_sync(&charger->bat_work);
+	flush_work(&charger->bat_work);
 	return 0;
 }
 
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f092588a078c..1d2b70017a8d 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3335,7 +3335,7 @@ void regulator_unregister(struct regulator_dev *rdev)
 		regulator_put(rdev->supply);
 	mutex_lock(&regulator_list_mutex);
 	debugfs_remove_recursive(rdev->debugfs);
-	flush_work_sync(&rdev->disable_work.work);
+	flush_work(&rdev->disable_work.work);
 	WARN_ON(rdev->open_count);
 	unset_regulator_supplies(rdev);
 	list_del(&rdev->list);
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index def24a1079ad..33c52bc2c7b4 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -999,7 +999,7 @@ static void arcmsr_remove(struct pci_dev *pdev)
 	int poll_count = 0;
 	arcmsr_free_sysfs_attr(acb);
 	scsi_remove_host(host);
-	flush_work_sync(&acb->arcmsr_do_message_isr_bh);
+	flush_work(&acb->arcmsr_do_message_isr_bh);
 	del_timer_sync(&acb->eternal_timer);
 	arcmsr_disable_outbound_ints(acb);
 	arcmsr_stop_adapter_bgrb(acb);
@@ -1045,7 +1045,7 @@ static void arcmsr_shutdown(struct pci_dev *pdev)
 		(struct AdapterControlBlock *)host->hostdata;
 	del_timer_sync(&acb->eternal_timer);
 	arcmsr_disable_outbound_ints(acb);
-	flush_work_sync(&acb->arcmsr_do_message_isr_bh);
+	flush_work(&acb->arcmsr_do_message_isr_bh);
 	arcmsr_stop_adapter_bgrb(acb);
 	arcmsr_flush_adapter_cache(acb);
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 467dc38246f9..2ffeb9afd1b7 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -9020,7 +9020,7 @@ static void __ipr_remove(struct pci_dev *pdev)
 
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
 	wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
-	flush_work_sync(&ioa_cfg->work_q);
+	flush_work(&ioa_cfg->work_q);
 	spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
 
 	spin_lock(&ipr_driver_lock);
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index ea8a0b47d66d..af763eab2039 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -5459,7 +5459,7 @@ static void __devexit pmcraid_remove(struct pci_dev *pdev)
 	pmcraid_shutdown(pdev);
 
 	pmcraid_disable_interrupts(pinstance, ~0);
-	flush_work_sync(&pinstance->worker_q);
+	flush_work(&pinstance->worker_q);
 
 	pmcraid_kill_tasklets(pinstance);
 	pmcraid_unregister_interrupt_handler(pinstance);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 5b30132960c7..bddc97c5c8e9 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -969,7 +969,7 @@ void qlt_stop_phase1(struct qla_tgt *tgt)
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	mutex_unlock(&ha->tgt.tgt_mutex);
 
-	flush_delayed_work_sync(&tgt->sess_del_work);
+	flush_delayed_work(&tgt->sess_del_work);
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf009,
 	    "Waiting for sess works (tgt %p)", tgt);
diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c
index 6f5bc49c441f..1f8e8b37ed23 100644
--- a/drivers/tty/hvc/hvsi.c
+++ b/drivers/tty/hvc/hvsi.c
@@ -765,7 +765,7 @@ static void hvsi_flush_output(struct hvsi_struct *hp)
 
 	/* 'writer' could still be pending if it didn't see n_outbuf = 0 yet */
 	cancel_delayed_work_sync(&hp->writer);
-	flush_work_sync(&hp->handshaker);
+	flush_work(&hp->handshaker);
 
 	/*
 	 * it's also possible that our timeout expired and hvsi_write_worker
diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c
index 0aeb5a38d296..b4ba0670dc54 100644
--- a/drivers/tty/ipwireless/hardware.c
+++ b/drivers/tty/ipwireless/hardware.c
@@ -1729,7 +1729,7 @@ void ipwireless_hardware_free(struct ipw_hardware *hw)
 
 	ipwireless_stop_interrupts(hw);
 
-	flush_work_sync(&hw->work_rx);
+	flush_work(&hw->work_rx);
 
 	for (i = 0; i < NL_NUM_OF_ADDRESSES; i++)
 		if (hw->packet_assembler[i] != NULL)
diff --git a/drivers/tty/ipwireless/network.c b/drivers/tty/ipwireless/network.c
index 57c8b481113f..90ea902ff191 100644
--- a/drivers/tty/ipwireless/network.c
+++ b/drivers/tty/ipwireless/network.c
@@ -430,8 +430,8 @@ void ipwireless_network_free(struct ipw_network *network)
 	network->shutting_down = 1;
 
 	ipwireless_ppp_close(network);
-	flush_work_sync(&network->work_go_online);
-	flush_work_sync(&network->work_go_offline);
+	flush_work(&network->work_go_online);
+	flush_work(&network->work_go_offline);
 
 	ipwireless_stop_interrupts(network->hardware);
 	ipwireless_associate_network(network->hardware, NULL);
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 2b42a01a81c6..ebabf929f460 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -122,7 +122,7 @@ static void kgdboc_unregister_kbd(void)
 			i--;
 		}
 	}
-	flush_work_sync(&kgdboc_restore_input_work);
+	flush_work(&kgdboc_restore_input_work);
 }
 #else /* ! CONFIG_KDB_KEYBOARD */
 #define kgdboc_register_kbd(x) 0
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index d3cda0cb2df0..0952d71bdf28 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1205,7 +1205,7 @@ static int serial_omap_suspend(struct device *dev)
 
 	if (up) {
 		uart_suspend_port(&serial_omap_reg, &up->port);
-		flush_work_sync(&up->qos_work);
+		flush_work(&up->qos_work);
 	}
 
 	return 0;
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 6f99c9959f0c..ac5be812dbe3 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -523,9 +523,9 @@ static int tty_ldisc_halt(struct tty_struct *tty)
  */
 static void tty_ldisc_flush_works(struct tty_struct *tty)
 {
-	flush_work_sync(&tty->hangup_work);
-	flush_work_sync(&tty->SAK_work);
-	flush_work_sync(&tty->buf.work);
+	flush_work(&tty->hangup_work);
+	flush_work(&tty->SAK_work);
+	flush_work(&tty->buf.work);
 }
 
 /**
diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c
index 975e9c6691d6..807627b36cc8 100644
--- a/drivers/usb/atm/speedtch.c
+++ b/drivers/usb/atm/speedtch.c
@@ -718,7 +718,7 @@ static void speedtch_atm_stop(struct usbatm_data *usbatm, struct atm_dev *atm_de
 	del_timer_sync(&instance->resubmit_timer);
 	usb_free_urb(int_urb);
 
-	flush_work_sync(&instance->status_check_work);
+	flush_work(&instance->status_check_work);
 }
 
 static int speedtch_pre_reset(struct usb_interface *intf)
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index d7e422dc0ef7..9d1a044504e8 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -2234,7 +2234,7 @@ static void uea_stop(struct uea_softc *sc)
 	usb_free_urb(sc->urb_int);
 
 	/* flush the work item, when no one can schedule it */
-	flush_work_sync(&sc->task);
+	flush_work(&sc->task);
 
 	release_firmware(sc->dsp_firm);
 	uea_leaves(INS_TO_USBDEV(sc));
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 90e82e288eb9..f1e5fbf5c2c7 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -834,7 +834,7 @@ void gether_cleanup(void)
 		return;
 
 	unregister_netdev(the_dev->net);
-	flush_work_sync(&the_dev->work);
+	flush_work(&the_dev->work);
 	free_netdev(the_dev->net);
 
 	the_dev = NULL;
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 2b1e8d84c873..2364098ea83c 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -893,7 +893,7 @@ static void ohci_stop (struct usb_hcd *hcd)
 	ohci_dump (ohci, 1);
 
 	if (quirk_nec(ohci))
-		flush_work_sync(&ohci->nec_work);
+		flush_work(&ohci->nec_work);
 
 	ohci_usb_reset (ohci);
 	ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable);
diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c
index 7a88667742b6..720df35561c9 100644
--- a/drivers/usb/otg/isp1301_omap.c
+++ b/drivers/usb/otg/isp1301_omap.c
@@ -1230,7 +1230,7 @@ static int __exit isp1301_remove(struct i2c_client *i2c)
 	isp->timer.data = 0;
 	set_bit(WORK_STOP, &isp->todo);
 	del_timer_sync(&isp->timer);
-	flush_work_sync(&isp->work);
+	flush_work(&isp->work);
 
 	put_device(&i2c->dev);
 	the_transceiver = NULL;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index c70f1e5fc024..022cecb0757d 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -551,7 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		return -EINVAL;
 	}
 
-	flush_delayed_work_sync(&sbi->sb_work);
+	flush_delayed_work(&sbi->sb_work);
 	replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 4a38db739ca0..0fb6539b0c8c 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1289,7 +1289,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
 	spin_lock(&ls->ls_recover_spin);
 	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
 	spin_unlock(&ls->ls_recover_spin);
-	flush_delayed_work_sync(&sdp->sd_control_work);
+	flush_delayed_work(&sdp->sd_control_work);
 
 	/* mounted_lock and control_lock will be purged in dlm recovery */
 release:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fc3168f47a14..867700aba536 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1572,7 +1572,7 @@ out:
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
-	flush_delayed_work_sync(&ip->i_gl->gl_work);
+	flush_delayed_work(&ip->i_gl->gl_work);
 	gfs2_glock_add_to_lru(ip->i_gl);
 	gfs2_glock_put(ip->i_gl);
 	ip->i_gl = NULL;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index ee1bc55677f1..553909395270 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -644,7 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
-	flush_delayed_work_sync(&HFS_SB(sb)->mdb_work);
+	flush_delayed_work(&HFS_SB(sb)->mdb_work);
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
 	if (!ret)
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 333df07ae3bd..eaa74323663a 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -314,11 +314,11 @@ static void ncp_stop_tasks(struct ncp_server *server) {
 	release_sock(sk);
 	del_timer_sync(&server->timeout_tm);
 
-	flush_work_sync(&server->rcv.tq);
+	flush_work(&server->rcv.tq);
 	if (sk->sk_socket->type == SOCK_STREAM)
-		flush_work_sync(&server->tx.tq);
+		flush_work(&server->tx.tq);
 	else
-		flush_work_sync(&server->timeout_tq);
+		flush_work(&server->timeout_tq);
 }
 
 static int  ncp_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 8f9cea1597af..c19897d0fe14 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -327,5 +327,5 @@ void o2quo_exit(void)
 {
 	struct o2quo_state *qs = &o2quo_state;
 
-	flush_work_sync(&qs->qs_work);
+	flush_work(&qs->qs_work);
 }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a2..e8e6be439bcd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_sync_fs(
 		 * We schedule xfssyncd now (now that the disk is
 		 * active) instead of later (when it might not be).
 		 */
-		flush_delayed_work_sync(&mp->m_sync_work);
+		flush_delayed_work(&mp->m_sync_work);
 	}
 
 	return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 96548176db80..9500caf15acf 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -475,7 +475,7 @@ xfs_flush_inodes(
 	struct xfs_mount	*mp = ip->i_mount;
 
 	queue_work(xfs_syncd_wq, &mp->m_flush_work);
-	flush_work_sync(&mp->m_flush_work);
+	flush_work(&mp->m_flush_work);
 }
 
 STATIC void
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 855fcdaa2d72..a351be7c3e91 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -460,13 +460,13 @@ static inline bool __cancel_delayed_work(struct delayed_work *work)
 }
 
 /* used to be different but now identical to flush_work(), deprecated */
-static inline bool flush_work_sync(struct work_struct *work)
+static inline bool __deprecated flush_work_sync(struct work_struct *work)
 {
 	return flush_work(work);
 }
 
 /* used to be different but now identical to flush_delayed_work(), deprecated */
-static inline bool flush_delayed_work_sync(struct delayed_work *dwork)
+static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork)
 {
 	return flush_delayed_work(dwork);
 }
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6449bae15702..505f0ce3f10b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1083,7 +1083,7 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
-	flush_work_sync(&p9_poll_work);
+	flush_work(&p9_poll_work);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 88e7c2f3fa0d..45295ca09571 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -370,7 +370,7 @@ static int dsa_remove(struct platform_device *pdev)
 	if (dst->link_poll_needed)
 		del_timer_sync(&dst->link_poll_timer);
 
-	flush_work_sync(&dst->link_poll_work);
+	flush_work(&dst->link_poll_work);
 
 	for (i = 0; i < dst->pd->nr_chips; i++) {
 		struct dsa_switch *ds = dst->ds[i];
diff --git a/sound/i2c/other/ak4113.c b/sound/i2c/other/ak4113.c
index dde5c9c92132..ef68d710d08c 100644
--- a/sound/i2c/other/ak4113.c
+++ b/sound/i2c/other/ak4113.c
@@ -141,7 +141,7 @@ void snd_ak4113_reinit(struct ak4113 *chip)
 {
 	chip->init = 1;
 	mb();
-	flush_delayed_work_sync(&chip->work);
+	flush_delayed_work(&chip->work);
 	ak4113_init_regs(chip);
 	/* bring up statistics / event queing */
 	chip->init = 0;
diff --git a/sound/i2c/other/ak4114.c b/sound/i2c/other/ak4114.c
index fdf3c1b65e38..816e7d225fb0 100644
--- a/sound/i2c/other/ak4114.c
+++ b/sound/i2c/other/ak4114.c
@@ -154,7 +154,7 @@ void snd_ak4114_reinit(struct ak4114 *chip)
 {
 	chip->init = 1;
 	mb();
-	flush_delayed_work_sync(&chip->work);
+	flush_delayed_work(&chip->work);
 	ak4114_init_regs(chip);
 	/* bring up statistics / event queing */
 	chip->init = 0;
diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c
index ab8738e21ad1..e9fa2d07951d 100644
--- a/sound/pci/oxygen/oxygen_lib.c
+++ b/sound/pci/oxygen/oxygen_lib.c
@@ -573,8 +573,8 @@ static void oxygen_card_free(struct snd_card *card)
 	oxygen_shutdown(chip);
 	if (chip->irq >= 0)
 		free_irq(chip->irq, chip);
-	flush_work_sync(&chip->spdif_input_bits_work);
-	flush_work_sync(&chip->gpio_work);
+	flush_work(&chip->spdif_input_bits_work);
+	flush_work(&chip->gpio_work);
 	chip->model.cleanup(chip);
 	kfree(chip->model_data);
 	mutex_destroy(&chip->mutex);
@@ -751,8 +751,8 @@ static int oxygen_pci_suspend(struct device *dev)
 	spin_unlock_irq(&chip->reg_lock);
 
 	synchronize_irq(chip->irq);
-	flush_work_sync(&chip->spdif_input_bits_work);
-	flush_work_sync(&chip->gpio_work);
+	flush_work(&chip->spdif_input_bits_work);
+	flush_work(&chip->gpio_work);
 	chip->interrupt_mask = saved_interrupt_mask;
 
 	pci_disable_device(pci);
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index d26c8ae4e6d9..a4cae060bf26 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -1601,7 +1601,7 @@ static int  wm8350_codec_remove(struct snd_soc_codec *codec)
 
 	/* if there was any work waiting then we run it now and
 	 * wait for its completion */
-	flush_delayed_work_sync(&codec->dapm.delayed_work);
+	flush_delayed_work(&codec->dapm.delayed_work);
 
 	wm8350_set_bias_level(codec, SND_SOC_BIAS_OFF);
 
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index 13bff87ddcf5..2e4a775ae560 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -1509,7 +1509,7 @@ static int wm8753_probe(struct snd_soc_codec *codec)
 /* power down chip */
 static int wm8753_remove(struct snd_soc_codec *codec)
 {
-	flush_delayed_work_sync(&codec->dapm.delayed_work);
+	flush_delayed_work(&codec->dapm.delayed_work);
 	wm8753_set_bias_level(codec, SND_SOC_BIAS_OFF);
 
 	return 0;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index f219b2f7ee68..f6eec782c61e 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -591,7 +591,7 @@ int snd_soc_suspend(struct device *dev)
 
 	/* close any waiting streams and save state */
 	for (i = 0; i < card->num_rtd; i++) {
-		flush_delayed_work_sync(&card->rtd[i].delayed_work);
+		flush_delayed_work(&card->rtd[i].delayed_work);
 		card->rtd[i].codec->dapm.suspend_bias_level = card->rtd[i].codec->dapm.bias_level;
 	}
 
@@ -1846,7 +1846,7 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
 	/* make sure any delayed work runs */
 	for (i = 0; i < card->num_rtd; i++) {
 		struct snd_soc_pcm_runtime *rtd = &card->rtd[i];
-		flush_delayed_work_sync(&rtd->delayed_work);
+		flush_delayed_work(&rtd->delayed_work);
 	}
 
 	/* remove auxiliary devices */
@@ -1890,7 +1890,7 @@ int snd_soc_poweroff(struct device *dev)
 	 * now, we're shutting down so no imminent restart. */
 	for (i = 0; i < card->num_rtd; i++) {
 		struct snd_soc_pcm_runtime *rtd = &card->rtd[i];
-		flush_delayed_work_sync(&rtd->delayed_work);
+		flush_delayed_work(&rtd->delayed_work);
 	}
 
 	snd_soc_dapm_shutdown(card);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 7d7e2aaffece..67a35e90384c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -90,7 +90,7 @@ irqfd_shutdown(struct work_struct *work)
 	 * We know no new events will be scheduled at this point, so block
 	 * until all previously outstanding events have completed
 	 */
-	flush_work_sync(&irqfd->inject);
+	flush_work(&irqfd->inject);
 
 	/*
 	 * It is now safe to release the object's resources
-- 
cgit v1.2.3


From 3b07e9ca26866697616097044f25fbe53dbab693 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 20 Aug 2012 14:51:24 -0700
Subject: workqueue: deprecate system_nrt[_freezable]_wq

system_nrt[_freezable]_wq are now spurious.  Mark them deprecated and
convert all users to system[_freezable]_wq.

If you're cc'd and wondering what's going on: Now all workqueues are
non-reentrant, so there's no reason to use system_nrt[_freezable]_wq.
Please use system[_freezable]_wq instead.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-By: Lai Jiangshan <laijs@cn.fujitsu.com>

Cc: Jens Axboe <axboe@kernel.dk>
Cc: David Airlie <airlied@linux.ie>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: David Howells <dhowells@redhat.com>
---
 block/blk-throttle.c              |  7 +++----
 block/genhd.c                     | 10 +++++-----
 drivers/gpu/drm/drm_crtc_helper.c |  6 +++---
 drivers/hid/hid-wiimote-ext.c     |  2 +-
 drivers/mmc/core/host.c           |  4 ++--
 drivers/net/virtio_net.c          | 12 ++++++------
 include/linux/workqueue.h         |  4 ++--
 kernel/srcu.c                     |  4 ++--
 security/keys/gc.c                |  8 ++++----
 security/keys/key.c               |  2 +-
 10 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e287c19908c8..5a58e779912b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -180,7 +180,7 @@ static inline unsigned int total_nr_queued(struct throtl_data *td)
 
 /*
  * Worker for allocating per cpu stat for tgs. This is scheduled on the
- * system_nrt_wq once there are some groups on the alloc_list waiting for
+ * system_wq once there are some groups on the alloc_list waiting for
  * allocation.
  */
 static void tg_stats_alloc_fn(struct work_struct *work)
@@ -194,8 +194,7 @@ alloc_stats:
 		stats_cpu = alloc_percpu(struct tg_stats_cpu);
 		if (!stats_cpu) {
 			/* allocation failed, try again after some time */
-			queue_delayed_work(system_nrt_wq, dwork,
-					   msecs_to_jiffies(10));
+			schedule_delayed_work(dwork, msecs_to_jiffies(10));
 			return;
 		}
 	}
@@ -238,7 +237,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
 	 */
 	spin_lock_irqsave(&tg_stats_alloc_lock, flags);
 	list_add(&tg->stats_alloc_node, &tg_stats_alloc_list);
-	queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0);
+	schedule_delayed_work(&tg_stats_alloc_work, 0);
 	spin_unlock_irqrestore(&tg_stats_alloc_lock, flags);
 }
 
diff --git a/block/genhd.c b/block/genhd.c
index 5d8b44a6442b..a2f3d6a5f55c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1490,9 +1490,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
 	intv = disk_events_poll_jiffies(disk);
 	set_timer_slack(&ev->dwork.timer, intv / 4);
 	if (check_now)
-		queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
+		queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
 	else if (intv)
-		queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
+		queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
 out_unlock:
 	spin_unlock_irqrestore(&ev->lock, flags);
 }
@@ -1535,7 +1535,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
 	spin_lock_irq(&ev->lock);
 	ev->clearing |= mask;
 	if (!ev->block)
-		mod_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
+		mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
 	spin_unlock_irq(&ev->lock);
 }
 
@@ -1571,7 +1571,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
 
 	/* uncondtionally schedule event check and wait for it to finish */
 	disk_block_events(disk);
-	queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
+	queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
 	flush_delayed_work(&ev->dwork);
 	__disk_unblock_events(disk, false);
 
@@ -1608,7 +1608,7 @@ static void disk_events_workfn(struct work_struct *work)
 
 	intv = disk_events_poll_jiffies(disk);
 	if (!ev->block && intv)
-		queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
+		queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
 
 	spin_unlock_irq(&ev->lock);
 
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 3252e7067d8b..8fa9d52820d9 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -968,7 +968,7 @@ static void output_poll_execute(struct work_struct *work)
 	}
 
 	if (repoll)
-		queue_delayed_work(system_nrt_wq, delayed_work, DRM_OUTPUT_POLL_PERIOD);
+		schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD);
 }
 
 void drm_kms_helper_poll_disable(struct drm_device *dev)
@@ -993,7 +993,7 @@ void drm_kms_helper_poll_enable(struct drm_device *dev)
 	}
 
 	if (poll)
-		queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD);
+		schedule_delayed_work(&dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD);
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
@@ -1020,6 +1020,6 @@ void drm_helper_hpd_irq_event(struct drm_device *dev)
 	/* kill timer and schedule immediate execution, this doesn't block */
 	cancel_delayed_work(&dev->mode_config.output_poll_work);
 	if (drm_kms_helper_poll)
-		queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, 0);
+		schedule_delayed_work(&dev->mode_config.output_poll_work, 0);
 }
 EXPORT_SYMBOL(drm_helper_hpd_irq_event);
diff --git a/drivers/hid/hid-wiimote-ext.c b/drivers/hid/hid-wiimote-ext.c
index 0a1805c9b0e5..d37cd092ffc7 100644
--- a/drivers/hid/hid-wiimote-ext.c
+++ b/drivers/hid/hid-wiimote-ext.c
@@ -204,7 +204,7 @@ static void wiiext_worker(struct work_struct *work)
 /* schedule work only once, otherwise mark for reschedule */
 static void wiiext_schedule(struct wiimote_ext *ext)
 {
-	queue_work(system_nrt_wq, &ext->worker);
+	schedule_work(&ext->worker);
 }
 
 /*
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 597f189b4427..ee2e16b17017 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -204,8 +204,8 @@ void mmc_host_clk_release(struct mmc_host *host)
 	host->clk_requests--;
 	if (mmc_host_may_gate_card(host->card) &&
 	    !host->clk_requests)
-		queue_delayed_work(system_nrt_wq, &host->clk_gate_work,
-				msecs_to_jiffies(host->clkgate_delay));
+		schedule_delayed_work(&host->clk_gate_work,
+				      msecs_to_jiffies(host->clkgate_delay));
 	spin_unlock_irqrestore(&host->clk_lock, flags);
 }
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 83d2b0c34c5e..9650c413e11f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -521,7 +521,7 @@ static void refill_work(struct work_struct *work)
 	/* In theory, this can happen: if we don't get any buffers in
 	 * we will *never* try to fill again. */
 	if (still_empty)
-		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
+		schedule_delayed_work(&vi->refill, HZ/2);
 }
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -540,7 +540,7 @@ again:
 
 	if (vi->num < vi->max / 2) {
 		if (!try_fill_recv(vi, GFP_ATOMIC))
-			queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+			schedule_delayed_work(&vi->refill, 0);
 	}
 
 	/* Out of packets? */
@@ -745,7 +745,7 @@ static int virtnet_open(struct net_device *dev)
 
 	/* Make sure we have some buffers: if oom use wq. */
 	if (!try_fill_recv(vi, GFP_KERNEL))
-		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+		schedule_delayed_work(&vi->refill, 0);
 
 	virtnet_napi_enable(vi);
 	return 0;
@@ -1020,7 +1020,7 @@ static void virtnet_config_changed(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 
-	queue_work(system_nrt_wq, &vi->config_work);
+	schedule_work(&vi->config_work);
 }
 
 static int init_vqs(struct virtnet_info *vi)
@@ -1152,7 +1152,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	   otherwise get link status from config. */
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
 		netif_carrier_off(dev);
-		queue_work(system_nrt_wq, &vi->config_work);
+		schedule_work(&vi->config_work);
 	} else {
 		vi->status = VIRTIO_NET_S_LINK_UP;
 		netif_carrier_on(dev);
@@ -1264,7 +1264,7 @@ static int virtnet_restore(struct virtio_device *vdev)
 	netif_device_attach(vi->dev);
 
 	if (!try_fill_recv(vi, GFP_KERNEL))
-		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+		schedule_delayed_work(&vi->refill, 0);
 
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = true;
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a351be7c3e91..1ce3fb08308d 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -310,12 +310,12 @@ extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
 
-static inline struct workqueue_struct *__system_nrt_wq(void)
+static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
 {
 	return system_wq;
 }
 
-static inline struct workqueue_struct *__system_nrt_freezable_wq(void)
+static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void)
 {
 	return system_freezable_wq;
 }
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 2095be3318d5..97c465ebd844 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -379,7 +379,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	rcu_batch_queue(&sp->batch_queue, head);
 	if (!sp->running) {
 		sp->running = true;
-		queue_delayed_work(system_nrt_wq, &sp->work, 0);
+		schedule_delayed_work(&sp->work, 0);
 	}
 	spin_unlock_irqrestore(&sp->queue_lock, flags);
 }
@@ -631,7 +631,7 @@ static void srcu_reschedule(struct srcu_struct *sp)
 	}
 
 	if (pending)
-		queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL);
+		schedule_delayed_work(&sp->work, SRCU_INTERVAL);
 }
 
 /*
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 61ab7c82ebb1..d67c97bb1025 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -62,7 +62,7 @@ void key_schedule_gc(time_t gc_at)
 
 	if (gc_at <= now || test_bit(KEY_GC_REAP_KEYTYPE, &key_gc_flags)) {
 		kdebug("IMMEDIATE");
-		queue_work(system_nrt_wq, &key_gc_work);
+		schedule_work(&key_gc_work);
 	} else if (gc_at < key_gc_next_run) {
 		kdebug("DEFERRED");
 		key_gc_next_run = gc_at;
@@ -77,7 +77,7 @@ void key_schedule_gc(time_t gc_at)
 void key_schedule_gc_links(void)
 {
 	set_bit(KEY_GC_KEY_EXPIRED, &key_gc_flags);
-	queue_work(system_nrt_wq, &key_gc_work);
+	schedule_work(&key_gc_work);
 }
 
 /*
@@ -120,7 +120,7 @@ void key_gc_keytype(struct key_type *ktype)
 	set_bit(KEY_GC_REAP_KEYTYPE, &key_gc_flags);
 
 	kdebug("schedule");
-	queue_work(system_nrt_wq, &key_gc_work);
+	schedule_work(&key_gc_work);
 
 	kdebug("sleep");
 	wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, key_gc_wait_bit,
@@ -369,7 +369,7 @@ maybe_resched:
 	}
 
 	if (gc_state & KEY_GC_REAP_AGAIN)
-		queue_work(system_nrt_wq, &key_gc_work);
+		schedule_work(&key_gc_work);
 	kleave(" [end %x]", gc_state);
 	return;
 
diff --git a/security/keys/key.c b/security/keys/key.c
index 50d96d4e06f2..3cbe3529c418 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -598,7 +598,7 @@ void key_put(struct key *key)
 		key_check(key);
 
 		if (atomic_dec_and_test(&key->usage))
-			queue_work(system_nrt_wq, &key_gc_work);
+			schedule_work(&key_gc_work);
 	}
 }
 EXPORT_SYMBOL(key_put);
-- 
cgit v1.2.3


From ee64e7f697ad7e5575e6ac8900cfb71975484421 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:23 -0700
Subject: workqueue: cosmetic whitespace updates for macro definitions

Consistently use the last tab position for '\' line continuation in
complex macro definitions.  This is to help the following patches.

This patch is cosmetic.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 126 +++++++++++++++++++++++-----------------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1ce3fb08308d..26c5b4c63861 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -126,43 +126,43 @@ struct execute_work {
 #define __WORK_INIT_LOCKDEP_MAP(n, k)
 #endif
 
-#define __WORK_INITIALIZER(n, f) {				\
-	.data = WORK_DATA_STATIC_INIT(),			\
-	.entry	= { &(n).entry, &(n).entry },			\
-	.func = (f),						\
-	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
+#define __WORK_INITIALIZER(n, f) {					\
+	.data = WORK_DATA_STATIC_INIT(),				\
+	.entry	= { &(n).entry, &(n).entry },				\
+	.func = (f),							\
+	__WORK_INIT_LOCKDEP_MAP(#n, &(n))				\
 	}
 
-#define __DELAYED_WORK_INITIALIZER(n, f) {			\
-	.work = __WORK_INITIALIZER((n).work, (f)),		\
-	.timer = TIMER_INITIALIZER(delayed_work_timer_fn,	\
-				0, (unsigned long)&(n)),	\
+#define __DELAYED_WORK_INITIALIZER(n, f) {				\
+	.work = __WORK_INITIALIZER((n).work, (f)),			\
+	.timer = TIMER_INITIALIZER(delayed_work_timer_fn,		\
+				0, (unsigned long)&(n)),		\
 	}
 
-#define __DEFERRED_WORK_INITIALIZER(n, f) {			\
-	.work = __WORK_INITIALIZER((n).work, (f)),		\
-	.timer = TIMER_DEFERRED_INITIALIZER(delayed_work_timer_fn, \
-				0, (unsigned long)&(n)),	\
+#define __DEFERRED_WORK_INITIALIZER(n, f) {				\
+	.work = __WORK_INITIALIZER((n).work, (f)),			\
+	.timer = TIMER_DEFERRED_INITIALIZER(delayed_work_timer_fn,	\
+				0, (unsigned long)&(n)),		\
 	}
 
-#define DECLARE_WORK(n, f)					\
+#define DECLARE_WORK(n, f)						\
 	struct work_struct n = __WORK_INITIALIZER(n, f)
 
-#define DECLARE_DELAYED_WORK(n, f)				\
+#define DECLARE_DELAYED_WORK(n, f)					\
 	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
 
-#define DECLARE_DEFERRED_WORK(n, f)				\
+#define DECLARE_DEFERRED_WORK(n, f)					\
 	struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f)
 
 /*
  * initialize a work item's function pointer
  */
-#define PREPARE_WORK(_work, _func)				\
-	do {							\
-		(_work)->func = (_func);			\
+#define PREPARE_WORK(_work, _func)					\
+	do {								\
+		(_work)->func = (_func);				\
 	} while (0)
 
-#define PREPARE_DELAYED_WORK(_work, _func)			\
+#define PREPARE_DELAYED_WORK(_work, _func)				\
 	PREPARE_WORK(&(_work)->work, (_func))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -192,7 +192,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 									\
 		__init_work((_work), _onstack);				\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
-		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
+		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
@@ -206,38 +206,38 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 	} while (0)
 #endif
 
-#define INIT_WORK(_work, _func)					\
-	do {							\
-		__INIT_WORK((_work), (_func), 0);		\
+#define INIT_WORK(_work, _func)						\
+	do {								\
+		__INIT_WORK((_work), (_func), 0);			\
 	} while (0)
 
-#define INIT_WORK_ONSTACK(_work, _func)				\
-	do {							\
-		__INIT_WORK((_work), (_func), 1);		\
+#define INIT_WORK_ONSTACK(_work, _func)					\
+	do {								\
+		__INIT_WORK((_work), (_func), 1);			\
 	} while (0)
 
-#define INIT_DELAYED_WORK(_work, _func)				\
-	do {							\
-		INIT_WORK(&(_work)->work, (_func));		\
-		init_timer(&(_work)->timer);			\
-		(_work)->timer.function = delayed_work_timer_fn;\
-		(_work)->timer.data = (unsigned long)(_work);	\
+#define INIT_DELAYED_WORK(_work, _func)					\
+	do {								\
+		INIT_WORK(&(_work)->work, (_func));			\
+		init_timer(&(_work)->timer);				\
+		(_work)->timer.function = delayed_work_timer_fn;	\
+		(_work)->timer.data = (unsigned long)(_work);		\
 	} while (0)
 
-#define INIT_DELAYED_WORK_ONSTACK(_work, _func)			\
-	do {							\
-		INIT_WORK_ONSTACK(&(_work)->work, (_func));	\
-		init_timer_on_stack(&(_work)->timer);		\
-		(_work)->timer.function = delayed_work_timer_fn;\
-		(_work)->timer.data = (unsigned long)(_work);	\
+#define INIT_DELAYED_WORK_ONSTACK(_work, _func)				\
+	do {								\
+		INIT_WORK_ONSTACK(&(_work)->work, (_func));		\
+		init_timer_on_stack(&(_work)->timer);			\
+		(_work)->timer.function = delayed_work_timer_fn;	\
+		(_work)->timer.data = (unsigned long)(_work);		\
 	} while (0)
 
-#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)		\
-	do {							\
-		INIT_WORK(&(_work)->work, (_func));		\
-		init_timer_deferrable(&(_work)->timer);		\
-		(_work)->timer.function = delayed_work_timer_fn;\
-		(_work)->timer.data = (unsigned long)(_work);	\
+#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)			\
+	do {								\
+		INIT_WORK(&(_work)->work, (_func));			\
+		init_timer_deferrable(&(_work)->timer);			\
+		(_work)->timer.function = delayed_work_timer_fn;	\
+		(_work)->timer.data = (unsigned long)(_work);		\
 	} while (0)
 
 /**
@@ -345,22 +345,22 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
 #ifdef CONFIG_LOCKDEP
-#define alloc_workqueue(fmt, flags, max_active, args...)	\
-({								\
-	static struct lock_class_key __key;			\
-	const char *__lock_name;				\
-								\
-	if (__builtin_constant_p(fmt))				\
-		__lock_name = (fmt);				\
-	else							\
-		__lock_name = #fmt;				\
-								\
-	__alloc_workqueue_key((fmt), (flags), (max_active),	\
-			      &__key, __lock_name, ##args);	\
+#define alloc_workqueue(fmt, flags, max_active, args...)		\
+({									\
+	static struct lock_class_key __key;				\
+	const char *__lock_name;					\
+									\
+	if (__builtin_constant_p(fmt))					\
+		__lock_name = (fmt);					\
+	else								\
+		__lock_name = #fmt;					\
+									\
+	__alloc_workqueue_key((fmt), (flags), (max_active),		\
+			      &__key, __lock_name, ##args);		\
 })
 #else
-#define alloc_workqueue(fmt, flags, max_active, args...)	\
-	__alloc_workqueue_key((fmt), (flags), (max_active),	\
+#define alloc_workqueue(fmt, flags, max_active, args...)		\
+	__alloc_workqueue_key((fmt), (flags), (max_active),		\
 			      NULL, NULL, ##args)
 #endif
 
@@ -377,14 +377,14 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
  * RETURNS:
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
-#define alloc_ordered_workqueue(fmt, flags, args...)		\
+#define alloc_ordered_workqueue(fmt, flags, args...)			\
 	alloc_workqueue(fmt, WQ_UNBOUND | (flags), 1, ##args)
 
-#define create_workqueue(name)					\
+#define create_workqueue(name)						\
 	alloc_workqueue((name), WQ_MEM_RECLAIM, 1)
-#define create_freezable_workqueue(name)			\
+#define create_freezable_workqueue(name)				\
 	alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
-#define create_singlethread_workqueue(name)			\
+#define create_singlethread_workqueue(name)				\
 	alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
-- 
cgit v1.2.3


From 203b42f7317494ae5e5efc7be6fb7f29c927f102 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:23 -0700
Subject: workqueue: make deferrable delayed_work initializer names consistent

Initalizers for deferrable delayed_work are confused.

* __DEFERRED_WORK_INITIALIZER()
* DECLARE_DEFERRED_WORK()
* INIT_DELAYED_WORK_DEFERRABLE()

Rename them to

* __DEFERRABLE_WORK_INITIALIZER()
* DECLARE_DEFERRABLE_WORK()
* INIT_DEFERRABLE_WORK()

This patch doesn't cause any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/powerpc/platforms/cell/cpufreq_spudemand.c | 2 +-
 drivers/cpufreq/cpufreq_conservative.c          | 2 +-
 drivers/cpufreq/cpufreq_ondemand.c              | 2 +-
 drivers/devfreq/devfreq.c                       | 2 +-
 drivers/net/ethernet/mellanox/mlx4/sense.c      | 2 +-
 drivers/power/ab8500_btemp.c                    | 2 +-
 drivers/power/ab8500_charger.c                  | 8 ++++----
 drivers/power/ab8500_fg.c                       | 8 ++++----
 drivers/power/abx500_chargalg.c                 | 4 ++--
 drivers/power/max17040_battery.c                | 2 +-
 drivers/video/omap2/displays/panel-taal.c       | 6 +++---
 drivers/video/omap2/dss/dsi.c                   | 4 ++--
 include/linux/workqueue.h                       | 8 ++++----
 mm/slab.c                                       | 2 +-
 mm/vmstat.c                                     | 2 +-
 net/core/neighbour.c                            | 2 +-
 net/ipv4/inetpeer.c                             | 2 +-
 net/sunrpc/cache.c                              | 2 +-
 18 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index 23bc9db4317e..82607d621aca 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -76,7 +76,7 @@ static void spu_gov_work(struct work_struct *work)
 static void spu_gov_init_work(struct spu_gov_info_struct *info)
 {
 	int delay = usecs_to_jiffies(info->poll_int);
-	INIT_DELAYED_WORK_DEFERRABLE(&info->work, spu_gov_work);
+	INIT_DEFERRABLE_WORK(&info->work, spu_gov_work);
 	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
 }
 
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 235a340e81f2..55f0354864e2 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -466,7 +466,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
 	delay -= jiffies % delay;
 
 	dbs_info->enable = 1;
-	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+	INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer);
 	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
 }
 
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 836e9b062e5e..14c1af5a264f 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -644,7 +644,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
 		delay -= jiffies % delay;
 
 	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
-	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+	INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer);
 	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
 }
 
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 70c31d43fff3..b146d76f04cf 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -607,7 +607,7 @@ static int __init devfreq_start_polling(void)
 	mutex_lock(&devfreq_list_lock);
 	polling = false;
 	devfreq_wq = create_freezable_workqueue("devfreq_wq");
-	INIT_DELAYED_WORK_DEFERRABLE(&devfreq_work, devfreq_monitor);
+	INIT_DEFERRABLE_WORK(&devfreq_work, devfreq_monitor);
 	mutex_unlock(&devfreq_list_lock);
 
 	devfreq_monitor(&devfreq_work.work);
diff --git a/drivers/net/ethernet/mellanox/mlx4/sense.c b/drivers/net/ethernet/mellanox/mlx4/sense.c
index 802498293528..37b23780a4d8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/sense.c
+++ b/drivers/net/ethernet/mellanox/mlx4/sense.c
@@ -153,5 +153,5 @@ void  mlx4_sense_init(struct mlx4_dev *dev)
 	for (port = 1; port <= dev->caps.num_ports; port++)
 		sense->do_sense_port[port] = 1;
 
-	INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port);
+	INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port);
 }
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index bba3ccac72fe..3041514f4d3f 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -1018,7 +1018,7 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 	}
 
 	/* Init work for measuring temperature periodically */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->btemp_periodic_work,
+	INIT_DEFERRABLE_WORK(&di->btemp_periodic_work,
 		ab8500_btemp_periodic_work);
 
 	/* Identify the battery */
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index d4f0c98428cb..0701dbc2b7e1 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2618,9 +2618,9 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 	}
 
 	/* Init work for HW failure check */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->check_hw_failure_work,
+	INIT_DEFERRABLE_WORK(&di->check_hw_failure_work,
 		ab8500_charger_check_hw_failure_work);
-	INIT_DELAYED_WORK_DEFERRABLE(&di->check_usbchgnotok_work,
+	INIT_DEFERRABLE_WORK(&di->check_usbchgnotok_work,
 		ab8500_charger_check_usbchargernotok_work);
 
 	/*
@@ -2632,10 +2632,10 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 	 * watchdog have to be kicked by the charger driver
 	 * when the AC charger is disabled
 	 */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->kick_wd_work,
+	INIT_DEFERRABLE_WORK(&di->kick_wd_work,
 		ab8500_charger_kick_watchdog_work);
 
-	INIT_DELAYED_WORK_DEFERRABLE(&di->check_vbat_work,
+	INIT_DEFERRABLE_WORK(&di->check_vbat_work,
 		ab8500_charger_check_vbat_work);
 
 	/* Init work for charger detection */
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index bf022255994c..5c9e7c263c38 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -2516,19 +2516,19 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	INIT_WORK(&di->fg_acc_cur_work, ab8500_fg_acc_cur_work);
 
 	/* Init work for reinitialising the fg algorithm */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->fg_reinit_work,
+	INIT_DEFERRABLE_WORK(&di->fg_reinit_work,
 		ab8500_fg_reinit_work);
 
 	/* Work delayed Queue to run the state machine */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->fg_periodic_work,
+	INIT_DEFERRABLE_WORK(&di->fg_periodic_work,
 		ab8500_fg_periodic_work);
 
 	/* Work to check low battery condition */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->fg_low_bat_work,
+	INIT_DEFERRABLE_WORK(&di->fg_low_bat_work,
 		ab8500_fg_low_bat_work);
 
 	/* Init work for HW failure check */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->fg_check_hw_failure_work,
+	INIT_DEFERRABLE_WORK(&di->fg_check_hw_failure_work,
 		ab8500_fg_check_hw_failure_work);
 
 	/* Initialize OVV, and other registers */
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 804b88c760d6..4d302803ffcc 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -1848,9 +1848,9 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 	}
 
 	/* Init work for chargalg */
-	INIT_DELAYED_WORK_DEFERRABLE(&di->chargalg_periodic_work,
+	INIT_DEFERRABLE_WORK(&di->chargalg_periodic_work,
 		abx500_chargalg_periodic_work);
-	INIT_DELAYED_WORK_DEFERRABLE(&di->chargalg_wd_work,
+	INIT_DEFERRABLE_WORK(&di->chargalg_wd_work,
 		abx500_chargalg_wd_work);
 
 	/* Init work for chargalg */
diff --git a/drivers/power/max17040_battery.c b/drivers/power/max17040_battery.c
index c284143cfcd7..58e67830143c 100644
--- a/drivers/power/max17040_battery.c
+++ b/drivers/power/max17040_battery.c
@@ -232,7 +232,7 @@ static int __devinit max17040_probe(struct i2c_client *client,
 	max17040_reset(client);
 	max17040_get_version(client);
 
-	INIT_DELAYED_WORK_DEFERRABLE(&chip->work, max17040_work);
+	INIT_DEFERRABLE_WORK(&chip->work, max17040_work);
 	schedule_delayed_work(&chip->work, MAX17040_DELAY);
 
 	return 0;
diff --git a/drivers/video/omap2/displays/panel-taal.c b/drivers/video/omap2/displays/panel-taal.c
index 3f5acc7771da..6b5e6e0e202f 100644
--- a/drivers/video/omap2/displays/panel-taal.c
+++ b/drivers/video/omap2/displays/panel-taal.c
@@ -906,7 +906,7 @@ static int taal_probe(struct omap_dss_device *dssdev)
 		r = -ENOMEM;
 		goto err_wq;
 	}
-	INIT_DELAYED_WORK_DEFERRABLE(&td->esd_work, taal_esd_work);
+	INIT_DEFERRABLE_WORK(&td->esd_work, taal_esd_work);
 	INIT_DELAYED_WORK(&td->ulps_work, taal_ulps_work);
 
 	dev_set_drvdata(&dssdev->dev, td);
@@ -962,8 +962,8 @@ static int taal_probe(struct omap_dss_device *dssdev)
 			goto err_irq;
 		}
 
-		INIT_DELAYED_WORK_DEFERRABLE(&td->te_timeout_work,
-					taal_te_timeout_work_callback);
+		INIT_DEFERRABLE_WORK(&td->te_timeout_work,
+				     taal_te_timeout_work_callback);
 
 		dev_dbg(&dssdev->dev, "Using GPIO TE\n");
 	}
diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c
index b07e8864f82f..fd40f2625051 100644
--- a/drivers/video/omap2/dss/dsi.c
+++ b/drivers/video/omap2/dss/dsi.c
@@ -4863,8 +4863,8 @@ static int __init omap_dsihw_probe(struct platform_device *dsidev)
 	mutex_init(&dsi->lock);
 	sema_init(&dsi->bus_lock, 1);
 
-	INIT_DELAYED_WORK_DEFERRABLE(&dsi->framedone_timeout_work,
-			dsi_framedone_timeout_work_callback);
+	INIT_DEFERRABLE_WORK(&dsi->framedone_timeout_work,
+			     dsi_framedone_timeout_work_callback);
 
 #ifdef DSI_CATCH_MISSING_TE
 	init_timer(&dsi->te_timer);
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 26c5b4c63861..49a9c51f9ee3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -139,7 +139,7 @@ struct execute_work {
 				0, (unsigned long)&(n)),		\
 	}
 
-#define __DEFERRED_WORK_INITIALIZER(n, f) {				\
+#define __DEFERRABLE_WORK_INITIALIZER(n, f) {				\
 	.work = __WORK_INITIALIZER((n).work, (f)),			\
 	.timer = TIMER_DEFERRED_INITIALIZER(delayed_work_timer_fn,	\
 				0, (unsigned long)&(n)),		\
@@ -151,8 +151,8 @@ struct execute_work {
 #define DECLARE_DELAYED_WORK(n, f)					\
 	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
 
-#define DECLARE_DEFERRED_WORK(n, f)					\
-	struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f)
+#define DECLARE_DEFERRABLE_WORK(n, f)					\
+	struct delayed_work n = __DEFERRABLE_WORK_INITIALIZER(n, f)
 
 /*
  * initialize a work item's function pointer
@@ -232,7 +232,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 		(_work)->timer.data = (unsigned long)(_work);		\
 	} while (0)
 
-#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)			\
+#define INIT_DEFERRABLE_WORK(_work, _func)				\
 	do {								\
 		INIT_WORK(&(_work)->work, (_func));			\
 		init_timer_deferrable(&(_work)->timer);			\
diff --git a/mm/slab.c b/mm/slab.c
index f8b0d539b482..35b5cb0da554 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -900,7 +900,7 @@ static void __cpuinit start_cpu_timer(int cpu)
 	 */
 	if (keventd_up() && reap_work->work.func == NULL) {
 		init_reap_node(cpu);
-		INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
+		INIT_DEFERRABLE_WORK(reap_work, cache_reap);
 		schedule_delayed_work_on(cpu, reap_work,
 					__round_jiffies_relative(HZ, cpu));
 	}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index df7a6748231d..b3e3b9d525d0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1157,7 +1157,7 @@ static void __cpuinit start_cpu_timer(int cpu)
 {
 	struct delayed_work *work = &per_cpu(vmstat_work, cpu);
 
-	INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
+	INIT_DEFERRABLE_WORK(work, vmstat_update);
 	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
 }
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 117afaf51268..112c6e2266e9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1545,7 +1545,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		panic("cannot allocate neighbour cache hashes");
 
 	rwlock_init(&tbl->lock);
-	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
+	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
 	skb_queue_head_init_class(&tbl->proxy_queue,
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e1e0a4e8fd34..7b55c8648d4b 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -194,7 +194,7 @@ void __init inet_initpeers(void)
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
-	INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
+	INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2afd2a84dc35..2a68bb3db772 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1635,7 +1635,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 
 void __init cache_initialize(void)
 {
-	INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
+	INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean);
 }
 
 int cache_register_net(struct cache_detail *cd, struct net *net)
-- 
cgit v1.2.3


From f991b318cc6627a493b0d317a565bb7c3271f36b Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:23 -0700
Subject: workqueue: clean up delayed_work initializers and add missing one

Reimplement delayed_work initializers using new timer initializers
which take timer flags.  This reduces code duplications and will ease
further initializer changes.  This patch also adds a missing
initializer - INIT_DEFERRABLE_WORK_ONSTACK().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 48 +++++++++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 49a9c51f9ee3..e84ebb69607d 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -133,26 +133,20 @@ struct execute_work {
 	__WORK_INIT_LOCKDEP_MAP(#n, &(n))				\
 	}
 
-#define __DELAYED_WORK_INITIALIZER(n, f) {				\
+#define __DELAYED_WORK_INITIALIZER(n, f, tflags) {			\
 	.work = __WORK_INITIALIZER((n).work, (f)),			\
-	.timer = TIMER_INITIALIZER(delayed_work_timer_fn,		\
-				0, (unsigned long)&(n)),		\
-	}
-
-#define __DEFERRABLE_WORK_INITIALIZER(n, f) {				\
-	.work = __WORK_INITIALIZER((n).work, (f)),			\
-	.timer = TIMER_DEFERRED_INITIALIZER(delayed_work_timer_fn,	\
-				0, (unsigned long)&(n)),		\
+	.timer = __TIMER_INITIALIZER(delayed_work_timer_fn,		\
+			0, (unsigned long)&(n), (tflags)),		\
 	}
 
 #define DECLARE_WORK(n, f)						\
 	struct work_struct n = __WORK_INITIALIZER(n, f)
 
 #define DECLARE_DELAYED_WORK(n, f)					\
-	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
 
 #define DECLARE_DEFERRABLE_WORK(n, f)					\
-	struct delayed_work n = __DEFERRABLE_WORK_INITIALIZER(n, f)
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE)
 
 /*
  * initialize a work item's function pointer
@@ -216,29 +210,33 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 		__INIT_WORK((_work), (_func), 1);			\
 	} while (0)
 
-#define INIT_DELAYED_WORK(_work, _func)					\
+#define __INIT_DELAYED_WORK(_work, _func, _tflags)			\
 	do {								\
 		INIT_WORK(&(_work)->work, (_func));			\
-		init_timer(&(_work)->timer);				\
-		(_work)->timer.function = delayed_work_timer_fn;	\
-		(_work)->timer.data = (unsigned long)(_work);		\
+		__setup_timer(&(_work)->timer, delayed_work_timer_fn,	\
+			      (unsigned long)(_work), (_tflags));	\
 	} while (0)
 
-#define INIT_DELAYED_WORK_ONSTACK(_work, _func)				\
+#define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags)		\
 	do {								\
 		INIT_WORK_ONSTACK(&(_work)->work, (_func));		\
-		init_timer_on_stack(&(_work)->timer);			\
-		(_work)->timer.function = delayed_work_timer_fn;	\
-		(_work)->timer.data = (unsigned long)(_work);		\
+		__setup_timer_on_stack(&(_work)->timer,			\
+				       delayed_work_timer_fn,		\
+				       (unsigned long)(_work),		\
+				       (_tflags));			\
 	} while (0)
 
+#define INIT_DELAYED_WORK(_work, _func)					\
+	__INIT_DELAYED_WORK(_work, _func, 0)
+
+#define INIT_DELAYED_WORK_ONSTACK(_work, _func)				\
+	__INIT_DELAYED_WORK_ONSTACK(_work, _func, 0)
+
 #define INIT_DEFERRABLE_WORK(_work, _func)				\
-	do {								\
-		INIT_WORK(&(_work)->work, (_func));			\
-		init_timer_deferrable(&(_work)->timer);			\
-		(_work)->timer.function = delayed_work_timer_fn;	\
-		(_work)->timer.data = (unsigned long)(_work);		\
-	} while (0)
+	__INIT_DELAYED_WORK(_work, _func, TIMER_DEFERRABLE)
+
+#define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func)			\
+	__INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE)
 
 /**
  * work_pending - Find out whether a work item is currently pending
-- 
cgit v1.2.3


From e0aecdd874d78b7129a64b056c20e529e2c916df Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:24 -0700
Subject: workqueue: use irqsafe timer for delayed_work

Up to now, for delayed_works, try_to_grab_pending() couldn't be used
from IRQ handlers because IRQs may happen while
delayed_work_timer_fn() is in progress leading to indefinite -EAGAIN.

This patch makes delayed_work use the new TIMER_IRQSAFE flag for
delayed_work->timer.  This makes try_to_grab_pending() and thus
mod_delayed_work_on() safe to call from IRQ handlers.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  8 +++++---
 kernel/workqueue.c        | 20 +++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index e84ebb69607d..d86b320319e0 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -136,7 +136,8 @@ struct execute_work {
 #define __DELAYED_WORK_INITIALIZER(n, f, tflags) {			\
 	.work = __WORK_INITIALIZER((n).work, (f)),			\
 	.timer = __TIMER_INITIALIZER(delayed_work_timer_fn,		\
-			0, (unsigned long)&(n), (tflags)),		\
+				     0, (unsigned long)&(n),		\
+				     (tflags) | TIMER_IRQSAFE),		\
 	}
 
 #define DECLARE_WORK(n, f)						\
@@ -214,7 +215,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 	do {								\
 		INIT_WORK(&(_work)->work, (_func));			\
 		__setup_timer(&(_work)->timer, delayed_work_timer_fn,	\
-			      (unsigned long)(_work), (_tflags));	\
+			      (unsigned long)(_work),			\
+			      (_tflags) | TIMER_IRQSAFE);		\
 	} while (0)
 
 #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags)		\
@@ -223,7 +225,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 		__setup_timer_on_stack(&(_work)->timer,			\
 				       delayed_work_timer_fn,		\
 				       (unsigned long)(_work),		\
-				       (_tflags));			\
+				       (_tflags) | TIMER_IRQSAFE);	\
 	} while (0)
 
 #define INIT_DELAYED_WORK(_work, _func)					\
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 85bd3409b9f5..b394df8beaee 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1048,16 +1048,14 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
  *		for arbitrarily long
  *
  * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
- * preempted while holding PENDING and @work off queue, preemption must be
- * disabled on entry.  This ensures that we don't return -EAGAIN while
- * another task is preempted in this function.
+ * interrupted while holding PENDING and @work off queue, irq must be
+ * disabled on entry.  This, combined with delayed_work->timer being
+ * irqsafe, ensures that we return -EAGAIN for finite short period of time.
  *
  * On successful return, >= 0, irq is disabled and the caller is
  * responsible for releasing it using local_irq_restore(*@flags).
  *
- * This function is safe to call from any context other than IRQ handler.
- * An IRQ handler may run on top of delayed_work_timer_fn() which can make
- * this function return -EAGAIN perpetually.
+ * This function is safe to call from any context including IRQ handler.
  */
 static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 			       unsigned long *flags)
@@ -1072,6 +1070,11 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 	if (is_dwork) {
 		struct delayed_work *dwork = to_delayed_work(work);
 
+		/*
+		 * dwork->timer is irqsafe.  If del_timer() fails, it's
+		 * guaranteed that the timer is not queued anywhere and not
+		 * running on the local CPU.
+		 */
 		if (likely(del_timer(&dwork->timer)))
 			return 1;
 	}
@@ -1327,9 +1330,8 @@ void delayed_work_timer_fn(unsigned long __data)
 	struct delayed_work *dwork = (struct delayed_work *)__data;
 	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
-	local_irq_disable();
+	/* should have been called from irqsafe timer with irq already off */
 	__queue_work(dwork->cpu, cwq->wq, &dwork->work);
-	local_irq_enable();
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
@@ -1444,7 +1446,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
  * Returns %false if @dwork was idle and queued, %true if @dwork was
  * pending and its timer was modified.
  *
- * This function is safe to call from any context other than IRQ handler.
+ * This function is safe to call from any context including IRQ handler.
  * See try_to_grab_pending() for details.
  */
 bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
-- 
cgit v1.2.3


From e7c2f967445dd2041f0f8e3179cca22bb8bb7f79 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:24 -0700
Subject: workqueue: use mod_delayed_work() instead of __cancel + queue

Now that mod_delayed_work() is safe to call from IRQ handlers,
__cancel_delayed_work() followed by queue_delayed_work() can be
replaced with mod_delayed_work().

Most conversions are straight-forward except for the following.

* net/core/link_watch.c: linkwatch_schedule_work() was doing a quite
  elaborate dancing around its delayed_work.  Collapse it such that
  linkwatch_work is queued for immediate execution if LW_URGENT and
  existing timer is kept otherwise.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com> 
---
 block/blk-core.c                    |  6 ++----
 block/blk-throttle.c                |  7 +------
 drivers/block/floppy.c              |  3 +--
 drivers/infiniband/core/mad.c       | 14 +++++---------
 drivers/input/keyboard/qt2160.c     |  3 +--
 drivers/input/mouse/synaptics_i2c.c |  7 +------
 net/core/link_watch.c               | 21 ++++++---------------
 7 files changed, 17 insertions(+), 44 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 4b4dbdfbca89..4b8b606dbb01 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -319,10 +319,8 @@ EXPORT_SYMBOL(__blk_run_queue);
  */
 void blk_run_queue_async(struct request_queue *q)
 {
-	if (likely(!blk_queue_stopped(q))) {
-		__cancel_delayed_work(&q->delay_work);
-		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
-	}
+	if (likely(!blk_queue_stopped(q)))
+		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(blk_run_queue_async);
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5a58e779912b..a9664fa0b609 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -929,12 +929,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
 	/* schedule work if limits changed even if no bio is queued */
 	if (total_nr_queued(td) || td->limits_changed) {
-		/*
-		 * We might have a work scheduled to be executed in future.
-		 * Cancel that and schedule a new one.
-		 */
-		__cancel_delayed_work(dwork);
-		queue_delayed_work(kthrotld_workqueue, dwork, delay);
+		mod_delayed_work(kthrotld_workqueue, dwork, delay);
 		throtl_log(td, "schedule work. delay=%lu jiffies=%lu",
 				delay, jiffies);
 	}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index a7d6347aaa79..55a5bc002c06 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -672,7 +672,6 @@ static void __reschedule_timeout(int drive, const char *message)
 
 	if (drive == current_reqD)
 		drive = current_drive;
-	__cancel_delayed_work(&fd_timeout);
 
 	if (drive < 0 || drive >= N_DRIVE) {
 		delay = 20UL * HZ;
@@ -680,7 +679,7 @@ static void __reschedule_timeout(int drive, const char *message)
 	} else
 		delay = UDP->timeout;
 
-	queue_delayed_work(floppy_wq, &fd_timeout, delay);
+	mod_delayed_work(floppy_wq, &fd_timeout, delay);
 	if (UDP->flags & FD_DEBUG)
 		DPRINT("reschedule timeout %s\n", message);
 	timeout_message = message;
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b0d0bc8a6fb6..b5938147fc89 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2013,13 +2013,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
 		if (time_after(mad_agent_priv->timeout,
 			       mad_send_wr->timeout)) {
 			mad_agent_priv->timeout = mad_send_wr->timeout;
-			__cancel_delayed_work(&mad_agent_priv->timed_work);
 			delay = mad_send_wr->timeout - jiffies;
 			if ((long)delay <= 0)
 				delay = 1;
-			queue_delayed_work(mad_agent_priv->qp_info->
-					   port_priv->wq,
-					   &mad_agent_priv->timed_work, delay);
+			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+					 &mad_agent_priv->timed_work, delay);
 		}
 	}
 }
@@ -2052,11 +2050,9 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
 	list_add(&mad_send_wr->agent_list, list_item);
 
 	/* Reschedule a work item if we have a shorter timeout */
-	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
-		__cancel_delayed_work(&mad_agent_priv->timed_work);
-		queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
-				   &mad_agent_priv->timed_work, delay);
-	}
+	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
+		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+				 &mad_agent_priv->timed_work, delay);
 }
 
 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
diff --git a/drivers/input/keyboard/qt2160.c b/drivers/input/keyboard/qt2160.c
index e7a5e36e1203..76b7d430d03a 100644
--- a/drivers/input/keyboard/qt2160.c
+++ b/drivers/input/keyboard/qt2160.c
@@ -156,8 +156,7 @@ static irqreturn_t qt2160_irq(int irq, void *_qt2160)
 
 	spin_lock_irqsave(&qt2160->lock, flags);
 
-	__cancel_delayed_work(&qt2160->dwork);
-	schedule_delayed_work(&qt2160->dwork, 0);
+	mod_delayed_work(system_wq, &qt2160->dwork, 0);
 
 	spin_unlock_irqrestore(&qt2160->lock, flags);
 
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index f14675702c0f..063a174d3a88 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -376,12 +376,7 @@ static void synaptics_i2c_reschedule_work(struct synaptics_i2c *touch,
 
 	spin_lock_irqsave(&touch->lock, flags);
 
-	/*
-	 * If work is already scheduled then subsequent schedules will not
-	 * change the scheduled time that's why we have to cancel it first.
-	 */
-	__cancel_delayed_work(&touch->dwork);
-	schedule_delayed_work(&touch->dwork, delay);
+	mod_delayed_work(system_wq, &touch->dwork, delay);
 
 	spin_unlock_irqrestore(&touch->lock, flags);
 }
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c3519c6d1b16..8e397a69005a 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -120,22 +120,13 @@ static void linkwatch_schedule_work(int urgent)
 		delay = 0;
 
 	/*
-	 * This is true if we've scheduled it immeditately or if we don't
-	 * need an immediate execution and it's already pending.
+	 * If urgent, schedule immediate execution; otherwise, don't
+	 * override the existing timer.
 	 */
-	if (schedule_delayed_work(&linkwatch_work, delay) == !delay)
-		return;
-
-	/* Don't bother if there is nothing urgent. */
-	if (!test_bit(LW_URGENT, &linkwatch_flags))
-		return;
-
-	/* It's already running which is good enough. */
-	if (!__cancel_delayed_work(&linkwatch_work))
-		return;
-
-	/* Otherwise we reschedule it again for immediate execution. */
-	schedule_delayed_work(&linkwatch_work, 0);
+	if (test_bit(LW_URGENT, &linkwatch_flags))
+		mod_delayed_work(system_wq, &linkwatch_work, 0);
+	else
+		schedule_delayed_work(&linkwatch_work, delay);
 }
 
 
-- 
cgit v1.2.3


From 57b30ae77bf00d2318df711ef9a4d2a9be0a3a2a Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:24 -0700
Subject: workqueue: reimplement cancel_delayed_work() using
 try_to_grab_pending()

cancel_delayed_work() can't be called from IRQ handlers due to its use
of del_timer_sync() and can't cancel work items which are already
transferred from timer to worklist.

Also, unlike other flush and cancel functions, a canceled delayed_work
would still point to the last associated cpu_workqueue.  If the
workqueue is destroyed afterwards and the work item is re-used on a
different workqueue, the queueing code can oops trying to dereference
already freed cpu_workqueue.

This patch reimplements cancel_delayed_work() using
try_to_grab_pending() and set_work_cpu_and_clear_pending().  This
allows the function to be called from IRQ handlers and makes its
behavior consistent with other flush / cancel functions.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/workqueue.h | 17 +----------------
 kernel/workqueue.c        | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d86b320319e0..4898289564ab 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -420,6 +420,7 @@ extern bool flush_work(struct work_struct *work);
 extern bool cancel_work_sync(struct work_struct *work);
 
 extern bool flush_delayed_work(struct delayed_work *dwork);
+extern bool cancel_delayed_work(struct delayed_work *dwork);
 extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
 
 extern void workqueue_set_max_active(struct workqueue_struct *wq,
@@ -428,22 +429,6 @@ extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
 extern unsigned int work_cpu(struct work_struct *work);
 extern unsigned int work_busy(struct work_struct *work);
 
-/*
- * Kill off a pending schedule_delayed_work().  Note that the work callback
- * function may still be running on return from cancel_delayed_work(), unless
- * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or
- * cancel_work_sync() to wait on it.
- */
-static inline bool cancel_delayed_work(struct delayed_work *work)
-{
-	bool ret;
-
-	ret = del_timer_sync(&work->timer);
-	if (ret)
-		work_clear_pending(&work->work);
-	return ret;
-}
-
 /*
  * Like above, but uses del_timer() instead of del_timer_sync(). This means,
  * if it returns 0 the timer function may be running and the queueing is in
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b394df8beaee..039d0fae171a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2948,6 +2948,36 @@ bool flush_delayed_work(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(flush_delayed_work);
 
+/**
+ * cancel_delayed_work - cancel a delayed work
+ * @dwork: delayed_work to cancel
+ *
+ * Kill off a pending delayed_work.  Returns %true if @dwork was pending
+ * and canceled; %false if wasn't pending.  Note that the work callback
+ * function may still be running on return, unless it returns %true and the
+ * work doesn't re-arm itself.  Explicitly flush or use
+ * cancel_delayed_work_sync() to wait on it.
+ *
+ * This function is safe to call from any context including IRQ handler.
+ */
+bool cancel_delayed_work(struct delayed_work *dwork)
+{
+	unsigned long flags;
+	int ret;
+
+	do {
+		ret = try_to_grab_pending(&dwork->work, true, &flags);
+	} while (unlikely(ret == -EAGAIN));
+
+	if (unlikely(ret < 0))
+		return false;
+
+	set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
+	local_irq_restore(flags);
+	return true;
+}
+EXPORT_SYMBOL(cancel_delayed_work);
+
 /**
  * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
  * @dwork: the delayed work cancel
-- 
cgit v1.2.3


From 136b5721d75a62a8f02c601c89122e32c1a85a84 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 21 Aug 2012 13:18:24 -0700
Subject: workqueue: deprecate __cancel_delayed_work()

Now that cancel_delayed_work() can be safely called from IRQ handlers,
there's no reason to use __cancel_delayed_work().  Use
cancel_delayed_work() instead of __cancel_delayed_work() and mark the
latter deprecated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Roland Dreier <roland@kernel.org>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 block/blk-core.c              | 2 +-
 drivers/block/floppy.c        | 2 +-
 drivers/infiniband/core/mad.c | 2 +-
 drivers/video/omap2/dss/dsi.c | 2 +-
 include/linux/workqueue.h     | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 4b8b606dbb01..dc04a9013027 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -262,7 +262,7 @@ EXPORT_SYMBOL(blk_start_queue);
  **/
 void blk_stop_queue(struct request_queue *q)
 {
-	__cancel_delayed_work(&q->delay_work);
+	cancel_delayed_work(&q->delay_work);
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 55a5bc002c06..17c675c52295 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -890,7 +890,7 @@ static void unlock_fdc(void)
 
 	raw_cmd = NULL;
 	command_status = FD_COMMAND_NONE;
-	__cancel_delayed_work(&fd_timeout);
+	cancel_delayed_work(&fd_timeout);
 	do_floppy = NULL;
 	cont = NULL;
 	clear_bit(0, &fdc_busy);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b5938147fc89..dc3fd1e8af07 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2004,7 +2004,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
 	unsigned long delay;
 
 	if (list_empty(&mad_agent_priv->wait_list)) {
-		__cancel_delayed_work(&mad_agent_priv->timed_work);
+		cancel_delayed_work(&mad_agent_priv->timed_work);
 	} else {
 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
 					 struct ib_mad_send_wr_private,
diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c
index fd40f2625051..05ee04667af1 100644
--- a/drivers/video/omap2/dss/dsi.c
+++ b/drivers/video/omap2/dss/dsi.c
@@ -4306,7 +4306,7 @@ static void dsi_framedone_irq_callback(void *data, u32 mask)
 	 * and is sending the data.
 	 */
 
-	__cancel_delayed_work(&dsi->framedone_timeout_work);
+	cancel_delayed_work(&dsi->framedone_timeout_work);
 
 	dsi_handle_framedone(dsidev, 0);
 }
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4898289564ab..2b58905d3504 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -434,7 +434,7 @@ extern unsigned int work_busy(struct work_struct *work);
  * if it returns 0 the timer function may be running and the queueing is in
  * progress.
  */
-static inline bool __cancel_delayed_work(struct delayed_work *work)
+static inline bool __deprecated __cancel_delayed_work(struct delayed_work *work)
 {
 	bool ret;
 
-- 
cgit v1.2.3


From ea1abd6197d5805655da1bb589929762f4b4aa08 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 09:59:22 -0700
Subject: workqueue: reimplement idle worker rebinding

Currently rebind_workers() uses rebinds idle workers synchronously
before proceeding to requesting busy workers to rebind.  This is
necessary because all workers on @worker_pool->idle_list must be bound
before concurrency management local wake-ups from the busy workers
take place.

Unfortunately, the synchronous idle rebinding is quite complicated.
This patch reimplements idle rebinding to simplify the code path.

Rather than trying to make all idle workers bound before rebinding
busy workers, we simply remove all to-be-bound idle workers from the
idle list and let them add themselves back after completing rebinding
(successful or not).

As only workers which finished rebinding can on on the idle worker
list, the idle worker list is guaranteed to have only bound workers
unless CPU went down again and local wake-ups are safe.

After the change, @worker_pool->nr_idle may deviate than the actual
number of idle workers on @worker_pool->idle_list.  More specifically,
nr_idle may be non-zero while ->idle_list is empty.  All users of
->nr_idle and ->idle_list are audited.  The only affected one is
too_many_workers() which is updated to check %false if ->idle_list is
empty regardless of ->nr_idle.

After this patch, rebind_workers() no longer performs the nasty
idle-rebind retries which require temporary release of gcwq->lock, and
both unbinding and rebinding are atomic w.r.t. global_cwq->lock.

worker->idle_rebind and global_cwq->rebind_hold are now unnecessary
and removed along with the definition of struct idle_rebind.

Changed from V1:
	1) remove unlikely from too_many_workers(), ->idle_list can be empty
	   anytime, even before this patch, no reason to use unlikely.
	2) fix a small rebasing mistake.
	   (which is from rebasing the orignal fixing patch to for-next)
	3) add a lot of comments.
	4) clear WORKER_REBIND unconditionaly in idle_worker_rebind()

tj: Updated comments and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 141 ++++++++++++++++-------------------------------------
 1 file changed, 42 insertions(+), 99 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 31d8a4586d4c..770c1a8128bf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -126,7 +126,6 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
-struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting.  All on-duty workers
@@ -150,7 +149,6 @@ struct worker {
 	int			id;		/* I: worker id */
 
 	/* for rebinding worker to CPU */
-	struct idle_rebind	*idle_rebind;	/* L: for idle worker */
 	struct work_struct	rebind_work;	/* L: for busy worker */
 };
 
@@ -160,6 +158,8 @@ struct worker_pool {
 
 	struct list_head	worklist;	/* L: list of pending works */
 	int			nr_workers;	/* L: total number of workers */
+
+	/* nr_idle includes the ones off idle_list for rebinding */
 	int			nr_idle;	/* L: currently idle ones */
 
 	struct list_head	idle_list;	/* X: list of idle workers */
@@ -186,8 +186,6 @@ struct global_cwq {
 
 	struct worker_pool	pools[NR_WORKER_POOLS];
 						/* normal and highpri pools */
-
-	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -687,6 +685,13 @@ static bool too_many_workers(struct worker_pool *pool)
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
+	/*
+	 * nr_idle and idle_list may disagree if idle rebinding is in
+	 * progress.  Never return %true if idle_list is empty.
+	 */
+	if (list_empty(&pool->idle_list))
+		return false;
+
 	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
 }
 
@@ -1611,37 +1616,26 @@ __acquires(&gcwq->lock)
 	}
 }
 
-struct idle_rebind {
-	int			cnt;		/* # workers to be rebound */
-	struct completion	done;		/* all workers rebound */
-};
-
 /*
- * Rebind an idle @worker to its CPU.  During CPU onlining, this has to
- * happen synchronously for idle workers.  worker_thread() will test
+ * Rebind an idle @worker to its CPU.  worker_thread() will test
  * %WORKER_REBIND before leaving idle and call this function.
  */
 static void idle_worker_rebind(struct worker *worker)
 {
 	struct global_cwq *gcwq = worker->pool->gcwq;
 
-	/* CPU must be online at this point */
-	WARN_ON(!worker_maybe_bind_and_lock(worker));
-	if (!--worker->idle_rebind->cnt)
-		complete(&worker->idle_rebind->done);
-	spin_unlock_irq(&worker->pool->gcwq->lock);
-
-	/* we did our part, wait for rebind_workers() to finish up */
-	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
-
 	/*
-	 * rebind_workers() shouldn't finish until all workers passed the
-	 * above WORKER_REBIND wait.  Tell it when done.
+	 * CPU may go down again inbetween.  If rebinding fails, reinstate
+	 * UNBOUND.  We're off idle_list and nobody else can do it for us.
 	 */
-	spin_lock_irq(&worker->pool->gcwq->lock);
-	if (!--worker->idle_rebind->cnt)
-		complete(&worker->idle_rebind->done);
-	spin_unlock_irq(&worker->pool->gcwq->lock);
+	if (!worker_maybe_bind_and_lock(worker))
+		worker->flags |= WORKER_UNBOUND;
+
+	worker_clr_flags(worker, WORKER_REBIND);
+
+	/* rebind complete, become available again */
+	list_add(&worker->entry, &worker->pool->idle_list);
+	spin_unlock_irq(&gcwq->lock);
 }
 
 /*
@@ -1676,29 +1670,25 @@ static void busy_worker_rebind_fn(struct work_struct *work)
  * @gcwq->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
  * is different for idle and busy ones.
  *
- * The idle ones should be rebound synchronously and idle rebinding should
- * be complete before any worker starts executing work items with
- * concurrency management enabled; otherwise, scheduler may oops trying to
- * wake up non-local idle worker from wq_worker_sleeping().
- *
- * This is achieved by repeatedly requesting rebinding until all idle
- * workers are known to have been rebound under @gcwq->lock and holding all
- * idle workers from becoming busy until idle rebinding is complete.
+ * Idle ones will be removed from the idle_list and woken up.  They will
+ * add themselves back after completing rebind.  This ensures that the
+ * idle_list doesn't contain any unbound workers when re-bound busy workers
+ * try to perform local wake-ups for concurrency management.
  *
- * Once idle workers are rebound, busy workers can be rebound as they
- * finish executing their current work items.  Queueing the rebind work at
- * the head of their scheduled lists is enough.  Note that nr_running will
- * be properbly bumped as busy workers rebind.
+ * Busy workers can rebind after they finish their current work items.
+ * Queueing the rebind work item at the head of the scheduled list is
+ * enough.  Note that nr_running will be properly bumped as busy workers
+ * rebind.
  *
- * On return, all workers are guaranteed to either be bound or have rebind
- * work item scheduled.
+ * On return, all non-manager workers are scheduled for rebind - see
+ * manage_workers() for the manager special case.  Any idle worker
+ * including the manager will not appear on @idle_list until rebind is
+ * complete, making local wake-ups safe.
  */
 static void rebind_workers(struct global_cwq *gcwq)
-	__releases(&gcwq->lock) __acquires(&gcwq->lock)
 {
-	struct idle_rebind idle_rebind;
 	struct worker_pool *pool;
-	struct worker *worker;
+	struct worker *worker, *n;
 	struct hlist_node *pos;
 	int i;
 
@@ -1707,46 +1697,29 @@ static void rebind_workers(struct global_cwq *gcwq)
 	for_each_worker_pool(pool, gcwq)
 		lockdep_assert_held(&pool->manager_mutex);
 
-	/*
-	 * Rebind idle workers.  Interlocked both ways.  We wait for
-	 * workers to rebind via @idle_rebind.done.  Workers will wait for
-	 * us to finish up by watching %WORKER_REBIND.
-	 */
-	init_completion(&idle_rebind.done);
-retry:
-	idle_rebind.cnt = 1;
-	INIT_COMPLETION(idle_rebind.done);
-
-	/* set REBIND and kick idle ones, we'll wait for these later */
+	/* set REBIND and kick idle ones */
 	for_each_worker_pool(pool, gcwq) {
-		list_for_each_entry(worker, &pool->idle_list, entry) {
+		list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
 			unsigned long worker_flags = worker->flags;
 
-			if (worker->flags & WORKER_REBIND)
-				continue;
-
 			/* morph UNBOUND to REBIND atomically */
 			worker_flags &= ~WORKER_UNBOUND;
 			worker_flags |= WORKER_REBIND;
 			ACCESS_ONCE(worker->flags) = worker_flags;
 
-			idle_rebind.cnt++;
-			worker->idle_rebind = &idle_rebind;
+			/*
+			 * idle workers should be off @pool->idle_list
+			 * until rebind is complete to avoid receiving
+			 * premature local wake-ups.
+			 */
+			list_del_init(&worker->entry);
 
 			/* worker_thread() will call idle_worker_rebind() */
 			wake_up_process(worker->task);
 		}
 	}
 
-	if (--idle_rebind.cnt) {
-		spin_unlock_irq(&gcwq->lock);
-		wait_for_completion(&idle_rebind.done);
-		spin_lock_irq(&gcwq->lock);
-		/* busy ones might have become idle while waiting, retry */
-		goto retry;
-	}
-
-	/* all idle workers are rebound, rebind busy workers */
+	/* rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
 		unsigned long worker_flags = worker->flags;
 		struct work_struct *rebind_work = &worker->rebind_work;
@@ -1776,34 +1749,6 @@ retry:
 			worker->scheduled.next,
 			work_color_to_flags(WORK_NO_COLOR));
 	}
-
-	/*
-	 * All idle workers are rebound and waiting for %WORKER_REBIND to
-	 * be cleared inside idle_worker_rebind().  Clear and release.
-	 * Clearing %WORKER_REBIND from this foreign context is safe
-	 * because these workers are still guaranteed to be idle.
-	 *
-	 * We need to make sure all idle workers passed WORKER_REBIND wait
-	 * in idle_worker_rebind() before returning; otherwise, workers can
-	 * get stuck at the wait if hotplug cycle repeats.
-	 */
-	idle_rebind.cnt = 1;
-	INIT_COMPLETION(idle_rebind.done);
-
-	for_each_worker_pool(pool, gcwq) {
-		list_for_each_entry(worker, &pool->idle_list, entry) {
-			worker->flags &= ~WORKER_REBIND;
-			idle_rebind.cnt++;
-		}
-	}
-
-	wake_up_all(&gcwq->rebind_hold);
-
-	if (--idle_rebind.cnt) {
-		spin_unlock_irq(&gcwq->lock);
-		wait_for_completion(&idle_rebind.done);
-		spin_lock_irq(&gcwq->lock);
-	}
 }
 
 static struct worker *alloc_worker(void)
@@ -3916,8 +3861,6 @@ static int __init init_workqueues(void)
 			mutex_init(&pool->manager_mutex);
 			ida_init(&pool->worker_ida);
 		}
-
-		init_waitqueue_head(&gcwq->rebind_hold);
 	}
 
 	/* create the initial worker */
-- 
cgit v1.2.3


From eab6d82843ee1df244f8847d1bf8bb89160ec4aa Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 09:59:22 -0700
Subject: workqueue: WORKER_REBIND is no longer necessary for busy rebinding

Because the old unbind/rebinding implementation wasn't atomic w.r.t.
GCWQ_DISASSOCIATED manipulation which is protected by
global_cwq->lock, we had to use two flags, WORKER_UNBOUND and
WORKER_REBIND, to avoid incorrectly losing all NOT_RUNNING bits with
back-to-back CPU hotplug operations; otherwise, completion of
rebinding while another unbinding is in progress could clear UNBIND
prematurely.

Now that both unbind/rebinding are atomic w.r.t. GCWQ_DISASSOCIATED,
there's no need to use two flags.  Just one is enough.  Don't use
WORKER_REBIND for busy rebinding.

tj: Updated description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 770c1a8128bf..794724efb733 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1649,16 +1649,8 @@ static void busy_worker_rebind_fn(struct work_struct *work)
 	struct worker *worker = container_of(work, struct worker, rebind_work);
 	struct global_cwq *gcwq = worker->pool->gcwq;
 
-	worker_maybe_bind_and_lock(worker);
-
-	/*
-	 * %WORKER_REBIND must be cleared even if the above binding failed;
-	 * otherwise, we may confuse the next CPU_UP cycle or oops / get
-	 * stuck by calling idle_worker_rebind() prematurely.  If CPU went
-	 * down again inbetween, %WORKER_UNBOUND would be set, so clearing
-	 * %WORKER_REBIND is always safe.
-	 */
-	worker_clr_flags(worker, WORKER_REBIND);
+	if (worker_maybe_bind_and_lock(worker))
+		worker_clr_flags(worker, WORKER_UNBOUND);
 
 	spin_unlock_irq(&gcwq->lock);
 }
@@ -1721,15 +1713,9 @@ static void rebind_workers(struct global_cwq *gcwq)
 
 	/* rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
-		unsigned long worker_flags = worker->flags;
 		struct work_struct *rebind_work = &worker->rebind_work;
 		struct workqueue_struct *wq;
 
-		/* morph UNBOUND to REBIND atomically */
-		worker_flags &= ~WORKER_UNBOUND;
-		worker_flags |= WORKER_REBIND;
-		ACCESS_ONCE(worker->flags) = worker_flags;
-
 		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
 				     work_data_bits(rebind_work)))
 			continue;
-- 
cgit v1.2.3


From 5f7dabfd5cb115937afb4649e4c73b02f927f6ae Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 09:59:23 -0700
Subject: workqueue: WORKER_REBIND is no longer necessary for idle rebinding

Now both worker destruction and idle rebinding remove the worker from
idle list while it's still idle, so list_empty(&worker->entry) can be
used to test whether either is pending and WORKER_DIE to distinguish
between the two instead making WORKER_REBIND unnecessary.

Use list_empty(&worker->entry) to determine whether destruction or
rebinding is pending.  This simplifies worker state transitions.

WORKER_REBIND is not needed anymore.  Remove it.

tj: Updated comments and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 41 +++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 794724efb733..cdc6bfc84b78 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -73,11 +73,10 @@ enum {
 	WORKER_DIE		= 1 << 1,	/* die die die */
 	WORKER_IDLE		= 1 << 2,	/* is idle */
 	WORKER_PREP		= 1 << 3,	/* preparing to run works */
-	WORKER_REBIND		= 1 << 5,	/* mom is home, come back */
 	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
 	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
 
-	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_REBIND | WORKER_UNBOUND |
+	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_UNBOUND |
 				  WORKER_CPU_INTENSIVE,
 
 	NR_WORKER_POOLS		= 2,		/* # worker pools per gcwq */
@@ -1618,20 +1617,15 @@ __acquires(&gcwq->lock)
 
 /*
  * Rebind an idle @worker to its CPU.  worker_thread() will test
- * %WORKER_REBIND before leaving idle and call this function.
+ * list_empty(@worker->entry) before leaving idle and call this function.
  */
 static void idle_worker_rebind(struct worker *worker)
 {
 	struct global_cwq *gcwq = worker->pool->gcwq;
 
-	/*
-	 * CPU may go down again inbetween.  If rebinding fails, reinstate
-	 * UNBOUND.  We're off idle_list and nobody else can do it for us.
-	 */
-	if (!worker_maybe_bind_and_lock(worker))
-		worker->flags |= WORKER_UNBOUND;
-
-	worker_clr_flags(worker, WORKER_REBIND);
+	/* CPU may go down again inbetween, clear UNBOUND only on success */
+	if (worker_maybe_bind_and_lock(worker))
+		worker_clr_flags(worker, WORKER_UNBOUND);
 
 	/* rebind complete, become available again */
 	list_add(&worker->entry, &worker->pool->idle_list);
@@ -1689,16 +1683,9 @@ static void rebind_workers(struct global_cwq *gcwq)
 	for_each_worker_pool(pool, gcwq)
 		lockdep_assert_held(&pool->manager_mutex);
 
-	/* set REBIND and kick idle ones */
+	/* dequeue and kick idle ones */
 	for_each_worker_pool(pool, gcwq) {
 		list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
-			unsigned long worker_flags = worker->flags;
-
-			/* morph UNBOUND to REBIND atomically */
-			worker_flags &= ~WORKER_UNBOUND;
-			worker_flags |= WORKER_REBIND;
-			ACCESS_ONCE(worker->flags) = worker_flags;
-
 			/*
 			 * idle workers should be off @pool->idle_list
 			 * until rebind is complete to avoid receiving
@@ -1706,7 +1693,10 @@ static void rebind_workers(struct global_cwq *gcwq)
 			 */
 			list_del_init(&worker->entry);
 
-			/* worker_thread() will call idle_worker_rebind() */
+			/*
+			 * worker_thread() will see the above dequeuing
+			 * and call idle_worker_rebind().
+			 */
 			wake_up_process(worker->task);
 		}
 	}
@@ -2176,7 +2166,7 @@ __acquires(&gcwq->lock)
 	 * necessary to avoid spurious warnings from rescuers servicing the
 	 * unbound or a disassociated gcwq.
 	 */
-	WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) &&
+	WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) &&
 		     !(gcwq->flags & GCWQ_DISASSOCIATED) &&
 		     raw_smp_processor_id() != gcwq->cpu);
 
@@ -2300,18 +2290,17 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&gcwq->lock);
 
-	/*
-	 * DIE can be set only while idle and REBIND set while busy has
-	 * @worker->rebind_work scheduled.  Checking here is enough.
-	 */
-	if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) {
+	/* we are off idle list if destruction or rebind is requested */
+	if (unlikely(list_empty(&worker->entry))) {
 		spin_unlock_irq(&gcwq->lock);
 
+		/* if DIE is set, destruction is requested */
 		if (worker->flags & WORKER_DIE) {
 			worker->task->flags &= ~PF_WQ_WORKER;
 			return 0;
 		}
 
+		/* otherwise, rebind */
 		idle_worker_rebind(worker);
 		goto woke_up;
 	}
-- 
cgit v1.2.3


From b2eb83d123c1cc9f96a8e452b26a6ebe631b3ad7 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 09:59:23 -0700
Subject: workqueue: rename manager_mutex to assoc_mutex

Now that manager_mutex's role has changed from synchronizing manager
role to excluding hotplug against manager, the name is misleading.

As it is protecting the CPU-association of the gcwq now, rename it to
assoc_mutex.

This patch is pure rename and doesn't introduce any functional change.

tj: Updated comments and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cdc6bfc84b78..e651239f1ece 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -58,7 +58,7 @@ enum {
 	 * be executing on any CPU.  The gcwq behaves as an unbound one.
 	 *
 	 * Note that DISASSOCIATED can be flipped only while holding
-	 * managership of all pools on the gcwq to avoid changing binding
+	 * assoc_mutex of all pools on the gcwq to avoid changing binding
 	 * state while create_worker() is in progress.
 	 */
 	GCWQ_DISASSOCIATED	= 1 << 0,	/* cpu can't serve workers */
@@ -165,7 +165,7 @@ struct worker_pool {
 	struct timer_list	idle_timer;	/* L: worker idle timeout */
 	struct timer_list	mayday_timer;	/* L: SOS timer for workers */
 
-	struct mutex		manager_mutex;	/* mutex manager should hold */
+	struct mutex		assoc_mutex;	/* protect GCWQ_DISASSOCIATED */
 	struct ida		worker_ida;	/* L: for worker IDs */
 };
 
@@ -1681,7 +1681,7 @@ static void rebind_workers(struct global_cwq *gcwq)
 	lockdep_assert_held(&gcwq->lock);
 
 	for_each_worker_pool(pool, gcwq)
-		lockdep_assert_held(&pool->manager_mutex);
+		lockdep_assert_held(&pool->assoc_mutex);
 
 	/* dequeue and kick idle ones */
 	for_each_worker_pool(pool, gcwq) {
@@ -2081,22 +2081,22 @@ static bool manage_workers(struct worker *worker)
 	 * grab %POOL_MANAGING_WORKERS to achieve this because that can
 	 * lead to idle worker depletion (all become busy thinking someone
 	 * else is managing) which in turn can result in deadlock under
-	 * extreme circumstances.  Use @pool->manager_mutex to synchronize
+	 * extreme circumstances.  Use @pool->assoc_mutex to synchronize
 	 * manager against CPU hotplug.
 	 *
-	 * manager_mutex would always be free unless CPU hotplug is in
+	 * assoc_mutex would always be free unless CPU hotplug is in
 	 * progress.  trylock first without dropping @gcwq->lock.
 	 */
-	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+	if (unlikely(!mutex_trylock(&pool->assoc_mutex))) {
 		spin_unlock_irq(&pool->gcwq->lock);
-		mutex_lock(&pool->manager_mutex);
+		mutex_lock(&pool->assoc_mutex);
 		/*
 		 * CPU hotplug could have happened while we were waiting
-		 * for manager_mutex.  Hotplug itself can't handle us
+		 * for assoc_mutex.  Hotplug itself can't handle us
 		 * because manager isn't either on idle or busy list, and
 		 * @gcwq's state and ours could have deviated.
 		 *
-		 * As hotplug is now excluded via manager_mutex, we can
+		 * As hotplug is now excluded via assoc_mutex, we can
 		 * simply try to bind.  It will succeed or fail depending
 		 * on @gcwq's current state.  Try it and adjust
 		 * %WORKER_UNBOUND accordingly.
@@ -2119,7 +2119,7 @@ static bool manage_workers(struct worker *worker)
 	ret |= maybe_create_worker(pool);
 
 	pool->flags &= ~POOL_MANAGING_WORKERS;
-	mutex_unlock(&pool->manager_mutex);
+	mutex_unlock(&pool->assoc_mutex);
 	return ret;
 }
 
@@ -3474,23 +3474,23 @@ EXPORT_SYMBOL_GPL(work_busy);
  */
 
 /* claim manager positions of all pools */
-static void gcwq_claim_management_and_lock(struct global_cwq *gcwq)
+static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq)
 {
 	struct worker_pool *pool;
 
 	for_each_worker_pool(pool, gcwq)
-		mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools);
+		mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools);
 	spin_lock_irq(&gcwq->lock);
 }
 
 /* release manager positions */
-static void gcwq_release_management_and_unlock(struct global_cwq *gcwq)
+static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq)
 {
 	struct worker_pool *pool;
 
 	spin_unlock_irq(&gcwq->lock);
 	for_each_worker_pool(pool, gcwq)
-		mutex_unlock(&pool->manager_mutex);
+		mutex_unlock(&pool->assoc_mutex);
 }
 
 static void gcwq_unbind_fn(struct work_struct *work)
@@ -3503,7 +3503,7 @@ static void gcwq_unbind_fn(struct work_struct *work)
 
 	BUG_ON(gcwq->cpu != smp_processor_id());
 
-	gcwq_claim_management_and_lock(gcwq);
+	gcwq_claim_assoc_and_lock(gcwq);
 
 	/*
 	 * We've claimed all manager positions.  Make all workers unbound
@@ -3520,7 +3520,7 @@ static void gcwq_unbind_fn(struct work_struct *work)
 
 	gcwq->flags |= GCWQ_DISASSOCIATED;
 
-	gcwq_release_management_and_unlock(gcwq);
+	gcwq_release_assoc_and_unlock(gcwq);
 
 	/*
 	 * Call schedule() so that we cross rq->lock and thus can guarantee
@@ -3576,10 +3576,10 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 
 	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
-		gcwq_claim_management_and_lock(gcwq);
+		gcwq_claim_assoc_and_lock(gcwq);
 		gcwq->flags &= ~GCWQ_DISASSOCIATED;
 		rebind_workers(gcwq);
-		gcwq_release_management_and_unlock(gcwq);
+		gcwq_release_assoc_and_unlock(gcwq);
 		break;
 	}
 	return NOTIFY_OK;
@@ -3833,7 +3833,7 @@ static int __init init_workqueues(void)
 			setup_timer(&pool->mayday_timer, gcwq_mayday_timeout,
 				    (unsigned long)pool);
 
-			mutex_init(&pool->manager_mutex);
+			mutex_init(&pool->assoc_mutex);
 			ida_init(&pool->worker_ida);
 		}
 	}
-- 
cgit v1.2.3


From 9fdf9b73d61c87a9c16f101bb8bbe069d13046f5 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 09:59:23 -0700
Subject: workqueue: use __cpuinit instead of __devinit for cpu callbacks

For workqueue hotplug callbacks, it makes less sense to use __devinit
which discards the memory after boot if !HOTPLUG.  __cpuinit, which
discards the memory after boot if !HOTPLUG_CPU fits better.

tj: Updated description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e651239f1ece..942bb750a650 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3548,7 +3548,7 @@ static void gcwq_unbind_fn(struct work_struct *work)
  * Workqueues should be brought up before normal priority CPU notifiers.
  * This will be registered high priority CPU notifier.
  */
-static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
+static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 					       unsigned long action,
 					       void *hcpu)
 {
@@ -3589,7 +3589,7 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
  * Workqueues should be brought down after normal priority CPU notifiers.
  * This will be registered as low priority CPU notifier.
  */
-static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
+static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 						 unsigned long action,
 						 void *hcpu)
 {
-- 
cgit v1.2.3


From a5b4e57d7cc07cb28ccf16de0876a4770ae84920 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 09:59:23 -0700
Subject: workqueue: use hotcpu_notifier() for workqueue_cpu_down_callback()

workqueue_cpu_down_callback() is used only if HOTPLUG_CPU=y, so
hotcpu_notifier() fits better than cpu_notifier().

When HOTPLUG_CPU=y, hotcpu_notifier() and cpu_notifier() are the same.

When HOTPLUG_CPU=n, if we use cpu_notifier(),
workqueue_cpu_down_callback() will be called during boot to do
nothing, and the memory of workqueue_cpu_down_callback() and
gcwq_unbind_fn() will be discarded after boot.

If we use hotcpu_notifier(), we can avoid the no-op call of
workqueue_cpu_down_callback() and the memory of
workqueue_cpu_down_callback() and gcwq_unbind_fn() will be discard at
build time:

$ ls -l kernel/workqueue.o.cpu_notifier kernel/workqueue.o.hotcpu_notifier
-rw-rw-r-- 1 laijs laijs 484080 Sep 15 11:31 kernel/workqueue.o.cpu_notifier
-rw-rw-r-- 1 laijs laijs 478240 Sep 15 11:31 kernel/workqueue.o.hotcpu_notifier

$ size kernel/workqueue.o.cpu_notifier kernel/workqueue.o.hotcpu_notifier
   text	   data	    bss	    dec	    hex	filename
  18513	   2387	   1221	  22121	   5669	kernel/workqueue.o.cpu_notifier
  18082	   2355	   1221	  21658	   549a	kernel/workqueue.o.hotcpu_notifier

tj: Updated description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 942bb750a650..48becaba1c94 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3807,7 +3807,7 @@ static int __init init_workqueues(void)
 		     WORK_CPU_LAST);
 
 	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
-	cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
+	hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
 
 	/* initialize gcwqs */
 	for_each_gcwq_cpu(cpu) {
-- 
cgit v1.2.3


From 3aa62497594430ea522050b75c033f71f2c60ee6 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 10:40:00 -0700
Subject: workqueue: fix possible stall on try_to_grab_pending() of a delayed
 work item

Currently, when try_to_grab_pending() grabs a delayed work item, it
leaves its linked work items alone on the delayed_works.  The linked
work items are always NO_COLOR and will cause future
cwq_activate_first_delayed() increase cwq->nr_active incorrectly, and
may cause the whole cwq to stall.  For example,

state: cwq->max_active = 1, cwq->nr_active = 1
       one work in cwq->pool, many in cwq->delayed_works.

step1: try_to_grab_pending() removes a work item from delayed_works
       but leaves its NO_COLOR linked work items on it.

step2: Later on, cwq_activate_first_delayed() activates the linked
       work item increasing ->nr_active.

step3: cwq->nr_active = 1, but all activated work items of the cwq are
       NO_COLOR.  When they finish, cwq->nr_active will not be
       decreased due to NO_COLOR, and no further work items will be
       activated from cwq->delayed_works. the cwq stalls.

Fix it by ensuring the target work item is activated before stealing
PENDING in try_to_grab_pending().  This ensures that all the linked
work items are activated without incorrectly bumping cwq->nr_active.

tj: Updated comment and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@kernel.org
---
 kernel/workqueue.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 48becaba1c94..d2fe8e77ceb7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -977,10 +977,9 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
 		*nextp = n;
 }
 
-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+static void cwq_activate_delayed_work(struct work_struct *work)
 {
-	struct work_struct *work = list_first_entry(&cwq->delayed_works,
-						    struct work_struct, entry);
+	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
 
 	trace_workqueue_activate_work(work);
 	move_linked_works(work, &cwq->pool->worklist, NULL);
@@ -988,6 +987,14 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
 	cwq->nr_active++;
 }
 
+static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+{
+	struct work_struct *work = list_first_entry(&cwq->delayed_works,
+						    struct work_struct, entry);
+
+	cwq_activate_delayed_work(work);
+}
+
 /**
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
@@ -1106,6 +1113,18 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		smp_rmb();
 		if (gcwq == get_work_gcwq(work)) {
 			debug_work_deactivate(work);
+
+			/*
+			 * A delayed work item cannot be grabbed directly
+			 * because it might have linked NO_COLOR work items
+			 * which, if left on the delayed_list, will confuse
+			 * cwq->nr_active management later on and cause
+			 * stall.  Make sure the work item is activated
+			 * before grabbing.
+			 */
+			if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
+				cwq_activate_delayed_work(work);
+
 			list_del_init(&work->entry);
 			cwq_dec_nr_in_flight(get_work_cwq(work),
 				get_work_color(work),
-- 
cgit v1.2.3


From b3f9f405a21a29c06c31fb2d6ab36ef9ba7c027b Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Tue, 18 Sep 2012 10:40:00 -0700
Subject: workqueue: remove @delayed from cwq_dec_nr_in_flight()

@delayed is now always false for all callers, remove it.

tj: Updated description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d2fe8e77ceb7..3e324aae3c98 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -999,7 +999,6 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
  * @color: color of work which left the queue
- * @delayed: for a delayed work
  *
  * A work either has completed or is removed from pending queue,
  * decrement nr_in_flight of its cwq and handle workqueue flushing.
@@ -1007,8 +1006,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
  * CONTEXT:
  * spin_lock_irq(gcwq->lock).
  */
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
-				 bool delayed)
+static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
 {
 	/* ignore uncolored works */
 	if (color == WORK_NO_COLOR)
@@ -1016,13 +1014,11 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
 
 	cwq->nr_in_flight[color]--;
 
-	if (!delayed) {
-		cwq->nr_active--;
-		if (!list_empty(&cwq->delayed_works)) {
-			/* one down, submit a delayed one */
-			if (cwq->nr_active < cwq->max_active)
-				cwq_activate_first_delayed(cwq);
-		}
+	cwq->nr_active--;
+	if (!list_empty(&cwq->delayed_works)) {
+		/* one down, submit a delayed one */
+		if (cwq->nr_active < cwq->max_active)
+			cwq_activate_first_delayed(cwq);
 	}
 
 	/* is flush in progress and are we at the flushing tip? */
@@ -1127,8 +1123,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 
 			list_del_init(&work->entry);
 			cwq_dec_nr_in_flight(get_work_cwq(work),
-				get_work_color(work),
-				*work_data_bits(work) & WORK_STRUCT_DELAYED);
+				get_work_color(work));
 
 			spin_unlock(&gcwq->lock);
 			return 1;
@@ -2264,7 +2259,7 @@ __acquires(&gcwq->lock)
 	hlist_del_init(&worker->hentry);
 	worker->current_work = NULL;
 	worker->current_cwq = NULL;
-	cwq_dec_nr_in_flight(cwq, work_color, false);
+	cwq_dec_nr_in_flight(cwq, work_color);
 }
 
 /**
-- 
cgit v1.2.3


From 9f4bd4cddbb50d7617353102e10ce511c5ef6df2 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 19 Sep 2012 10:40:48 -0700
Subject: workqueue: introduce cwq_set_max_active() helper for
 thaw_workqueues()

Using a helper instead of open code makes thaw_workqueues() clearer.
The helper will also be used by the next patch.

tj: Slight update to comment and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3e324aae3c98..b5d722548ffd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3366,6 +3366,26 @@ void destroy_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
+/**
+ * cwq_set_max_active - adjust max_active of a cwq
+ * @cwq: target cpu_workqueue_struct
+ * @max_active: new max_active value.
+ *
+ * Set @cwq->max_active to @max_active and activate delayed works if
+ * increased.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active)
+{
+	cwq->max_active = max_active;
+
+	while (!list_empty(&cwq->delayed_works) &&
+	       cwq->nr_active < cwq->max_active)
+		cwq_activate_first_delayed(cwq);
+}
+
 /**
  * workqueue_set_max_active - adjust max_active of a workqueue
  * @wq: target workqueue
@@ -3792,11 +3812,7 @@ void thaw_workqueues(void)
 				continue;
 
 			/* restore max_active and repopulate worklist */
-			cwq->max_active = wq->saved_max_active;
-
-			while (!list_empty(&cwq->delayed_works) &&
-			       cwq->nr_active < cwq->max_active)
-				cwq_activate_first_delayed(cwq);
+			cwq_set_max_active(cwq, wq->saved_max_active);
 		}
 
 		for_each_worker_pool(pool, gcwq)
-- 
cgit v1.2.3


From 70369b117a8fc5ac18a635ced23ee49f8e722e7b Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 19 Sep 2012 10:40:48 -0700
Subject: workqueue: use cwq_set_max_active() helper for
 workqueue_set_max_active()

workqueue_set_max_active() may increase ->max_active without
activating delayed works and may make the activation order differ from
the queueing order.  Both aren't strictly bugs but the resulting
behavior could be a bit odd.

To make things more consistent, use cwq_set_max_active() helper which
immediately makes use of the newly increased max_mactive if there are
delayed work items and also keeps the activation order.

tj: Slight update to description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b5d722548ffd..4f5c61f8b0e7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3413,7 +3413,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 
 		if (!(wq->flags & WQ_FREEZABLE) ||
 		    !(gcwq->flags & GCWQ_FREEZING))
-			get_cwq(gcwq->cpu, wq)->max_active = max_active;
+			cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active);
 
 		spin_unlock_irq(&gcwq->lock);
 	}
-- 
cgit v1.2.3


From 7c6e72e46c9ea4a88f3f8ba96edce9db4bd48726 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 20 Sep 2012 10:03:19 -0700
Subject: workqueue: remove spurious WARN_ON_ONCE(in_irq()) from
 try_to_grab_pending()

e0aecdd874 ("workqueue: use irqsafe timer for delayed_work") made
try_to_grab_pending() safe to use from irq context but forgot to
remove WARN_ON_ONCE(in_irq()).  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
---
 kernel/workqueue.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4f5c61f8b0e7..143fd8c751f4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1070,8 +1070,6 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 {
 	struct global_cwq *gcwq;
 
-	WARN_ON_ONCE(in_irq());
-
 	local_irq_save(*flags);
 
 	/* try to steal the timer if it exists */
-- 
cgit v1.2.3