aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/asm-generic/preempt.h | 3
-rw-r--r-- include/linux/freezer.h | 50
-rw-r--r-- include/linux/init_task.h | 10
-rw-r--r-- include/linux/kernel.h | 5
-rw-r--r-- include/linux/sched.h | 87
-rw-r--r-- include/linux/wait.h | 80
-rw-r--r-- include/net/sock.h | 1
-rw-r--r-- include/trace/events/sched.h | 9
-rw-r--r-- include/uapi/linux/sched.h | 4
9 files changed, 167 insertions, 82 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 1cd3f5d767a8..eb6f9e6c3075 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -23,9 +23,6 @@ static __always_inline void preempt_count_set(int pc)
/*
* must be macros to avoid header recursion hell
*/
-#define task_preempt_count(p) \
- (task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
-
#define init_task_preempt_count(p) do { \
task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
} while (0)
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7fd81b8c4897..6b7fd9cf5ea2 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -246,15 +246,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
* defined in <linux/wait.h>
*/
-#define wait_event_freezekillable(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_killable(wq, (condition)); \
- freezer_count(); \
- __retval; \
-})
-
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
#define wait_event_freezekillable_unsafe(wq, condition) \
({ \
@@ -265,35 +256,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
__retval; \
})
-#define wait_event_freezable(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_interruptible(wq, (condition)); \
- freezer_count(); \
- __retval; \
-})
-
-#define wait_event_freezable_timeout(wq, condition, timeout) \
-({ \
- long __retval = timeout; \
- freezer_do_not_count(); \
- __retval = wait_event_interruptible_timeout(wq, (condition), \
- __retval); \
- freezer_count(); \
- __retval; \
-})
-
-#define wait_event_freezable_exclusive(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_interruptible_exclusive(wq, condition); \
- freezer_count(); \
- __retval; \
-})
-
-
#else /* !CONFIG_FREEZER */
static inline bool frozen(struct task_struct *p) { return false; }
static inline bool freezing(struct task_struct *p) { return false; }
@@ -331,18 +293,6 @@ static inline void set_freezable(void) {}
#define freezable_schedule_hrtimeout_range(expires, delta, mode) \
schedule_hrtimeout_range(expires, delta, mode)
-#define wait_event_freezable(wq, condition) \
- wait_event_interruptible(wq, condition)
-
-#define wait_event_freezable_timeout(wq, condition, timeout) \
- wait_event_interruptible_timeout(wq, condition, timeout)
-
-#define wait_event_freezable_exclusive(wq, condition) \
- wait_event_interruptible_exclusive(wq, condition)
-
-#define wait_event_freezekillable(wq, condition) \
- wait_event_killable(wq, condition)
-
#define wait_event_freezekillable_unsafe(wq, condition) \
wait_event_killable(wq, condition)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d996aef8044f..3037fc085e8e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -166,6 +166,15 @@ extern struct task_group root_task_group;
# define INIT_RT_MUTEXES(tsk)
#endif
+#ifdef CONFIG_NUMA_BALANCING
+# define INIT_NUMA_BALANCING(tsk) \
+ .numa_preferred_nid = -1, \
+ .numa_group = NULL, \
+ .numa_faults = NULL,
+#else
+# define INIT_NUMA_BALANCING(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -237,6 +246,7 @@ extern struct task_group root_task_group;
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
INIT_VTIME(tsk) \
+ INIT_NUMA_BALANCING(tsk) \
}
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3d770f5564b8..446d76a87ba1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -162,6 +162,7 @@ extern int _cond_resched(void);
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ void ___might_sleep(const char *file, int line, int preempt_offset);
void __might_sleep(const char *file, int line, int preempt_offset);
/**
* might_sleep - annotation for functions that can sleep
@@ -175,10 +176,14 @@ extern int _cond_resched(void);
*/
# define might_sleep() \
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+# define sched_annotate_sleep() __set_current_state(TASK_RUNNING)
#else
+ static inline void ___might_sleep(const char *file, int line,
+ int preempt_offset) { }
static inline void __might_sleep(const char *file, int line,
int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
+# define sched_annotate_sleep() do { } while (0)
#endif
#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 706a9f744909..55f5ee7cc3d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
(task->flags & PF_FROZEN) == 0)
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+
+#define __set_task_state(tsk, state_value) \
+ do { \
+ (tsk)->task_state_change = _THIS_IP_; \
+ (tsk)->state = (state_value); \
+ } while (0)
+#define set_task_state(tsk, state_value) \
+ do { \
+ (tsk)->task_state_change = _THIS_IP_; \
+ set_mb((tsk)->state, (state_value)); \
+ } while (0)
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ * set_current_state(TASK_UNINTERRUPTIBLE);
+ * if (do_i_need_to_sleep())
+ * schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value) \
+ do { \
+ current->task_state_change = _THIS_IP_; \
+ current->state = (state_value); \
+ } while (0)
+#define set_current_state(state_value) \
+ do { \
+ current->task_state_change = _THIS_IP_; \
+ set_mb(current->state, (state_value)); \
+ } while (0)
+
+#else
+
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
*
* If the caller does not need such serialisation then use __set_current_state()
*/
-#define __set_current_state(state_value) \
+#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) \
+#define set_current_state(state_value) \
set_mb(current->state, (state_value))
+#endif
+
/* Task command name length */
#define TASK_COMM_LEN 16
@@ -1558,28 +1597,23 @@ struct task_struct {
struct numa_group *numa_group;
/*
- * Exponential decaying average of faults on a per-node basis.
- * Scheduling placement decisions are made based on the these counts.
- * The values remain static for the duration of a PTE scan
+ * numa_faults is an array split into four regions:
+ * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
+ * in this precise order.
+ *
+ * faults_memory: Exponential decaying average of faults on a per-node
+ * basis. Scheduling placement decisions are made based on these
+ * counts. The values remain static for the duration of a PTE scan.
+ * faults_cpu: Track the nodes the process was running on when a NUMA
+ * hinting fault was incurred.
+ * faults_memory_buffer and faults_cpu_buffer: Record faults per node
+ * during the current scan window. When the scan completes, the counts
+ * in faults_memory and faults_cpu decay and these values are copied.
*/
- unsigned long *numa_faults_memory;
+ unsigned long *numa_faults;
unsigned long total_numa_faults;
/*
- * numa_faults_buffer records faults per node during the current
- * scan window. When the scan completes, the counts in
- * numa_faults_memory decay and these values are copied.
- */
- unsigned long *numa_faults_buffer_memory;
-
- /*
- * Track the nodes the process was running on when a NUMA hinting
- * fault was incurred.
- */
- unsigned long *numa_faults_cpu;
- unsigned long *numa_faults_buffer_cpu;
-
- /*
* numa_faults_locality tracks if faults recorded during the last
* scan window were remote/local. The task scan period is adapted
* based on the locality of the faults with different weights
@@ -1661,6 +1695,9 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ unsigned long task_state_change;
+#endif
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2052,6 +2089,10 @@ static inline void tsk_restore_flags(struct task_struct *task,
task->flags |= orig_flags & flags;
}
+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+ const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p,
+ const struct cpumask *cs_cpus_allowed);
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask);
@@ -2760,7 +2801,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
extern int _cond_resched(void);
#define cond_resched() ({ \
- __might_sleep(__FILE__, __LINE__, 0); \
+ ___might_sleep(__FILE__, __LINE__, 0); \
_cond_resched(); \
})
@@ -2773,14 +2814,14 @@ extern int __cond_resched_lock(spinlock_t *lock);
#endif
#define cond_resched_lock(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
+ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \
})
extern int __cond_resched_softirq(void);
#define cond_resched_softirq() ({ \
- __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
+ ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
__cond_resched_softirq(); \
})
diff --git a/include/linux/wait.h b/include/linux/wait.h
index e4a8eb9312ea..2232ed16635a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -13,9 +13,12 @@ typedef struct __wait_queue wait_queue_t;
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+/* __wait_queue::flags */
+#define WQ_FLAG_EXCLUSIVE 0x01
+#define WQ_FLAG_WOKEN 0x02
+
struct __wait_queue {
unsigned int flags;
-#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
struct list_head task_list;
@@ -258,11 +261,37 @@ __out: __ret; \
*/
#define wait_event(wq, condition) \
do { \
+ might_sleep(); \
if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
+#define __wait_event_freezable(wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+ schedule(); try_to_freeze())
+
+/**
+ * wait_event_freezable - sleep (or freeze) until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
+ * to system load) until the @condition evaluates to true. The
+ * @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_freezable(wq, condition) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_event_freezable(wq, condition); \
+ __ret; \
+})
+
#define __wait_event_timeout(wq, condition, timeout) \
___wait_event(wq, ___wait_cond_timeout(condition), \
TASK_UNINTERRUPTIBLE, 0, timeout, \
@@ -290,11 +319,30 @@ do { \
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
+ might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_timeout(wq, condition, timeout); \
__ret; \
})
+#define __wait_event_freezable_timeout(wq, condition, timeout) \
+ ___wait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_INTERRUPTIBLE, 0, timeout, \
+ __ret = schedule_timeout(__ret); try_to_freeze())
+
+/*
+ * Like wait_event_timeout(), except that it sleeps in TASK_INTERRUPTIBLE (to
+ * avoid increasing load) and is freezable.
+ */
+#define wait_event_freezable_timeout(wq, condition, timeout) \
+({ \
+ long __ret = timeout; \
+ might_sleep(); \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_freezable_timeout(wq, condition, timeout); \
+ __ret; \
+})
+
#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
cmd1; schedule(); cmd2)
@@ -315,6 +363,7 @@ do { \
*/
#define wait_event_cmd(wq, condition, cmd1, cmd2) \
do { \
+ might_sleep(); \
if (condition) \
break; \
__wait_event_cmd(wq, condition, cmd1, cmd2); \
@@ -342,6 +391,7 @@ do { \
#define wait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_interruptible(wq, condition); \
__ret; \
@@ -375,6 +425,7 @@ do { \
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
+ might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_interruptible_timeout(wq, \
condition, timeout); \
@@ -425,6 +476,7 @@ do { \
#define wait_event_hrtimeout(wq, condition, timeout) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
TASK_UNINTERRUPTIBLE); \
@@ -450,6 +502,7 @@ do { \
#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
({ \
long __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
TASK_INTERRUPTIBLE); \
@@ -463,12 +516,27 @@ do { \
#define wait_event_interruptible_exclusive(wq, condition) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_interruptible_exclusive(wq, condition);\
__ret; \
})
+#define __wait_event_freezable_exclusive(wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
+ schedule(); try_to_freeze())
+
+#define wait_event_freezable_exclusive(wq, condition) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_event_freezable_exclusive(wq, condition);\
+ __ret; \
+})
+
+
#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
({ \
int __ret = 0; \
@@ -637,6 +705,7 @@ do { \
#define wait_event_killable(wq, condition) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_killable(wq, condition); \
__ret; \
@@ -830,6 +899,8 @@ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int sta
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
+long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
+int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
@@ -886,6 +957,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *);
static inline int
wait_on_bit(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_bit(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
@@ -910,6 +982,7 @@ wait_on_bit(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_io(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_bit(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
@@ -936,6 +1009,7 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
{
+ might_sleep();
if (!test_bit(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit, action, mode);
@@ -963,6 +1037,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
static inline int
wait_on_bit_lock(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
@@ -986,6 +1061,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_lock_io(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
@@ -1011,6 +1087,7 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
{
+ might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
return out_of_line_wait_on_bit_lock(word, bit, action, mode);
@@ -1029,6 +1106,7 @@ wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned
static inline
int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
{
+ might_sleep();
if (atomic_read(val) == 0)
return 0;
return out_of_line_wait_on_atomic_t(val, action, mode);
diff --git a/include/net/sock.h b/include/net/sock.h
index 7db3db112baa..e6f235ebf6c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -897,6 +897,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
if (!__rc) { \
*(__timeo) = schedule_timeout(*(__timeo)); \
} \
+ sched_annotate_sleep(); \
lock_sock(__sk); \
__rc = __condition; \
__rc; \
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0a68d5ae584e..30fedaf3e56a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -97,16 +97,19 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
long state = p->state;
#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_SCHED_DEBUG
+ BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
/*
* For all intents and purposes a preempted task is a running task.
*/
- if (task_preempt_count(p) & PREEMPT_ACTIVE)
+ if (preempt_count() & PREEMPT_ACTIVE)
state = TASK_RUNNING | TASK_STATE_MAX;
-#endif
+#endif /* CONFIG_PREEMPT */
return state;
}
-#endif
+#endif /* CREATE_TRACE_POINTS */
/*
* Tracepoint for task switches, performed by the scheduler:
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index b932be9f5c5b..cc89ddefa926 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -23,8 +23,8 @@
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
and is now available for re-use. */
-#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
-#define CLONE_NEWIPC 0x08000000 /* New ipcs */
+#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
+#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */