From b47e8608a08766ef8121cd747d3aaf6c3dc22649 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 26 Jul 2007 13:40:43 +0200
Subject: [PATCH] sched: increase SCHED_LOAD_SCALE_FUZZ

increase SCHED_LOAD_SCALE_FUZZ, which adds a small amount of
over-balancing, to help distribute CPU-bound tasks more fairly on
SMP systems.

the problem of unfair balancing was noticed and reported by Tong N Li.

10 CPU-bound tasks running on 8 CPUs, v2.6.23-rc1:

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
 2572 mingo     20   0  1576  244  196 R  100  0.0   1:03.61 loop
 2578 mingo     20   0  1576  248  196 R  100  0.0   1:03.59 loop
 2576 mingo     20   0  1576  248  196 R  100  0.0   1:03.52 loop
 2571 mingo     20   0  1576  244  196 R  100  0.0   1:03.46 loop
 2569 mingo     20   0  1576  244  196 R   99  0.0   1:03.36 loop
 2570 mingo     20   0  1576  244  196 R   95  0.0   1:00.55 loop
 2577 mingo     20   0  1576  248  196 R   50  0.0   0:31.88 loop
 2574 mingo     20   0  1576  248  196 R   50  0.0   0:31.87 loop
 2573 mingo     20   0  1576  248  196 R   50  0.0   0:31.86 loop
 2575 mingo     20   0  1576  248  196 R   50  0.0   0:31.86 loop

v2.6.23-rc1 + patch:

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
 2681 mingo     20   0  1576  244  196 R   85  0.0   3:51.68 loop
 2688 mingo     20   0  1576  244  196 R   81  0.0   3:46.35 loop
 2682 mingo     20   0  1576  244  196 R   80  0.0   3:43.68 loop
 2685 mingo     20   0  1576  248  196 R   80  0.0   3:45.97 loop
 2683 mingo     20   0  1576  248  196 R   80  0.0   3:40.25 loop
 2679 mingo     20   0  1576  244  196 R   80  0.0   3:33.53 loop
 2680 mingo     20   0  1576  244  196 R   79  0.0   3:43.53 loop
 2686 mingo     20   0  1576  244  196 R   79  0.0   3:39.31 loop
 2687 mingo     20   0  1576  244  196 R   78  0.0   3:33.31 loop
 2684 mingo     20   0  1576  244  196 R   77  0.0   3:27.52 loop

so they now nicely converge to the expected 80% long-term CPU usage.

Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33b9b4841ee7..7c61b50823fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -681,7 +681,7 @@ enum cpu_idle_type {
 
 #define SCHED_LOAD_SHIFT	10
 #define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
-#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 5)
+#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 1)
 
 #ifdef CONFIG_SMP
 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
-- 
cgit v1.2.3
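To make the size of that one-liner concrete, here is a quick standalone check of the two fuzz values. This program is illustrative only: the macro definitions are copied from the patched sched.h, but the exact places where the balancer consumes SCHED_LOAD_SCALE_FUZZ are not shown in this patch, so the comment merely paraphrases the "over-balancing" description in the changelog.

#include <stdio.h>

/* Definitions copied from the patched include/linux/sched.h. */
#define SCHED_LOAD_SHIFT	10
#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)	/* 1024 */

#define OLD_FUZZ	(SCHED_LOAD_SCALE >> 5)	/* 1024 >> 5 =  32 */
#define NEW_FUZZ	(SCHED_LOAD_SCALE >> 1)	/* 1024 >> 1 = 512 */

int main(void)
{
	/*
	 * A single nice-0 task weighs SCHED_LOAD_SCALE (1024) in the
	 * runqueue load figures the balancer compares, so the patch raises
	 * the over-balancing margin from roughly 3% of one task's weight
	 * to half of it.
	 */
	printf("SCHED_LOAD_SCALE                 = %ld\n", (long)SCHED_LOAD_SCALE);
	printf("old SCHED_LOAD_SCALE_FUZZ (>> 5) = %ld\n", (long)OLD_FUZZ);
	printf("new SCHED_LOAD_SCALE_FUZZ (>> 1) = %ld\n", (long)NEW_FUZZ);
	return 0;
}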
From e107be36efb2a233833e8c9899039a370e4b2318 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Thu, 26 Jul 2007 13:40:43 +0200
Subject: [PATCH] sched: arch preempt notifier mechanism

This adds a general mechanism whereby a task can request the scheduler
to notify it whenever it is preempted or scheduled back in.  This allows
the task to swap any special-purpose registers like the FPU or Intel's
VT registers.

Signed-off-by: Avi Kivity
[ mingo@elte.hu: fixes, cleanups ]
Signed-off-by: Ingo Molnar
---
 include/linux/preempt.h | 44 +++++++++++++++++++++++++++++
 include/linux/sched.h   |  5 ++++
 kernel/Kconfig.preempt  |  3 ++
 kernel/sched.c          | 73 +++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index d0926d63406c..484988ed301e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
 extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,47 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+struct preempt_notifier;
+
+/**
+ * preempt_ops - notifiers called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *	notifier: struct preempt_notifier for the task being scheduled
+ *	cpu: cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *	notifier: struct preempt_notifier for the task being preempted
+ *	next: the task that's kicking us out
+ */
+struct preempt_ops {
+	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+	void (*sched_out)(struct preempt_notifier *notifier,
+			  struct task_struct *next);
+};
+
+/**
+ * preempt_notifier - key for installing preemption notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_notifier {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+				     struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&notifier->link);
+	notifier->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c61b50823fa..7a4de8768748 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -935,6 +935,11 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	/* list of struct preempt_notifier: */
+	struct hlist_head preempt_notifiers;
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c14207..6b066632e40c 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,3 +63,6 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_NOTIFIERS
+	bool
+
diff --git a/kernel/sched.c b/kernel/sched.c
index 93cf241cfbe9..e901aa59f206 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1592,6 +1592,10 @@ static void __sched_fork(struct task_struct *p)
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1677,63 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being preempted
+ *				and rescheduled
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+	hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1685,8 +1746,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+		    struct task_struct *next)
 {
+	fire_sched_out_preempt_notifiers(prev, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1728,6 +1792,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -1768,7 +1833,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 {
 	struct mm_struct *mm, *oldmm;
 
-	prepare_task_switch(rq, next);
+	prepare_task_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -6335,6 +6400,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
-- 
cgit v1.2.3
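The API added above is meant to be driven by the task itself: registration hooks the notifier onto current->preempt_notifiers, and the scheduler then fires sched_out just before the task is switched out and sched_in when it is switched back in. Below is a sketch of a client of this interface. Everything prefixed my_ (the vcpu structure, the enter/leave helpers, the register save/reload placeholders) is hypothetical and exists only for illustration; only struct preempt_ops, preempt_notifier_init(), preempt_notifier_register() and preempt_notifier_unregister() come from the patch. As the patch's own comment suggests, container_of() recovers the enclosing object from the embedded notifier.

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/sched.h>

/* Hypothetical per-task state that owns some special-purpose registers. */
struct my_vcpu {
	struct preempt_notifier pn;	/* embedded; recovered via container_of() */
	/* ... register state ... */
};

/* We are about to be switched out; 'next' is the task replacing us. */
static void my_sched_out(struct preempt_notifier *pn, struct task_struct *next)
{
	struct my_vcpu *v = container_of(pn, struct my_vcpu, pn);

	/* save v's special-purpose registers here (hypothetical) */
	(void)v;
}

/* We have been scheduled back in, on CPU 'cpu'. */
static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
	struct my_vcpu *v = container_of(pn, struct my_vcpu, pn);

	/* reload v's special-purpose registers here (hypothetical) */
	(void)v;
}

static struct preempt_ops my_preempt_ops = {
	.sched_in	= my_sched_in,
	.sched_out	= my_sched_out,
};

/*
 * Must be called by the task that wants the callbacks: registration
 * adds the notifier to current->preempt_notifiers.
 */
static void my_vcpu_start_tracking(struct my_vcpu *v)
{
	preempt_notifier_init(&v->pn, &my_preempt_ops);
	preempt_notifier_register(&v->pn);
}

static void my_vcpu_stop_tracking(struct my_vcpu *v)
{
	preempt_notifier_unregister(&v->pn);
}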
From d02c7a8cf208eb80a3ccbff40a6bebe8902af35a Mon Sep 17 00:00:00 2001
From: Con Kolivas
Date: Thu, 26 Jul 2007 13:40:43 +0200
Subject: [PATCH] sched: add above_background_load() function

Add an above_background_load() function which can be used by other
subsystems to detect whether anything other than niced tasks is
running.

Place it in sched.h to allow it to be compiled out if not used.

Unused for now, but it is a useful hint to the IO scheduler and to
swap-prefetch.

Signed-off-by: Con Kolivas
Cc: Peter Williams
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7a4de8768748..2e490271acf6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -786,6 +786,22 @@ extern int partition_sched_domains(cpumask_t *partition1,
 
 #endif	/* CONFIG_SMP */
 
+/*
+ * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
+ * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
+ * task of nice 0 or enough lower-priority tasks to bring up the
+ * weighted_cpuload to that level.
+ */
+static inline int above_background_load(void)
+{
+	unsigned long cpu;
+
+	for_each_online_cpu(cpu) {
+		if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
+			return 1;
+	}
+	return 0;
+}
 
 struct io_context;	/* See blkdev.h */
 struct cpuset;
-- 
cgit v1.2.3
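As a usage sketch of the kind of caller the changelog has in mind: the helper below and its policy are hypothetical (neither an IO-scheduler nor a swap-prefetch hook is part of this patch); only above_background_load() itself is added above.

#include <linux/sched.h>

/*
 * Hypothetical policy helper for a background subsystem of the sort the
 * changelog mentions (IO scheduling heuristics, swap-prefetch): skip
 * optional work whenever something other than niced tasks is running,
 * as judged by above_background_load() from the patch above.
 */
static int my_background_work_allowed(void)
{
	if (above_background_load())
		return 0;	/* some CPU carries nice-0 (or heavier) load; back off */

	return 1;
}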