Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         |    1
-rw-r--r--  kernel/cpuset.c         |   82
-rw-r--r--  kernel/fork.c           |    2
-rw-r--r--  kernel/lockdep.c        |  203
-rw-r--r--  kernel/module.c         |   25
-rw-r--r--  kernel/nsproxy.c        |    4
-rw-r--r--  kernel/power/Kconfig    |    9
-rw-r--r--  kernel/power/process.c  |   21
-rw-r--r--  kernel/relay.c          |    4
-rw-r--r--  kernel/sched.c          |    4
-rw-r--r--  kernel/signal.c         |    4
-rw-r--r--  kernel/sysctl.c         |    3
-rw-r--r--  kernel/timer.c          |   14
-rw-r--r--  kernel/unwind.c         | 1305
-rw-r--r--  kernel/workqueue.c      |   16
15 files changed, 255 insertions, 1442 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 5e3f3b75563a..14f4d45e0ae9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -31,7 +31,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o -obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2c3b4431472b..232aed2b10f9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) } /** - * cpuset_zone_allowed - Can we allocate memory on zone z's memory node? + * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node? * @z: is this zone on an allowed node? - * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL) + * @gfp_mask: memory allocation flags * - * If we're in interrupt, yes, we can always allocate. If zone + * If we're in interrupt, yes, we can always allocate. If + * __GFP_THISNODE is set, yes, we can always allocate. If zone * z's node is in our tasks mems_allowed, yes. If it's not a * __GFP_HARDWALL request and this zone's nodes is in the nearest * mem_exclusive cpuset ancestor to this tasks cpuset, yes. * Otherwise, no. * + * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() + * reduces to cpuset_zone_allowed_hardwall(). Otherwise, + * cpuset_zone_allowed_softwall() might sleep, and might allow a zone + * from an enclosing cpuset. + * + * cpuset_zone_allowed_hardwall() only handles the simpler case of + * hardwall cpusets, and never sleeps. + * + * The __GFP_THISNODE placement logic is really handled elsewhere, + * by forcibly using a zonelist starting at a specified node, and by + * (in get_page_from_freelist()) refusing to consider the zones for + * any node on the zonelist except the first. By the time any such + * calls get to this routine, we should just shut up and say 'yes'. + * * GFP_USER allocations are marked with the __GFP_HARDWALL bit, * and do not allow allocations outside the current tasks cpuset. * GFP_KERNEL allocations are not so marked, so can escape to the - * nearest mem_exclusive ancestor cpuset. + * nearest enclosing mem_exclusive ancestor cpuset. * - * Scanning up parent cpusets requires callback_mutex. The __alloc_pages() - * routine only calls here with __GFP_HARDWALL bit _not_ set if - * it's a GFP_KERNEL allocation, and all nodes in the current tasks - * mems_allowed came up empty on the first pass over the zonelist. - * So only GFP_KERNEL allocations, if all nodes in the cpuset are - * short of memory, might require taking the callback_mutex mutex. + * Scanning up parent cpusets requires callback_mutex. The + * __alloc_pages() routine only calls here with __GFP_HARDWALL bit + * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the + * current tasks mems_allowed came up empty on the first pass over + * the zonelist. So only GFP_KERNEL allocations, if all nodes in the + * cpuset are short of memory, might require taking the callback_mutex + * mutex. * * The first call here from mm/page_alloc:get_page_from_freelist() - * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so - * no allocation on a node outside the cpuset is allowed (unless in - * interrupt, of course). 
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, + * so no allocation on a node outside the cpuset is allowed (unless + * in interrupt, of course). * * The second pass through get_page_from_freelist() doesn't even call * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() @@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. * * Rule: - * Don't call cpuset_zone_allowed() if you can't sleep, unless you + * Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables * the code that might scan up ancestor cpusets and sleep. - **/ + */ -int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ @@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) return allowed; } +/* + * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node? + * @z: is this zone on an allowed node? + * @gfp_mask: memory allocation flags + * + * If we're in interrupt, yes, we can always allocate. + * If __GFP_THISNODE is set, yes, we can always allocate. If zone + * z's node is in our tasks mems_allowed, yes. Otherwise, no. + * + * The __GFP_THISNODE placement logic is really handled elsewhere, + * by forcibly using a zonelist starting at a specified node, and by + * (in get_page_from_freelist()) refusing to consider the zones for + * any node on the zonelist except the first. By the time any such + * calls get to this routine, we should just shut up and say 'yes'. + * + * Unlike the cpuset_zone_allowed_softwall() variant, above, + * this variant requires that the zone be in the current tasks + * mems_allowed or that we're in interrupt. It does not scan up the + * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset. + * It never sleeps. + */ + +int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) +{ + int node; /* node that zone z is on */ + + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) + return 1; + node = zone_to_nid(z); + if (node_isset(node, current->mems_allowed)) + return 1; + return 0; +} + /** * cpuset_lock - lock out any changes to cpuset structures * diff --git a/kernel/fork.c b/kernel/fork.c index d16c566eb645..fc723e595cd5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -203,7 +203,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct mempolicy *pol; down_write(&oldmm->mmap_sem); - flush_cache_mm(oldmm); + flush_cache_dup_mm(oldmm); /* * Not linked in yet - no deadlock potential: */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index b02032476dc2..01e750559034 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -43,13 +43,49 @@ #include "lockdep_internals.h" /* - * hash_lock: protects the lockdep hashes and class/list/hash allocators. + * lockdep_lock: protects the lockdep graph, the hashes and the + * class/list/hash allocators. * * This is one of the rare exceptions where it's justified * to use a raw spinlock - we really dont want the spinlock - * code to recurse back into the lockdep code. + * code to recurse back into the lockdep code... 
*/ -static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + +static int graph_lock(void) +{ + __raw_spin_lock(&lockdep_lock); + /* + * Make sure that if another CPU detected a bug while + * walking the graph we dont change it (while the other + * CPU is busy printing out stuff with the graph lock + * dropped already) + */ + if (!debug_locks) { + __raw_spin_unlock(&lockdep_lock); + return 0; + } + return 1; +} + +static inline int graph_unlock(void) +{ + __raw_spin_unlock(&lockdep_lock); + return 0; +} + +/* + * Turn lock debugging off and return with 0 if it was off already, + * and also release the graph lock: + */ +static inline int debug_locks_off_graph_unlock(void) +{ + int ret = debug_locks_off(); + + __raw_spin_unlock(&lockdep_lock); + + return ret; +} static int lockdep_initialized; @@ -57,14 +93,15 @@ unsigned long nr_list_entries; static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; /* - * Allocate a lockdep entry. (assumes hash_lock held, returns + * Allocate a lockdep entry. (assumes the graph_lock held, returns * with NULL on failure) */ static struct lock_list *alloc_list_entry(void) { if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { - __raw_spin_unlock(&hash_lock); - debug_locks_off(); + if (!debug_locks_off_graph_unlock()) + return NULL; + printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); printk("turning off the locking correctness validator.\n"); return NULL; @@ -145,9 +182,7 @@ EXPORT_SYMBOL(lockdep_on); */ #define VERBOSE 0 -#ifdef VERBOSE -# define VERY_VERBOSE 0 -#endif +#define VERY_VERBOSE 0 #if VERBOSE # define HARDIRQ_VERBOSE 1 @@ -172,8 +207,8 @@ static int class_filter(struct lock_class *class) !strcmp(class->name, "&struct->lockfield")) return 1; #endif - /* Allow everything else. 0 would be filter everything else */ - return 1; + /* Filter everything else. 1 would be to allow everything else */ + return 0; } #endif @@ -207,7 +242,7 @@ static int softirq_verbose(struct lock_class *class) /* * Stack-trace: tightly packed array of stack backtrace - * addresses. Protected by the hash_lock. + * addresses. Protected by the graph_lock. 
*/ unsigned long nr_stack_trace_entries; static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; @@ -226,18 +261,15 @@ static int save_trace(struct stack_trace *trace) trace->max_entries = trace->nr_entries; nr_stack_trace_entries += trace->nr_entries; - if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) { - __raw_spin_unlock(&hash_lock); - return 0; - } if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { - __raw_spin_unlock(&hash_lock); - if (debug_locks_off()) { - printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); - printk("turning off the locking correctness validator.\n"); - dump_stack(); - } + if (!debug_locks_off_graph_unlock()) + return 0; + + printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + return 0; } @@ -526,9 +558,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) { struct task_struct *curr = current; - __raw_spin_unlock(&hash_lock); - debug_locks_off(); - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; printk("\n=======================================================\n"); @@ -556,12 +586,10 @@ static noinline int print_circular_bug_tail(void) if (debug_locks_silent) return 0; - /* hash_lock unlocked by the header */ - __raw_spin_lock(&hash_lock); this.class = check_source->class; if (!save_trace(&this.trace)) return 0; - __raw_spin_unlock(&hash_lock); + print_circular_bug_entry(&this, 0); printk("\nother info that might help us debug this:\n\n"); @@ -577,8 +605,10 @@ static noinline int print_circular_bug_tail(void) static int noinline print_infinite_recursion_bug(void) { - __raw_spin_unlock(&hash_lock); - DEBUG_LOCKS_WARN_ON(1); + if (!debug_locks_off_graph_unlock()) + return 0; + + WARN_ON(1); return 0; } @@ -713,9 +743,7 @@ print_bad_irq_dependency(struct task_struct *curr, enum lock_usage_bit bit2, const char *irqclass) { - __raw_spin_unlock(&hash_lock); - debug_locks_off(); - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; printk("\n======================================================\n"); @@ -796,9 +824,7 @@ static int print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, struct held_lock *next) { - debug_locks_off(); - __raw_spin_unlock(&hash_lock); - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; printk("\n=============================================\n"); @@ -974,14 +1000,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * Debugging printouts: */ if (verbose(prev->class) || verbose(next->class)) { - __raw_spin_unlock(&hash_lock); + graph_unlock(); printk("\n new dependency: "); print_lock_name(prev->class); printk(" => "); print_lock_name(next->class); printk("\n"); dump_stack(); - __raw_spin_lock(&hash_lock); + return graph_lock(); } return 1; } @@ -1046,8 +1072,10 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) } return 1; out_bug: - __raw_spin_unlock(&hash_lock); - DEBUG_LOCKS_WARN_ON(1); + if (!debug_locks_off_graph_unlock()) + return 0; + + WARN_ON(1); return 0; } @@ -1201,7 +1229,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) hash_head = classhashentry(key); raw_local_irq_save(flags); - __raw_spin_lock(&hash_lock); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } /* * We have to do the hash-walk again, to avoid races * with another CPU: @@ -1214,9 +1245,12 @@ 
register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * the hash: */ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { - __raw_spin_unlock(&hash_lock); + if (!debug_locks_off_graph_unlock()) { + raw_local_irq_restore(flags); + return NULL; + } raw_local_irq_restore(flags); - debug_locks_off(); + printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); printk("turning off the locking correctness validator.\n"); return NULL; @@ -1237,18 +1271,23 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) list_add_tail_rcu(&class->hash_entry, hash_head); if (verbose(class)) { - __raw_spin_unlock(&hash_lock); + graph_unlock(); raw_local_irq_restore(flags); + printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) printk("#%d", class->name_version); printk("\n"); dump_stack(); + raw_local_irq_save(flags); - __raw_spin_lock(&hash_lock); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } } out_unlock_set: - __raw_spin_unlock(&hash_lock); + graph_unlock(); raw_local_irq_restore(flags); if (!subclass || force) @@ -1264,7 +1303,7 @@ out_unlock_set: * add it and return 0 - in this case the new dependency chain is * validated. If the key is already hashed, return 1. */ -static inline int lookup_chain_cache(u64 chain_key) +static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class) { struct list_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; @@ -1278,34 +1317,32 @@ static inline int lookup_chain_cache(u64 chain_key) if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(&chain_lookup_hits); - /* - * In the debugging case, force redundant checking - * by returning 1: - */ -#ifdef CONFIG_DEBUG_LOCKDEP - __raw_spin_lock(&hash_lock); - return 1; -#endif + if (very_verbose(class)) + printk("\nhash chain already cached, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name); return 0; } } + if (very_verbose(class)) + printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name); /* * Allocate a new chain entry from the static array, and add * it to the hash: */ - __raw_spin_lock(&hash_lock); + if (!graph_lock()) + return 0; /* * We have to walk the chain again locked - to avoid duplicates: */ list_for_each_entry(chain, hash_head, entry) { if (chain->chain_key == chain_key) { - __raw_spin_unlock(&hash_lock); + graph_unlock(); goto cache_hit; } } if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { - __raw_spin_unlock(&hash_lock); - debug_locks_off(); + if (!debug_locks_off_graph_unlock()) + return 0; + printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); printk("turning off the locking correctness validator.\n"); return 0; @@ -1381,9 +1418,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, struct held_lock *this, int forwards, const char *irqclass) { - __raw_spin_unlock(&hash_lock); - debug_locks_off(); - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; printk("\n=========================================================\n"); @@ -1453,7 +1488,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass); } -static inline void print_irqtrace_events(struct task_struct *curr) +void print_irqtrace_events(struct task_struct *curr) { printk("irq event stamp: %u\n", curr->irq_events); printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); @@ -1466,19 +1501,13 @@ 
static inline void print_irqtrace_events(struct task_struct *curr) print_ip_sym(curr->softirq_disable_ip); } -#else -static inline void print_irqtrace_events(struct task_struct *curr) -{ -} #endif static int print_usage_bug(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) { - __raw_spin_unlock(&hash_lock); - debug_locks_off(); - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; printk("\n=================================\n"); @@ -1539,12 +1568,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, if (likely(this->class->usage_mask & new_mask)) return 1; - __raw_spin_lock(&hash_lock); + if (!graph_lock()) + return 0; /* * Make sure we didnt race: */ if (unlikely(this->class->usage_mask & new_mask)) { - __raw_spin_unlock(&hash_lock); + graph_unlock(); return 1; } @@ -1730,16 +1760,16 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, debug_atomic_dec(&nr_unused_locks); break; default: - __raw_spin_unlock(&hash_lock); - debug_locks_off(); + if (!debug_locks_off_graph_unlock()) + return 0; WARN_ON(1); return 0; } - __raw_spin_unlock(&hash_lock); + graph_unlock(); /* - * We must printk outside of the hash_lock: + * We must printk outside of the graph_lock: */ if (ret == 2) { printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); @@ -2137,9 +2167,9 @@ out_calc_hash: * We look up the chain_key and do the O(N^2) check and update of * the dependencies only if this is a new dependency chain. * (If lookup_chain_cache() returns with 1 it acquires - * hash_lock for us) + * graph_lock for us) */ - if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) { + if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) { /* * Check whether last held lock: * @@ -2170,7 +2200,7 @@ out_calc_hash: if (!chain_head && ret != 2) if (!check_prevs_add(curr, hlock)) return 0; - __raw_spin_unlock(&hash_lock); + graph_unlock(); } curr->lockdep_depth++; check_chain_key(curr); @@ -2433,6 +2463,7 @@ EXPORT_SYMBOL_GPL(lock_release); void lockdep_reset(void) { unsigned long flags; + int i; raw_local_irq_save(flags); current->curr_chain_key = 0; @@ -2443,6 +2474,8 @@ void lockdep_reset(void) nr_softirq_chains = 0; nr_process_chains = 0; debug_locks = 1; + for (i = 0; i < CHAINHASH_SIZE; i++) + INIT_LIST_HEAD(chainhash_table + i); raw_local_irq_restore(flags); } @@ -2479,7 +2512,7 @@ void lockdep_free_key_range(void *start, unsigned long size) int i; raw_local_irq_save(flags); - __raw_spin_lock(&hash_lock); + graph_lock(); /* * Unhash all classes that were created by this module: @@ -2493,7 +2526,7 @@ void lockdep_free_key_range(void *start, unsigned long size) zap_class(class); } - __raw_spin_unlock(&hash_lock); + graph_unlock(); raw_local_irq_restore(flags); } @@ -2521,20 +2554,20 @@ void lockdep_reset_lock(struct lockdep_map *lock) * Debug check: in the end all mapped classes should * be gone. 
*/ - __raw_spin_lock(&hash_lock); + graph_lock(); for (i = 0; i < CLASSHASH_SIZE; i++) { head = classhash_table + i; if (list_empty(head)) continue; list_for_each_entry_safe(class, next, head, hash_entry) { if (unlikely(class == lock->class_cache)) { - __raw_spin_unlock(&hash_lock); - DEBUG_LOCKS_WARN_ON(1); + if (debug_locks_off_graph_unlock()) + WARN_ON(1); goto out_restore; } } } - __raw_spin_unlock(&hash_lock); + graph_unlock(); out_restore: raw_local_irq_restore(flags); diff --git a/kernel/module.c b/kernel/module.c index d9eae45d0145..b565eaeff7e6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -824,9 +824,34 @@ static inline void module_unload_init(struct module *mod) } #endif /* CONFIG_MODULE_UNLOAD */ +static ssize_t show_initstate(struct module_attribute *mattr, + struct module *mod, char *buffer) +{ + const char *state = "unknown"; + + switch (mod->state) { + case MODULE_STATE_LIVE: + state = "live"; + break; + case MODULE_STATE_COMING: + state = "coming"; + break; + case MODULE_STATE_GOING: + state = "going"; + break; + } + return sprintf(buffer, "%s\n", state); +} + +static struct module_attribute initstate = { + .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, + .show = show_initstate, +}; + static struct module_attribute *modinfo_attrs[] = { &modinfo_version, &modinfo_srcversion, + &initstate, #ifdef CONFIG_MODULE_UNLOAD &refcnt, #endif diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index e2ce748e96af..f5b9ee6f6bbb 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -46,10 +46,8 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig) struct nsproxy *ns; ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); - if (ns) { + if (ns) atomic_set(&ns->count, 1); - ns->id = -1; - } return ns; } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 710ed084e7c5..ed296225dcd4 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -20,13 +20,14 @@ config PM sending the processor to sleep and saving power. config PM_LEGACY - bool "Legacy Power Management API" + bool "Legacy Power Management API (DEPRECATED)" depends on PM - default y + default n ---help--- - Support for pm_register() and friends. + Support for pm_register() and friends. This old API is obsoleted + by the driver model. - If unsure, say Y. + If unsure, say N. 
config PM_DEBUG bool "Power Management Debug Support" diff --git a/kernel/power/process.c b/kernel/power/process.c index 99eeb119b06d..6d566bf7085c 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -28,8 +28,7 @@ static inline int freezeable(struct task_struct * p) if ((p == current) || (p->flags & PF_NOFREEZE) || (p->exit_state == EXIT_ZOMBIE) || - (p->exit_state == EXIT_DEAD) || - (p->state == TASK_STOPPED)) + (p->exit_state == EXIT_DEAD)) return 0; return 1; } @@ -61,10 +60,16 @@ static inline void freeze_process(struct task_struct *p) unsigned long flags; if (!freezing(p)) { - freeze(p); - spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, 0); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + rmb(); + if (!frozen(p)) { + if (p->state == TASK_STOPPED) + force_sig_specific(SIGSTOP, p); + + freeze(p); + spin_lock_irqsave(&p->sighand->siglock, flags); + signal_wake_up(p, p->state == TASK_STOPPED); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } } } @@ -103,9 +108,7 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) if (frozen(p)) continue; - if (p->state == TASK_TRACED && - (frozen(p->parent) || - p->parent->state == TASK_STOPPED)) { + if (p->state == TASK_TRACED && frozen(p->parent)) { cancel_freezing(p); continue; } diff --git a/kernel/relay.c b/kernel/relay.c index 818e514729cf..a4701e7ba7d0 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -138,7 +138,7 @@ depopulate: */ struct rchan_buf *relay_create_buf(struct rchan *chan) { - struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); + struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); if (!buf) return NULL; @@ -479,7 +479,7 @@ struct rchan *relay_open(const char *base_filename, if (!(subbuf_size && n_subbufs)) return NULL; - chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL); + chan = kzalloc(sizeof(struct rchan), GFP_KERNEL); if (!chan) return NULL; diff --git a/kernel/sched.c b/kernel/sched.c index 8a0afb97af71..5cd833bc2173 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3429,6 +3429,8 @@ asmlinkage void __sched schedule(void) "%s/0x%08x/%d\n", current->comm, preempt_count(), current->pid); debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); dump_stack(); } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -6977,6 +6979,8 @@ void __might_sleep(char *file, int line) printk("in_atomic():%d, irqs_disabled():%d\n", in_atomic(), irqs_disabled()); debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); dump_stack(); } #endif diff --git a/kernel/signal.c b/kernel/signal.c index 1921ffdc5e77..5630255d2e2a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1705,7 +1705,9 @@ finish_stop(int stop_count) read_unlock(&tasklist_lock); } - schedule(); + do { + schedule(); + } while (try_to_freeze()); /* * Now we don't run again until continued. 
*/ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 130c5ec9ee0b..600b33358ded 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -65,7 +65,6 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern int sysctl_panic_on_oom; extern int max_threads; -extern int sysrq_enabled; extern int core_uses_pid; extern int suid_dumpable; extern char core_pattern[]; @@ -543,7 +542,7 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_SYSRQ, .procname = "sysrq", - .data = &sysrq_enabled, + .data = &__sysrq_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/kernel/timer.c b/kernel/timer.c index 0256ab443d8a..feddf817baa5 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1146,11 +1146,15 @@ static inline void calc_load(unsigned long ticks) unsigned long active_tasks; /* fixed-point */ static int count = LOAD_FREQ; - active_tasks = count_active_tasks(); - for (count -= ticks; count < 0; count += LOAD_FREQ) { - CALC_LOAD(avenrun[0], EXP_1, active_tasks); - CALC_LOAD(avenrun[1], EXP_5, active_tasks); - CALC_LOAD(avenrun[2], EXP_15, active_tasks); + count -= ticks; + if (unlikely(count < 0)) { + active_tasks = count_active_tasks(); + do { + CALC_LOAD(avenrun[0], EXP_1, active_tasks); + CALC_LOAD(avenrun[1], EXP_5, active_tasks); + CALC_LOAD(avenrun[2], EXP_15, active_tasks); + count += LOAD_FREQ; + } while (count < 0); } } diff --git a/kernel/unwind.c b/kernel/unwind.c deleted file mode 100644 index 09c261329249..000000000000 --- a/kernel/unwind.c +++ /dev/null @@ -1,1305 +0,0 @@ -/* - * Copyright (C) 2002-2006 Novell, Inc. - * Jan Beulich <jbeulich@novell.com> - * This code is released under version 2 of the GNU GPL. - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whistles, so there - * is not much point in implementing the full Dwarf2 unwind API. 
- */ - -#include <linux/unwind.h> -#include <linux/module.h> -#include <linux/bootmem.h> -#include <linux/sort.h> -#include <linux/stop_machine.h> -#include <linux/uaccess.h> -#include <asm/sections.h> -#include <asm/uaccess.h> -#include <asm/unaligned.h> - -extern const char __start_unwind[], __end_unwind[]; -extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; - -#define MAX_STACK_DEPTH 8 - -#define EXTRA_INFO(f) { \ - BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ - % FIELD_SIZEOF(struct unwind_frame_info, f)) \ - + offsetof(struct unwind_frame_info, f) \ - / FIELD_SIZEOF(struct unwind_frame_info, f), \ - FIELD_SIZEOF(struct unwind_frame_info, f) \ - } -#define PTREGS_INFO(f) EXTRA_INFO(regs.f) - -static const struct { - unsigned offs:BITS_PER_LONG / 2; - unsigned width:BITS_PER_LONG / 2; -} reg_info[] = { - UNW_REGISTER_INFO -}; - -#undef PTREGS_INFO -#undef EXTRA_INFO - -#ifndef REG_INVALID -#define REG_INVALID(r) (reg_info[r].width == 0) -#endif - -#define DW_CFA_nop 0x00 -#define DW_CFA_set_loc 0x01 -#define DW_CFA_advance_loc1 0x02 -#define DW_CFA_advance_loc2 0x03 -#define DW_CFA_advance_loc4 0x04 -#define DW_CFA_offset_extended 0x05 -#define DW_CFA_restore_extended 0x06 -#define DW_CFA_undefined 0x07 -#define DW_CFA_same_value 0x08 -#define DW_CFA_register 0x09 -#define DW_CFA_remember_state 0x0a -#define DW_CFA_restore_state 0x0b -#define DW_CFA_def_cfa 0x0c -#define DW_CFA_def_cfa_register 0x0d -#define DW_CFA_def_cfa_offset 0x0e -#define DW_CFA_def_cfa_expression 0x0f -#define DW_CFA_expression 0x10 -#define DW_CFA_offset_extended_sf 0x11 -#define DW_CFA_def_cfa_sf 0x12 -#define DW_CFA_def_cfa_offset_sf 0x13 -#define DW_CFA_val_offset 0x14 -#define DW_CFA_val_offset_sf 0x15 -#define DW_CFA_val_expression 0x16 -#define DW_CFA_lo_user 0x1c -#define DW_CFA_GNU_window_save 0x2d -#define DW_CFA_GNU_args_size 0x2e -#define DW_CFA_GNU_negative_offset_extended 0x2f -#define DW_CFA_hi_user 0x3f - -#define DW_EH_PE_FORM 0x07 -#define DW_EH_PE_native 0x00 -#define DW_EH_PE_leb128 0x01 -#define DW_EH_PE_data2 0x02 -#define DW_EH_PE_data4 0x03 -#define DW_EH_PE_data8 0x04 -#define DW_EH_PE_signed 0x08 -#define DW_EH_PE_ADJUST 0x70 -#define DW_EH_PE_abs 0x00 -#define DW_EH_PE_pcrel 0x10 -#define DW_EH_PE_textrel 0x20 -#define DW_EH_PE_datarel 0x30 -#define DW_EH_PE_funcrel 0x40 -#define DW_EH_PE_aligned 0x50 -#define DW_EH_PE_indirect 0x80 -#define DW_EH_PE_omit 0xff - -typedef unsigned long uleb128_t; -typedef signed long sleb128_t; -#define sleb128abs __builtin_labs - -static struct unwind_table { - struct { - unsigned long pc; - unsigned long range; - } core, init; - const void *address; - unsigned long size; - const unsigned char *header; - unsigned long hdrsz; - struct unwind_table *link; - const char *name; -} root_table; - -struct unwind_item { - enum item_location { - Nowhere, - Memory, - Register, - Value - } where; - uleb128_t value; -}; - -struct unwind_state { - uleb128_t loc, org; - const u8 *cieStart, *cieEnd; - uleb128_t codeAlign; - sleb128_t dataAlign; - struct cfa { - uleb128_t reg, offs; - } cfa; - struct unwind_item regs[ARRAY_SIZE(reg_info)]; - unsigned stackDepth:8; - unsigned version:8; - const u8 *label; - const u8 *stack[MAX_STACK_DEPTH]; -}; - -static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; - -static unsigned unwind_debug; -static int __init unwind_debug_setup(char *s) -{ - unwind_debug = simple_strtoul(s, NULL, 0); - return 1; -} -__setup("unwind_debug=", unwind_debug_setup); -#define dprintk(lvl, fmt, args...) 
\ - ((void)(lvl > unwind_debug \ - || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) - -static struct unwind_table *find_table(unsigned long pc) -{ - struct unwind_table *table; - - for (table = &root_table; table; table = table->link) - if ((pc >= table->core.pc - && pc < table->core.pc + table->core.range) - || (pc >= table->init.pc - && pc < table->init.pc + table->init.range)) - break; - - return table; -} - -static unsigned long read_pointer(const u8 **pLoc, - const void *end, - signed ptrType, - unsigned long text_base, - unsigned long data_base); - -static void init_unwind_table(struct unwind_table *table, - const char *name, - const void *core_start, - unsigned long core_size, - const void *init_start, - unsigned long init_size, - const void *table_start, - unsigned long table_size, - const u8 *header_start, - unsigned long header_size) -{ - const u8 *ptr = header_start + 4; - const u8 *end = header_start + header_size; - - table->core.pc = (unsigned long)core_start; - table->core.range = core_size; - table->init.pc = (unsigned long)init_start; - table->init.range = init_size; - table->address = table_start; - table->size = table_size; - /* See if the linker provided table looks valid. */ - if (header_size <= 4 - || header_start[0] != 1 - || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) - != table_start - || !read_pointer(&ptr, end, header_start[2], 0, 0) - || !read_pointer(&ptr, end, header_start[3], 0, - (unsigned long)header_start) - || !read_pointer(&ptr, end, header_start[3], 0, - (unsigned long)header_start)) - header_start = NULL; - table->hdrsz = header_size; - smp_wmb(); - table->header = header_start; - table->link = NULL; - table->name = name; -} - -void __init unwind_init(void) -{ - init_unwind_table(&root_table, "kernel", - _text, _end - _text, - NULL, 0, - __start_unwind, __end_unwind - __start_unwind, - __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); -} - -static const u32 bad_cie, not_fde; -static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); -static signed fde_pointer_type(const u32 *cie); - -struct eh_frame_hdr_table_entry { - unsigned long start, fde; -}; - -static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) -{ - const struct eh_frame_hdr_table_entry *e1 = p1; - const struct eh_frame_hdr_table_entry *e2 = p2; - - return (e1->start > e2->start) - (e1->start < e2->start); -} - -static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) -{ - struct eh_frame_hdr_table_entry *e1 = p1; - struct eh_frame_hdr_table_entry *e2 = p2; - unsigned long v; - - v = e1->start; - e1->start = e2->start; - e2->start = v; - v = e1->fde; - e1->fde = e2->fde; - e2->fde = v; -} - -static void __init setup_unwind_table(struct unwind_table *table, - void *(*alloc)(unsigned long)) -{ - const u8 *ptr; - unsigned long tableSize = table->size, hdrSize; - unsigned n; - const u32 *fde; - struct { - u8 version; - u8 eh_frame_ptr_enc; - u8 fde_count_enc; - u8 table_enc; - unsigned long eh_frame_ptr; - unsigned int fde_count; - struct eh_frame_hdr_table_entry table[]; - } __attribute__((__packed__)) *header; - - if (table->header) - return; - - if (table->hdrsz) - printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", - table->name); - - if (tableSize & (sizeof(*fde) - 1)) - return; - - for (fde = table->address, n = 0; - tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; - tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { - const u32 *cie = cie_for_fde(fde, table); - 
signed ptrType; - - if (cie == ¬_fde) - continue; - if (cie == NULL - || cie == &bad_cie - || (ptrType = fde_pointer_type(cie)) < 0) - return; - ptr = (const u8 *)(fde + 2); - if (!read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType, 0, 0)) - return; - ++n; - } - - if (tableSize || !n) - return; - - hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) - + 2 * n * sizeof(unsigned long); - dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); - header = alloc(hdrSize); - if (!header) - return; - header->version = 1; - header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; - header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; - header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; - put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); - BUILD_BUG_ON(offsetof(typeof(*header), fde_count) - % __alignof(typeof(header->fde_count))); - header->fde_count = n; - - BUILD_BUG_ON(offsetof(typeof(*header), table) - % __alignof(typeof(*header->table))); - for (fde = table->address, tableSize = table->size, n = 0; - tableSize; - tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { - const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); - - if (!fde[1]) - continue; /* this is a CIE */ - ptr = (const u8 *)(fde + 2); - header->table[n].start = read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - fde_pointer_type(cie), 0, 0); - header->table[n].fde = (unsigned long)fde; - ++n; - } - WARN_ON(n != header->fde_count); - - sort(header->table, - n, - sizeof(*header->table), - cmp_eh_frame_hdr_table_entries, - swap_eh_frame_hdr_table_entries); - - table->hdrsz = hdrSize; - smp_wmb(); - table->header = (const void *)header; -} - -static void *__init balloc(unsigned long sz) -{ - return __alloc_bootmem_nopanic(sz, - sizeof(unsigned int), - __pa(MAX_DMA_ADDRESS)); -} - -void __init unwind_setup(void) -{ - setup_unwind_table(&root_table, balloc); -} - -#ifdef CONFIG_MODULES - -static struct unwind_table *last_table; - -/* Must be called with module_mutex held. */ -void *unwind_add_table(struct module *module, - const void *table_start, - unsigned long table_size) -{ - struct unwind_table *table; - - if (table_size <= 0) - return NULL; - - table = kmalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return NULL; - - init_unwind_table(table, module->name, - module->module_core, module->core_size, - module->module_init, module->init_size, - table_start, table_size, - NULL, 0); - - if (last_table) - last_table->link = table; - else - root_table.link = table; - last_table = table; - - return table; -} - -struct unlink_table_info -{ - struct unwind_table *table; - int init_only; -}; - -static int unlink_table(void *arg) -{ - struct unlink_table_info *info = arg; - struct unwind_table *table = info->table, *prev; - - for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) - ; - - if (prev->link) { - if (info->init_only) { - table->init.pc = 0; - table->init.range = 0; - info->table = NULL; - } else { - prev->link = table->link; - if (!prev->link) - last_table = prev; - } - } else - info->table = NULL; - - return 0; -} - -/* Must be called with module_mutex held. 
*/ -void unwind_remove_table(void *handle, int init_only) -{ - struct unwind_table *table = handle; - struct unlink_table_info info; - - if (!table || table == &root_table) - return; - - if (init_only && table == last_table) { - table->init.pc = 0; - table->init.range = 0; - return; - } - - info.table = table; - info.init_only = init_only; - stop_machine_run(unlink_table, &info, NR_CPUS); - - if (info.table) - kfree(table); -} - -#endif /* CONFIG_MODULES */ - -static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) -{ - const u8 *cur = *pcur; - uleb128_t value; - unsigned shift; - - for (shift = 0, value = 0; cur < end; shift += 7) { - if (shift + 7 > 8 * sizeof(value) - && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { - cur = end + 1; - break; - } - value |= (uleb128_t)(*cur & 0x7f) << shift; - if (!(*cur++ & 0x80)) - break; - } - *pcur = cur; - - return value; -} - -static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) -{ - const u8 *cur = *pcur; - sleb128_t value; - unsigned shift; - - for (shift = 0, value = 0; cur < end; shift += 7) { - if (shift + 7 > 8 * sizeof(value) - && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { - cur = end + 1; - break; - } - value |= (sleb128_t)(*cur & 0x7f) << shift; - if (!(*cur & 0x80)) { - value |= -(*cur++ & 0x40) << shift; - break; - } - } - *pcur = cur; - - return value; -} - -static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) -{ - const u32 *cie; - - if (!*fde || (*fde & (sizeof(*fde) - 1))) - return &bad_cie; - if (!fde[1]) - return ¬_fde; /* this is a CIE */ - if ((fde[1] & (sizeof(*fde) - 1)) - || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) - return NULL; /* this is not a valid FDE */ - cie = fde + 1 - fde[1] / sizeof(*fde); - if (*cie <= sizeof(*cie) + 4 - || *cie >= fde[1] - sizeof(*fde) - || (*cie & (sizeof(*cie) - 1)) - || cie[1]) - return NULL; /* this is not a (valid) CIE */ - return cie; -} - -static unsigned long read_pointer(const u8 **pLoc, - const void *end, - signed ptrType, - unsigned long text_base, - unsigned long data_base) -{ - unsigned long value = 0; - union { - const u8 *p8; - const u16 *p16u; - const s16 *p16s; - const u32 *p32u; - const s32 *p32s; - const unsigned long *pul; - } ptr; - - if (ptrType < 0 || ptrType == DW_EH_PE_omit) { - dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); - return 0; - } - ptr.p8 = *pLoc; - switch(ptrType & DW_EH_PE_FORM) { - case DW_EH_PE_data2: - if (end < (const void *)(ptr.p16u + 1)) { - dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); - return 0; - } - if(ptrType & DW_EH_PE_signed) - value = get_unaligned(ptr.p16s++); - else - value = get_unaligned(ptr.p16u++); - break; - case DW_EH_PE_data4: -#ifdef CONFIG_64BIT - if (end < (const void *)(ptr.p32u + 1)) { - dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); - return 0; - } - if(ptrType & DW_EH_PE_signed) - value = get_unaligned(ptr.p32s++); - else - value = get_unaligned(ptr.p32u++); - break; - case DW_EH_PE_data8: - BUILD_BUG_ON(sizeof(u64) != sizeof(value)); -#else - BUILD_BUG_ON(sizeof(u32) != sizeof(value)); -#endif - case DW_EH_PE_native: - if (end < (const void *)(ptr.pul + 1)) { - dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); - return 0; - } - value = get_unaligned(ptr.pul++); - break; - case DW_EH_PE_leb128: - BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); - value = ptrType & DW_EH_PE_signed - ? 
get_sleb128(&ptr.p8, end) - : get_uleb128(&ptr.p8, end); - if ((const void *)ptr.p8 > end) { - dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); - return 0; - } - break; - default: - dprintk(2, "Cannot decode pointer type %02X (%p,%p).", - ptrType, ptr.p8, end); - return 0; - } - switch(ptrType & DW_EH_PE_ADJUST) { - case DW_EH_PE_abs: - break; - case DW_EH_PE_pcrel: - value += (unsigned long)*pLoc; - break; - case DW_EH_PE_textrel: - if (likely(text_base)) { - value += text_base; - break; - } - dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", - ptrType, *pLoc, end); - return 0; - case DW_EH_PE_datarel: - if (likely(data_base)) { - value += data_base; - break; - } - dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", - ptrType, *pLoc, end); - return 0; - default: - dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", - ptrType, *pLoc, end); - return 0; - } - if ((ptrType & DW_EH_PE_indirect) - && probe_kernel_address((unsigned long *)value, value)) { - dprintk(1, "Cannot read indirect value %lx (%p,%p).", - value, *pLoc, end); - return 0; - } - *pLoc = ptr.p8; - - return value; -} - -static signed fde_pointer_type(const u32 *cie) -{ - const u8 *ptr = (const u8 *)(cie + 2); - unsigned version = *ptr; - - if (version != 1) - return -1; /* unsupported */ - if (*++ptr) { - const char *aug; - const u8 *end = (const u8 *)(cie + 1) + *cie; - uleb128_t len; - - /* check if augmentation size is first (and thus present) */ - if (*ptr != 'z') - return -1; - /* check if augmentation string is nul-terminated */ - if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) - return -1; - ++ptr; /* skip terminator */ - get_uleb128(&ptr, end); /* skip code alignment */ - get_sleb128(&ptr, end); /* skip data alignment */ - /* skip return address column */ - version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); - len = get_uleb128(&ptr, end); /* augmentation length */ - if (ptr + len < ptr || ptr + len > end) - return -1; - end = ptr + len; - while (*++aug) { - if (ptr >= end) - return -1; - switch(*aug) { - case 'L': - ++ptr; - break; - case 'P': { - signed ptrType = *ptr++; - - if (!read_pointer(&ptr, end, ptrType, 0, 0) - || ptr > end) - return -1; - } - break; - case 'R': - return *ptr; - default: - return -1; - } - } - } - return DW_EH_PE_native|DW_EH_PE_abs; -} - -static int advance_loc(unsigned long delta, struct unwind_state *state) -{ - state->loc += delta * state->codeAlign; - - return delta > 0; -} - -static void set_rule(uleb128_t reg, - enum item_location where, - uleb128_t value, - struct unwind_state *state) -{ - if (reg < ARRAY_SIZE(state->regs)) { - state->regs[reg].where = where; - state->regs[reg].value = value; - } -} - -static int processCFI(const u8 *start, - const u8 *end, - unsigned long targetLoc, - signed ptrType, - struct unwind_state *state) -{ - union { - const u8 *p8; - const u16 *p16; - const u32 *p32; - } ptr; - int result = 1; - - if (start != state->cieStart) { - state->loc = state->org; - result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); - if (targetLoc == 0 && state->label == NULL) - return result; - } - for (ptr.p8 = start; result && ptr.p8 < end; ) { - switch(*ptr.p8 >> 6) { - uleb128_t value; - - case 0: - switch(*ptr.p8++) { - case DW_CFA_nop: - break; - case DW_CFA_set_loc: - state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); - if (state->loc == 0) - result = 0; - break; - case DW_CFA_advance_loc1: - result = ptr.p8 < end && advance_loc(*ptr.p8++, state); - break; - case DW_CFA_advance_loc2: - result = ptr.p8 <= end + 2 - && advance_loc(*ptr.p16++, state); - break; - case DW_CFA_advance_loc4: - result = ptr.p8 <= end + 4 - && advance_loc(*ptr.p32++, state); - break; - case DW_CFA_offset_extended: - value = get_uleb128(&ptr.p8, end); - set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); - break; - case DW_CFA_val_offset: - value = get_uleb128(&ptr.p8, end); - set_rule(value, Value, get_uleb128(&ptr.p8, end), state); - break; - case DW_CFA_offset_extended_sf: - value = get_uleb128(&ptr.p8, end); - set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); - break; - case DW_CFA_val_offset_sf: - value = get_uleb128(&ptr.p8, end); - set_rule(value, Value, get_sleb128(&ptr.p8, end), state); - break; - case DW_CFA_restore_extended: - case DW_CFA_undefined: - case DW_CFA_same_value: - set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); - break; - case DW_CFA_register: - value = get_uleb128(&ptr.p8, end); - set_rule(value, - Register, - get_uleb128(&ptr.p8, end), state); - break; - case DW_CFA_remember_state: - if (ptr.p8 == state->label) { - state->label = NULL; - return 1; - } - if (state->stackDepth >= MAX_STACK_DEPTH) { - dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); - return 0; - } - state->stack[state->stackDepth++] = ptr.p8; - break; - case DW_CFA_restore_state: - if (state->stackDepth) { - const uleb128_t loc = state->loc; - const u8 *label = state->label; - - state->label = state->stack[state->stackDepth - 1]; - memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); - memset(state->regs, 0, sizeof(state->regs)); - state->stackDepth = 0; - result = processCFI(start, end, 0, ptrType, state); - state->loc = loc; - state->label = label; - } else { - dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); - return 0; - } - break; - case DW_CFA_def_cfa: - state->cfa.reg 
= get_uleb128(&ptr.p8, end); - /*nobreak*/ - case DW_CFA_def_cfa_offset: - state->cfa.offs = get_uleb128(&ptr.p8, end); - break; - case DW_CFA_def_cfa_sf: - state->cfa.reg = get_uleb128(&ptr.p8, end); - /*nobreak*/ - case DW_CFA_def_cfa_offset_sf: - state->cfa.offs = get_sleb128(&ptr.p8, end) - * state->dataAlign; - break; - case DW_CFA_def_cfa_register: - state->cfa.reg = get_uleb128(&ptr.p8, end); - break; - /*todo case DW_CFA_def_cfa_expression: */ - /*todo case DW_CFA_expression: */ - /*todo case DW_CFA_val_expression: */ - case DW_CFA_GNU_args_size: - get_uleb128(&ptr.p8, end); - break; - case DW_CFA_GNU_negative_offset_extended: - value = get_uleb128(&ptr.p8, end); - set_rule(value, - Memory, - (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); - break; - case DW_CFA_GNU_window_save: - default: - dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); - result = 0; - break; - } - break; - case 1: - result = advance_loc(*ptr.p8++ & 0x3f, state); - break; - case 2: - value = *ptr.p8++ & 0x3f; - set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); - break; - case 3: - set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); - break; - } - if (ptr.p8 > end) { - dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); - result = 0; - } - if (result && targetLoc != 0 && targetLoc < state->loc) - return 1; - } - - if (result && ptr.p8 < end) - dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); - - return result - && ptr.p8 == end - && (targetLoc == 0 - || (/*todo While in theory this should apply, gcc in practice omits - everything past the function prolog, and hence the location - never reaches the end of the function. - targetLoc < state->loc &&*/ state->label == NULL)); -} - -/* Unwind to previous to frame. Returns 0 if successful, negative - * number in case of an error. 
*/ -int unwind(struct unwind_frame_info *frame) -{ -#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) - const u32 *fde = NULL, *cie = NULL; - const u8 *ptr = NULL, *end = NULL; - unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; - unsigned long startLoc = 0, endLoc = 0, cfa; - unsigned i; - signed ptrType = -1; - uleb128_t retAddrReg = 0; - const struct unwind_table *table; - struct unwind_state state; - - if (UNW_PC(frame) == 0) - return -EINVAL; - if ((table = find_table(pc)) != NULL - && !(table->size & (sizeof(*fde) - 1))) { - const u8 *hdr = table->header; - unsigned long tableSize; - - smp_rmb(); - if (hdr && hdr[0] == 1) { - switch(hdr[3] & DW_EH_PE_FORM) { - case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; - case DW_EH_PE_data2: tableSize = 2; break; - case DW_EH_PE_data4: tableSize = 4; break; - case DW_EH_PE_data8: tableSize = 8; break; - default: tableSize = 0; break; - } - ptr = hdr + 4; - end = hdr + table->hdrsz; - if (tableSize - && read_pointer(&ptr, end, hdr[1], 0, 0) - == (unsigned long)table->address - && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 - && i == (end - ptr) / (2 * tableSize) - && !((end - ptr) % (2 * tableSize))) { - do { - const u8 *cur = ptr + (i / 2) * (2 * tableSize); - - startLoc = read_pointer(&cur, - cur + tableSize, - hdr[3], 0, - (unsigned long)hdr); - if (pc < startLoc) - i /= 2; - else { - ptr = cur - tableSize; - i = (i + 1) / 2; - } - } while (startLoc && i > 1); - if (i == 1 - && (startLoc = read_pointer(&ptr, - ptr + tableSize, - hdr[3], 0, - (unsigned long)hdr)) != 0 - && pc >= startLoc) - fde = (void *)read_pointer(&ptr, - ptr + tableSize, - hdr[3], 0, - (unsigned long)hdr); - } - } - if(hdr && !fde) - dprintk(3, "Binary lookup for %lx failed.", pc); - - if (fde != NULL) { - cie = cie_for_fde(fde, table); - ptr = (const u8 *)(fde + 2); - if(cie != NULL - && cie != &bad_cie - && cie != ¬_fde - && (ptrType = fde_pointer_type(cie)) >= 0 - && read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType, 0, 0) == startLoc) { - if (!(ptrType & DW_EH_PE_indirect)) - ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; - endLoc = startLoc - + read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType, 0, 0); - if(pc >= endLoc) - fde = NULL; - } else - fde = NULL; - if(!fde) - dprintk(1, "Binary lookup result for %lx discarded.", pc); - } - if (fde == NULL) { - for (fde = table->address, tableSize = table->size; - cie = NULL, tableSize > sizeof(*fde) - && tableSize - sizeof(*fde) >= *fde; - tableSize -= sizeof(*fde) + *fde, - fde += 1 + *fde / sizeof(*fde)) { - cie = cie_for_fde(fde, table); - if (cie == &bad_cie) { - cie = NULL; - break; - } - if (cie == NULL - || cie == ¬_fde - || (ptrType = fde_pointer_type(cie)) < 0) - continue; - ptr = (const u8 *)(fde + 2); - startLoc = read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType, 0, 0); - if (!startLoc) - continue; - if (!(ptrType & DW_EH_PE_indirect)) - ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; - endLoc = startLoc - + read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType, 0, 0); - if (pc >= startLoc && pc < endLoc) - break; - } - if(!fde) - dprintk(3, "Linear lookup for %lx failed.", pc); - } - } - if (cie != NULL) { - memset(&state, 0, sizeof(state)); - state.cieEnd = ptr; /* keep here temporarily */ - ptr = (const u8 *)(cie + 2); - end = (const u8 *)(cie + 1) + *cie; - frame->call_frame = 1; - if ((state.version = *ptr) != 1) - cie = NULL; /* unsupported version */ - else if (*++ptr) { - /* check if augmentation size is first (and thus present) */ - if (*ptr 
== 'z') { - while (++ptr < end && *ptr) { - switch(*ptr) { - /* check for ignorable (or already handled) - * nul-terminated augmentation string */ - case 'L': - case 'P': - case 'R': - continue; - case 'S': - frame->call_frame = 0; - continue; - default: - break; - } - break; - } - } - if (ptr >= end || *ptr) - cie = NULL; - } - if(!cie) - dprintk(1, "CIE unusable (%p,%p).", ptr, end); - ++ptr; - } - if (cie != NULL) { - /* get code aligment factor */ - state.codeAlign = get_uleb128(&ptr, end); - /* get data aligment factor */ - state.dataAlign = get_sleb128(&ptr, end); - if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) - cie = NULL; - else if (UNW_PC(frame) % state.codeAlign - || UNW_SP(frame) % sleb128abs(state.dataAlign)) { - dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", - UNW_PC(frame), UNW_SP(frame)); - return -EPERM; - } else { - retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); - /* skip augmentation */ - if (((const char *)(cie + 2))[1] == 'z') { - uleb128_t augSize = get_uleb128(&ptr, end); - - ptr += augSize; - } - if (ptr > end - || retAddrReg >= ARRAY_SIZE(reg_info) - || REG_INVALID(retAddrReg) - || reg_info[retAddrReg].width != sizeof(unsigned long)) - cie = NULL; - } - if(!cie) - dprintk(1, "CIE validation failed (%p,%p).", ptr, end); - } - if (cie != NULL) { - state.cieStart = ptr; - ptr = state.cieEnd; - state.cieEnd = end; - end = (const u8 *)(fde + 1) + *fde; - /* skip augmentation */ - if (((const char *)(cie + 2))[1] == 'z') { - uleb128_t augSize = get_uleb128(&ptr, end); - - if ((ptr += augSize) > end) - fde = NULL; - } - if(!fde) - dprintk(1, "FDE validation failed (%p,%p).", ptr, end); - } - if (cie == NULL || fde == NULL) { -#ifdef CONFIG_FRAME_POINTER - unsigned long top, bottom; - - if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) - return -EPERM; - top = STACK_TOP(frame->task); - bottom = STACK_BOTTOM(frame->task); -# if FRAME_RETADDR_OFFSET < 0 - if (UNW_SP(frame) < top - && UNW_FP(frame) <= UNW_SP(frame) - && bottom < UNW_FP(frame) -# else - if (UNW_SP(frame) > top - && UNW_FP(frame) >= UNW_SP(frame) - && bottom > UNW_FP(frame) -# endif - && !((UNW_SP(frame) | UNW_FP(frame)) - & (sizeof(unsigned long) - 1))) { - unsigned long link; - - if (!probe_kernel_address( - (unsigned long *)(UNW_FP(frame) - + FRAME_LINK_OFFSET), - link) -# if FRAME_RETADDR_OFFSET < 0 - && link > bottom && link < UNW_FP(frame) -# else - && link > UNW_FP(frame) && link < bottom -# endif - && !(link & (sizeof(link) - 1)) - && !probe_kernel_address( - (unsigned long *)(UNW_FP(frame) - + FRAME_RETADDR_OFFSET), UNW_PC(frame))) { - UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET -# if FRAME_RETADDR_OFFSET < 0 - - -# else - + -# endif - sizeof(UNW_PC(frame)); - UNW_FP(frame) = link; - return 0; - } - } -#endif - return -ENXIO; - } - state.org = startLoc; - memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); - /* process instructions */ - if (!processCFI(ptr, end, pc, ptrType, &state) - || state.loc > endLoc - || state.regs[retAddrReg].where == Nowhere - || state.cfa.reg >= ARRAY_SIZE(reg_info) - || reg_info[state.cfa.reg].width != sizeof(unsigned long) - || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) - || state.cfa.offs % sizeof(unsigned long)) { - dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); - return -EIO; - } - /* update frame */ -#ifndef CONFIG_AS_CFI_SIGNAL_FRAME - if(frame->call_frame - && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) - frame->call_frame = 0; -#endif - cfa = 
FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; - startLoc = min((unsigned long)UNW_SP(frame), cfa); - endLoc = max((unsigned long)UNW_SP(frame), cfa); - if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { - startLoc = min(STACK_LIMIT(cfa), cfa); - endLoc = max(STACK_LIMIT(cfa), cfa); - } -#ifndef CONFIG_64BIT -# define CASES CASE(8); CASE(16); CASE(32) -#else -# define CASES CASE(8); CASE(16); CASE(32); CASE(64) -#endif - pc = UNW_PC(frame); - sp = UNW_SP(frame); - for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { - if (REG_INVALID(i)) { - if (state.regs[i].where == Nowhere) - continue; - dprintk(1, "Cannot restore register %u (%d).", - i, state.regs[i].where); - return -EIO; - } - switch(state.regs[i].where) { - default: - break; - case Register: - if (state.regs[i].value >= ARRAY_SIZE(reg_info) - || REG_INVALID(state.regs[i].value) - || reg_info[i].width > reg_info[state.regs[i].value].width) { - dprintk(1, "Cannot restore register %u from register %lu.", - i, state.regs[i].value); - return -EIO; - } - switch(reg_info[state.regs[i].value].width) { -#define CASE(n) \ - case sizeof(u##n): \ - state.regs[i].value = FRAME_REG(state.regs[i].value, \ - const u##n); \ - break - CASES; -#undef CASE - default: - dprintk(1, "Unsupported register size %u (%lu).", - reg_info[state.regs[i].value].width, - state.regs[i].value); - return -EIO; - } - break; - } - } - for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { - if (REG_INVALID(i)) - continue; - switch(state.regs[i].where) { - case Nowhere: - if (reg_info[i].width != sizeof(UNW_SP(frame)) - || &FRAME_REG(i, __typeof__(UNW_SP(frame))) - != &UNW_SP(frame)) - continue; - UNW_SP(frame) = cfa; - break; - case Register: - switch(reg_info[i].width) { -#define CASE(n) case sizeof(u##n): \ - FRAME_REG(i, u##n) = state.regs[i].value; \ - break - CASES; -#undef CASE - default: - dprintk(1, "Unsupported register size %u (%u).", - reg_info[i].width, i); - return -EIO; - } - break; - case Value: - if (reg_info[i].width != sizeof(unsigned long)) { - dprintk(1, "Unsupported value size %u (%u).", - reg_info[i].width, i); - return -EIO; - } - FRAME_REG(i, unsigned long) = cfa + state.regs[i].value - * state.dataAlign; - break; - case Memory: { - unsigned long addr = cfa + state.regs[i].value - * state.dataAlign; - - if ((state.regs[i].value * state.dataAlign) - % sizeof(unsigned long) - || addr < startLoc - || addr + sizeof(unsigned long) < addr - || addr + sizeof(unsigned long) > endLoc) { - dprintk(1, "Bad memory location %lx (%lx).", - addr, state.regs[i].value); - return -EIO; - } - switch(reg_info[i].width) { -#define CASE(n) case sizeof(u##n): \ - probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ - break - CASES; -#undef CASE - default: - dprintk(1, "Unsupported memory size %u (%u).", - reg_info[i].width, i); - return -EIO; - } - } - break; - } - } - - if (UNW_PC(frame) % state.codeAlign - || UNW_SP(frame) % sleb128abs(state.dataAlign)) { - dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", - UNW_PC(frame), UNW_SP(frame)); - return -EIO; - } - if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { - dprintk(1, "No progress (%lx,%lx).", pc, sp); - return -EIO; - } - - return 0; -#undef CASES -#undef FRAME_REG -} -EXPORT_SYMBOL(unwind); - -int unwind_init_frame_info(struct unwind_frame_info *info, - struct task_struct *tsk, - /*const*/ struct pt_regs *regs) -{ - info->task = tsk; - info->call_frame = 0; - arch_unw_init_frame_info(info, regs); - - return 0; -} -EXPORT_SYMBOL(unwind_init_frame_info); - -/* - * Prepare to unwind a blocked task. 
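
The kernel/timer.c hunk above reorders calc_load() so that count_active_tasks(), a walk over every task, runs only once the LOAD_FREQ interval has actually elapsed rather than on every tick. CALC_LOAD itself is a fixed-point exponential moving average; the standalone model below mirrors that update, using the conventional 11-bit fixed-point constants from sched.h of this era (the exact values are reproduced here as assumptions for illustration, not taken from this patch):

#include <stdio.h>

/* Fixed-point exponential moving average, modelled on CALC_LOAD.
 * FSHIFT/EXP_1 follow the usual kernel definitions of the time and
 * are assumptions for this sketch. */
#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)		/* 1.0 in fixed point */
#define EXP_1	1884			/* 2048 * exp(-5s/60s), ~0.92 */

static unsigned long calc_load_step(unsigned long load, unsigned long active)
{
	/* load = load * e + active * (1 - e), all in fixed point */
	load *= EXP_1;
	load += active * (FIXED_1 - EXP_1);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long load = 0;			/* avenrun[0] analogue */
	unsigned long active = 3 * FIXED_1;	/* three runnable tasks */
	int i;

	/* Twelve 5-second steps, i.e. one minute at a constant load of 3. */
	for (i = 0; i < 12; i++)
		load = calc_load_step(load, active);

	printf("1-min load ~ %lu.%02lu\n", load >> FSHIFT,
	       ((load & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}

After one minute of constant load the average has only climbed to roughly 63% of the steady-state value, which is the expected behaviour of a one-minute time constant; the patch changes when the sampling work is done, not this arithmetic.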
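
The kernel/workqueue.c hunks fold the pending and no-autorelease flags into the same word that stores the workqueue pointer, accessed through work_data_bits() and atomic_long_set()/atomic_long_read() instead of a separate work->management field. A minimal standalone sketch of that pointer-plus-flags packing follows; the demo_* names and the plain (non-atomic) word are illustrative assumptions, not the kernel's actual work_struct layout:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Flags live in the low bits of an aligned pointer; both share one word.
 * Illustrative only -- not the kernel's work_struct or work_data_bits(). */
#define DEMO_PENDING	0x1UL
#define DEMO_NOAUTOREL	0x2UL
#define DEMO_FLAG_MASK	0x3UL

struct demo_queue { const char *name; };

struct demo_work {
	uintptr_t data;		/* queue pointer | flag bits */
};

static void demo_set_queue(struct demo_work *w, struct demo_queue *q)
{
	uintptr_t flags = w->data & DEMO_FLAG_MASK;	/* keep existing flags */

	assert(((uintptr_t)q & DEMO_FLAG_MASK) == 0);	/* alignment assumption */
	w->data = (uintptr_t)q | flags;
}

static struct demo_queue *demo_get_queue(const struct demo_work *w)
{
	return (struct demo_queue *)(w->data & ~DEMO_FLAG_MASK);
}

int main(void)
{
	static struct demo_queue q = { "events" };
	struct demo_work w = { .data = DEMO_PENDING };	/* pending, no queue yet */

	demo_set_queue(&w, &q);
	printf("queue=%s pending=%lu\n", demo_get_queue(&w)->name,
	       (unsigned long)(w.data & DEMO_PENDING));
	return 0;
}

In the kernel the word is an atomic_long_t and the pending bit is claimed with test_and_set_bit() on work_data_bits(work), so concurrent queue_work() callers race on the flag atomically rather than with the plain loads and stores shown here.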
- */ -int unwind_init_blocked(struct unwind_frame_info *info, - struct task_struct *tsk) -{ - info->task = tsk; - info->call_frame = 0; - arch_unw_init_blocked(info); - - return 0; -} -EXPORT_SYMBOL(unwind_init_blocked); - -/* - * Prepare to unwind the currently running thread. - */ -int unwind_init_running(struct unwind_frame_info *info, - asmlinkage int (*callback)(struct unwind_frame_info *, - void *arg), - void *arg) -{ - info->task = current; - info->call_frame = 0; - - return arch_unwind_init_running(info, callback, arg); -} -EXPORT_SYMBOL(unwind_init_running); - -/* - * Unwind until the return pointer is in user-land (or until an error - * occurs). Returns 0 if successful, negative number in case of - * error. - */ -int unwind_to_user(struct unwind_frame_info *info) -{ - while (!arch_unw_user_mode(info)) { - int err = unwind(info); - - if (err < 0) - return err; - } - - return 0; -} -EXPORT_SYMBOL(unwind_to_user); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index db49886bfae1..742cbbe49bdc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -96,13 +96,13 @@ static inline void set_wq_data(struct work_struct *work, void *wq) BUG_ON(!work_pending(work)); new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); - new |= work->management & WORK_STRUCT_FLAG_MASK; - work->management = new; + new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work); + atomic_long_set(&work->data, new); } static inline void *get_wq_data(struct work_struct *work) { - return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); + return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); } static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) @@ -133,7 +133,7 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work list_del_init(&work->entry); spin_unlock_irqrestore(&cwq->lock, flags); - if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) work_release(work); f(work); @@ -206,7 +206,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) { int ret = 0, cpu = get_cpu(); - if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { if (unlikely(is_single_threaded(wq))) cpu = singlethread_cpu; BUG_ON(!list_empty(&work->entry)); @@ -248,7 +248,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, if (delay == 0) return queue_work(wq, work); - if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -280,7 +280,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -321,7 +321,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) spin_unlock_irqrestore(&cwq->lock, flags); BUG_ON(get_wq_data(work) != cwq); - if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) work_release(work); f(work); |