From f339b9dc1f03591761d5d930800db24bc0eda1e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 May 2011 10:49:20 +0200 Subject: sched: Fix schedstat.nr_wakeups_migrate While looking over the code I found that with the ttwu rework the nr_wakeups_migrate test broke since we now switch cpus prior to calling ttwu_stat(), hence the test is always true. Cure this by passing the migration state in wake_flags. Also move the whole test under CONFIG_SMP, it's hard to migrate tasks on UP :-) Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-pwwxl7gdqs5676f1d4cx6pj7@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8da84b7bc1b8..483c1ed5bc4d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1063,6 +1063,7 @@ struct sched_domain; */ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */ #define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x04 /* internal use, task got migrated */ #define ENQUEUE_WAKEUP 1 #define ENQUEUE_HEAD 2 -- cgit v1.2.3 From 3a43e05f4d0600e906fa09f4a65d749288c44592 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 31 May 2011 08:56:11 +0200 Subject: irq: Handle spurios irq detection for threaded irqs The detection of spurious interrupts is currently limited to the first level handler. In force-threaded mode we never notice if the threaded irq does not feel responsible. This patch catches the return value of the threaded handler and forwards it to the spurious detector. If the primary handler returns only IRQ_WAKE_THREAD then the spurious detector ignores it because it gets called again from the threaded handler. 
[ tglx: Report the erroneous return value early and bail out ] Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/1306824972-27067-2-git-send-email-sebastian@breakpoint.cc Signed-off-by: Thomas Gleixner --- include/linux/irqreturn.h | 6 +++--- kernel/irq/handle.c | 6 ------ kernel/irq/manage.c | 24 ++++++++++++++++++------ kernel/irq/spurious.c | 22 ++++++++++++++++++---- 4 files changed, 39 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index 819acaaac3f5..714ba08dc092 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -8,9 +8,9 @@ * @IRQ_WAKE_THREAD handler requests to wake the handler thread */ enum irqreturn { - IRQ_NONE, - IRQ_HANDLED, - IRQ_WAKE_THREAD, + IRQ_NONE = (0 << 0), + IRQ_HANDLED = (1 << 0), + IRQ_WAKE_THREAD = (1 << 1), }; typedef enum irqreturn irqreturn_t; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 90cb55f6d7eb..470d08c82bbe 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,12 +132,6 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) switch (res) { case IRQ_WAKE_THREAD: - /* - * Set result to handled so the spurious check - * does not trigger. - */ - res = IRQ_HANDLED; - /* * Catch drivers which return WAKE_THREAD but * did not set up a thread function diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f7ce0021e1c4..d64bafb1afd0 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -723,13 +723,16 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } * context. So we need to disable bh here to avoid deadlocks and other * side effects. 
*/ -static void +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { + irqreturn_t ret; + local_bh_disable(); - action->thread_fn(action->irq, action->dev_id); + ret = action->thread_fn(action->irq, action->dev_id); irq_finalize_oneshot(desc, action, false); local_bh_enable(); + return ret; } /* @@ -737,10 +740,14 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) * preemtible - many of them need to sleep and wait for slow busses to * complete. */ -static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_thread_fn(struct irq_desc *desc, + struct irqaction *action) { - action->thread_fn(action->irq, action->dev_id); + irqreturn_t ret; + + ret = action->thread_fn(action->irq, action->dev_id); irq_finalize_oneshot(desc, action, false); + return ret; } /* @@ -753,7 +760,8 @@ static int irq_thread(void *data) }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); - void (*handler_fn)(struct irq_desc *desc, struct irqaction *action); + irqreturn_t (*handler_fn)(struct irq_desc *desc, + struct irqaction *action); int wake; if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, @@ -783,8 +791,12 @@ static int irq_thread(void *data) desc->istate |= IRQS_PENDING; raw_spin_unlock_irq(&desc->lock); } else { + irqreturn_t action_ret; + raw_spin_unlock_irq(&desc->lock); - handler_fn(desc, action); + action_ret = handler_fn(desc, action); + if (!noirqdebug) + note_interrupt(action->irq, desc, action_ret); } wake = atomic_dec_and_test(&desc->threads_active); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index c9a78ba30b6f..aa57d5da18c1 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -167,6 +167,13 @@ out: jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } +static inline int bad_action_ret(irqreturn_t action_ret) +{ + if (likely(action_ret <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) + return 0; + return 1; +} + /* * If 99,900 of the 
previous 100,000 interrupts have not been handled * then assume that the IRQ is stuck in some manner. Drop a diagnostic @@ -182,7 +189,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *action; unsigned long flags; - if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { + if (bad_action_ret(action_ret)) { printk(KERN_ERR "irq event %d: bogus return value %x\n", irq, action_ret); } else { @@ -263,7 +270,16 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, if (desc->istate & IRQS_POLL_INPROGRESS) return; - if (unlikely(action_ret != IRQ_HANDLED)) { + /* we get here again via the threaded handler */ + if (action_ret == IRQ_WAKE_THREAD) + return; + + if (bad_action_ret(action_ret)) { + report_bad_irq(irq, desc, action_ret); + return; + } + + if (unlikely(action_ret == IRQ_NONE)) { /* * If we are seeing only the odd spurious IRQ caused by * bus asynchronicity then don't eventually trigger an error, @@ -275,8 +291,6 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, else desc->irqs_unhandled++; desc->last_unhandled = jiffies; - if (unlikely(action_ret != IRQ_NONE)) - report_bad_irq(irq, desc, action_ret); } if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { -- cgit v1.2.3 From 9e1f1de02c2275d7172e18dc4e7c2065777611bf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Jun 2011 18:24:58 -0400 Subject: more conservative S_NOSEC handling Caching "we have already removed suid/caps" was overenthusiastic as merged. On network filesystems we might have had suid/caps set on another client, silently picked by this client on revalidate, all of that *without* clearing the S_NOSEC flag. AFAICS, the only reasonably sane way to deal with that is * new superblock flag; unless set, S_NOSEC is not going to be set. 
* local block filesystems set it in their ->mount() (more accurately, mount_bdev() does, so does btrfs ->mount(), users of mount_bdev() other than local block ones clear it) * if any network filesystem (or a cluster one) wants to use S_NOSEC, it'll need to set MS_NOSEC in sb->s_flags *AND* take care to clear S_NOSEC when inode attribute changes are picked from other clients. It's not an earth-shattering hole (anybody that can set suid on another client will almost certainly be able to write to the file before doing that anyway), but it's a bug that needs fixing. Signed-off-by: Al Viro --- fs/btrfs/super.c | 2 +- fs/fuse/inode.c | 2 ++ fs/ocfs2/super.c | 2 +- fs/super.c | 2 +- include/linux/fs.h | 3 ++- mm/filemap.c | 2 +- 6 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b2e7e5bc3ef..d158b672a2d2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -819,7 +819,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; + s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 
1 : 0); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cc6ec4b2f0ff..38f84cd48b67 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -921,6 +921,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_flags & MS_MANDLOCK) goto err; + sb->s_flags &= ~MS_NOSEC; + if (!parse_fuse_opt((char *) data, &d, is_bdev)) goto err; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cdbaf5e97308..56f61027236b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1072,7 +1072,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, diff --git a/fs/super.c b/fs/super.c index c75593953c52..ab3d672db0de 100644 --- a/fs/super.c +++ b/fs/super.c @@ -822,7 +822,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; + s->s_flags = flags | MS_NOSEC; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); diff --git a/include/linux/fs.h b/include/linux/fs.h index c55d6b7cd5d6..646a1836152a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -208,6 +208,7 @@ struct inodes_stat_t { #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -2591,7 +2592,7 @@ static inline int is_sxid(mode_t mode) static inline void inode_has_no_xattr(struct inode *inode) { - if (!is_sxid(inode->i_mode)) + if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) inode->i_flags |= S_NOSEC; } diff --git 
a/mm/filemap.c b/mm/filemap.c index d7b10578a64b..a8251a8d3457 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2000,7 +2000,7 @@ int file_remove_suid(struct file *file) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); - if (!error) + if (!error && (inode->i_sb->s_flags & MS_NOSEC)) inode->i_flags |= S_NOSEC; return error; -- cgit v1.2.3 From d7ebe75b065a7c2d58ffc12f9d2e00d5ea4e71eb Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 3 Jun 2011 17:59:51 -0400 Subject: perf: Fix comments in include/linux/perf_event.h Fix include/linux/perf_event.h comments to be consistent with the actual #define names. This is trivial, but it can be a bit confusing when first reading through the file. Signed-off-by: Vince Weaver Cc: Peter Zijlstra Cc: paulus@samba.org Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1106031757090.29381@cl320.eecs.utk.edu Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3412684ce5d5..e0786e35f247 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -137,14 +137,14 @@ enum perf_event_sample_format { * * struct read_format { * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID * } && !PERF_FORMAT_GROUP * * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID * } cntr[nr]; -- cgit v1.2.3