diff options
41 files changed, 507 insertions, 292 deletions
@@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Terrified Chipmunk # *DOCUMENTATION* diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 69f1c57a8d0d..33a6a2423bd2 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,14 +20,6 @@ struct mm_struct; struct thread_struct { struct task_struct *saved_task; - /* - * This flag is set to 1 before calling do_fork (and analyzed in - * copy_thread) to mark that we are begin called from userspace (fork / - * vfork / clone), and reset to 0 after. It is left to 0 when called - * from kernelspace (i.e. kernel_thread() or fork_idle(), - * as of 2.6.11). - */ - int forking; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +50,6 @@ struct thread_struct { #define INIT_THREAD \ { \ - .forking = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 40db8f71deae..2df313b6a586 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); -DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); -DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); -DEFINE_STR(UM_KERN_ERR, KERN_ERR); -DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); -DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); -DEFINE_STR(UM_KERN_INFO, KERN_INFO); -DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); -DEFINE_STR(UM_KERN_CONT, KERN_CONT); - DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 4fa82c055aab..cef068563336 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -26,6 +26,17 @@ extern void panic(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +/* Requires preincluding include/linux/kern_levels.h */ +#define UM_KERN_EMERG KERN_EMERG +#define UM_KERN_ALERT KERN_ALERT +#define UM_KERN_CRIT KERN_CRIT +#define UM_KERN_ERR KERN_ERR +#define UM_KERN_WARNING KERN_WARNING +#define UM_KERN_NOTICE KERN_NOTICE +#define UM_KERN_INFO KERN_INFO +#define UM_KERN_DEBUG KERN_DEBUG +#define UM_KERN_CONT KERN_CONT + #ifdef UML_CONFIG_PRINTK extern int printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 6cade9366364..8c82786da823 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -39,34 +39,21 @@ void flush_thread(void) void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { + get_safe_registers(regs->regs.gp, regs->regs.fp); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; -} -EXPORT_SYMBOL(start_thread); - -static long execve1(const char *file, - const char __user *const __user *argv, - const char __user *const __user *env) -{ - long error; - - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; + current->ptrace &= ~PT_DTRACE; #ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(¤t->thread.regs.regs); + SUBARCH_EXECVE1(regs->regs); #endif - task_unlock(current); - } - return error; } +EXPORT_SYMBOL(start_thread); long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; - err = execve1(file, argv, env); + err = do_execve(file, argv, env, ¤t->thread.regs); if (!err) UML_LONGJMP(current->thread.exec_buf, 1); return err; @@ -81,7 +68,7 @@ long sys_execve(const char __user *file, const char __user *const __user *argv, filename = getname(file); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); + error = do_execve(filename, argv, env, ¤t->thread.regs); putname(filename); out: return error; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 57fc7028714a..c5f5afa50745 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *regs) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { + if (!kthread) { memcpy(&p->thread.regs.regs, ®s->regs, sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { + } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; @@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 7362d58efc29..cc9c2350e417 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, struct k_sigaction *ka, siginfo_t *info) { sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ @@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); else - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index f958cb876ee3..a4c6d8eee74c 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -17,25 +17,25 @@ long sys_fork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + return do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; } long sys_vfork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + return do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); } long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index d50270d26b42..15889df9b466 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS:.o=.%): \ - c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o) + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o) # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of # using it directly. diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 9926e11a772d..aeaff8bef2f1 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -21,6 +21,7 @@ config 64BIT config X86_32 def_bool !64BIT select HAVE_AOUT + select ARCH_WANT_IPC_PARSE_VERSION config X86_64 def_bool 64BIT diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 5868526b5eef..46a9df99f3c5 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -7,9 +7,6 @@ #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define STR(x) #x -#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) - #define BLANK() asm volatile("\n->" : : ) #define OFFSET(sym, str, mem) \ diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e41..ca255a805ed9 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea13503..ba7363ecf896 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37b..b5408cecac6c 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9d..db444c7218fe 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include <linux/syscalls.h> +#include <sysdep/syscalls.h> /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15a..adb08eb5c22a 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include <linux/sched.h> +#include <asm/prctl.h> /* XXX This should get the constants from libc */ +#include <os.h> long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 38a2d0631882..ad16c68c8645 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -79,6 +79,7 @@ struct nvme_dev { char serial[20]; char model[40]; char firmware_rev[8]; + u32 max_hw_sectors; }; /* @@ -835,15 +836,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, } static int nvme_get_features(struct nvme_dev *dev, unsigned fid, - unsigned dword11, dma_addr_t dma_addr) + unsigned nsid, dma_addr_t dma_addr) { struct nvme_command c; memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; + c.features.nsid = cpu_to_le32(nsid); c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - c.features.dword11 = cpu_to_le32(dword11); return nvme_submit_admin_cmd(dev, &c, NULL); } @@ -862,11 +863,51 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid, return nvme_submit_admin_cmd(dev, &c, result); } +/** + * nvme_cancel_ios - Cancel outstanding I/Os + * @queue: The queue to cancel I/Os on + * @timeout: True to only cancel I/Os which have timed out + */ +static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) +{ + int depth = nvmeq->q_depth - 1; + struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + unsigned long now = jiffies; + int cmdid; + + for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { + void *ctx; + nvme_completion_fn fn; + static struct nvme_completion cqe = { + .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, + }; + + if (timeout && !time_after(now, info[cmdid].timeout)) + continue; + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); + ctx = cancel_cmdid(nvmeq, cmdid, &fn); + fn(nvmeq->dev, ctx, &cqe); + } +} + +static void nvme_free_queue_mem(struct nvme_queue *nvmeq) +{ + dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); + kfree(nvmeq); +} + static void nvme_free_queue(struct nvme_dev *dev, int qid) { struct nvme_queue *nvmeq = dev->queues[qid]; int vector = dev->entry[nvmeq->cq_vector].vector; + spin_lock_irq(&nvmeq->q_lock); + nvme_cancel_ios(nvmeq, false); + spin_unlock_irq(&nvmeq->q_lock); + irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); @@ -876,18 +917,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) adapter_delete_cq(dev, qid); } - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); - kfree(nvmeq); + nvme_free_queue_mem(nvmeq); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth, int vector) { struct device *dmadev = &dev->pci_dev->dev; - unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info)); + unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * + sizeof(struct nvme_cmd_info)); struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); if (!nvmeq) return NULL; @@ -975,7 +1013,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) { - int result; + int result = 0; u32 aqa; u64 cap; unsigned long timeout; @@ -1005,17 +1043,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; dev->db_stride = NVME_CAP_STRIDE(cap); - while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { + while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { msleep(100); if (fatal_signal_pending(current)) - return -EINTR; + result = -EINTR; if (time_after(jiffies, timeout)) { dev_err(&dev->pci_dev->dev, "Device not ready; aborting initialisation\n"); - return -ENODEV; + result = -ENODEV; } } + if (result) { + nvme_free_queue_mem(nvmeq); + return result; + } + result = queue_request_irq(dev, nvmeq, "nvme admin"); dev->queues[0] = nvmeq; return result; @@ -1037,6 +1080,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, offset = offset_in_page(addr); count = DIV_ROUND_UP(offset + length, PAGE_SIZE); pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); err = get_user_pages_fast(addr, count, 1, pages); if (err < count) { @@ -1146,14 +1191,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return status; } -static int nvme_user_admin_cmd(struct nvme_ns *ns, +static int nvme_user_admin_cmd(struct nvme_dev *dev, struct nvme_admin_cmd __user *ucmd) { - struct nvme_dev *dev = ns->dev; struct nvme_admin_cmd cmd; struct nvme_command c; int status, length; - struct nvme_iod *iod; + struct nvme_iod *uninitialized_var(iod); if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1204,7 +1248,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case NVME_IOCTL_ID: return ns->ns_id; case NVME_IOCTL_ADMIN_CMD: - return nvme_user_admin_cmd(ns, (void __user *)arg); + return nvme_user_admin_cmd(ns->dev, (void __user *)arg); case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, (void __user *)arg); default: @@ -1218,26 +1262,6 @@ static const struct block_device_operations nvme_fops = { .compat_ioctl = nvme_ioctl, }; -static void nvme_timeout_ios(struct nvme_queue *nvmeq) -{ - int depth = nvmeq->q_depth - 1; - struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - unsigned long now = jiffies; - int cmdid; - - for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { - void *ctx; - nvme_completion_fn fn; - static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, }; - - if (!time_after(now, info[cmdid].timeout)) - continue; - dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid); - ctx = cancel_cmdid(nvmeq, cmdid, &fn); - fn(nvmeq->dev, ctx, &cqe); - } -} - static void nvme_resubmit_bios(struct nvme_queue *nvmeq) { while (bio_list_peek(&nvmeq->sq_cong)) { @@ -1269,7 +1293,7 @@ static int nvme_kthread(void *data) spin_lock_irq(&nvmeq->q_lock); if (nvme_process_cq(nvmeq)) printk("process_cq did something\n"); - nvme_timeout_ios(nvmeq); + nvme_cancel_ios(nvmeq, true); nvme_resubmit_bios(nvmeq); spin_unlock_irq(&nvmeq->q_lock); } @@ -1339,6 +1363,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, ns->disk = disk; lbaf = id->flbas & 0xf; ns->lba_shift = id->lbaf[lbaf].ds; + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); + if (dev->max_hw_sectors) + blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); disk->major = nvme_major; disk->minors = NVME_MINORS; @@ -1383,7 +1410,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) { - int result, cpu, i, nr_io_queues, db_bar_size; + int result, cpu, i, nr_io_queues, db_bar_size, q_depth; nr_io_queues = num_online_cpus(); result = set_queue_count(dev, nr_io_queues); @@ -1429,9 +1456,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) cpu = cpumask_next(cpu, cpu_online_mask); } + q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, + NVME_Q_DEPTH); for (i = 0; i < nr_io_queues; i++) { - dev->queues[i + 1] = nvme_create_queue(dev, i + 1, - NVME_Q_DEPTH, i); + dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); if (IS_ERR(dev->queues[i + 1])) return PTR_ERR(dev->queues[i + 1]); dev->queue_count++; @@ -1480,6 +1508,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); + if (ctrl->mdts) { + int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; + dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); + } id_ns = mem; for (i = 1; i <= nn; i++) { @@ -1523,8 +1555,6 @@ static int nvme_dev_remove(struct nvme_dev *dev) list_del(&dev->node); spin_unlock(&dev_list_lock); - /* TODO: wait all I/O finished or cancel them */ - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { list_del(&ns->list); del_gendisk(ns->disk); @@ -1560,15 +1590,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } -/* XXX: Use an ida or something to let remove / add work correctly */ -static void nvme_set_instance(struct nvme_dev *dev) +static DEFINE_IDA(nvme_instance_ida); + +static int nvme_set_instance(struct nvme_dev *dev) { - static int instance; - dev->instance = instance++; + int instance, error; + + do { + if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) + return -ENODEV; + + spin_lock(&dev_list_lock); + error = ida_get_new(&nvme_instance_ida, &instance); + spin_unlock(&dev_list_lock); + } while (error == -EAGAIN); + + if (error) + return -ENODEV; + + dev->instance = instance; + return 0; } static void nvme_release_instance(struct nvme_dev *dev) { + spin_lock(&dev_list_lock); + ida_remove(&nvme_instance_ida, dev->instance); + spin_unlock(&dev_list_lock); } static int __devinit nvme_probe(struct pci_dev *pdev, @@ -1601,7 +1649,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - nvme_set_instance(dev); + result = nvme_set_instance(dev); + if (result) + goto disable; + dev->entry[0].vector = pdev->irq; result = nvme_setup_prp_pools(dev); @@ -1704,15 +1755,17 @@ static struct pci_driver nvme_driver = { static int __init nvme_init(void) { - int result = -EBUSY; + int result; nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); if (IS_ERR(nvme_thread)) return PTR_ERR(nvme_thread); - nvme_major = register_blkdev(nvme_major, "nvme"); - if (nvme_major <= 0) + result = register_blkdev(nvme_major, "nvme"); + if (result < 0) goto kill_kthread; + else if (result > 0) + nvme_major = result; result = pci_register_driver(&nvme_driver); if (result) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b64502dfa9f4..e89daf1b21b4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -266,7 +266,7 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) static int iommu_init_device(struct device *dev) { - struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev); + struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; struct iommu_group *group; u16 alias; @@ -293,7 +293,9 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } else + } + + if (dma_pdev == NULL) dma_pdev = pci_dev_get(pdev); /* Account for quirked devices */ diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d8abb90a6c2f..034233eefc82 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct multipath *m = ti->private; + struct pgpath *pgpath; struct block_device *bdev; fmode_t mode; unsigned long flags; @@ -1570,12 +1571,14 @@ again: if (!m->current_pgpath) __choose_pgpath(m, 0); - if (m->current_pgpath) { - bdev = m->current_pgpath->path.dev->bdev; - mode = m->current_pgpath->path.dev->mode; + pgpath = m->current_pgpath; + + if (pgpath) { + bdev = pgpath->path.dev->bdev; + mode = pgpath->path.dev->mode; } - if (m->queue_io) + if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) r = -EAGAIN; else if (!bdev) r = -EIO; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f90069029aae..100368eb7991 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +static int count_device(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned *num_devices = data; + + (*num_devices)++; + + return 0; +} + +/* + * Check whether a table has no data devices attached using each + * target's iterate_devices method. + * Returns false if the result is unknown because a target doesn't + * support iterate_devices. + */ +bool dm_table_has_no_data_devices(struct dm_table *table) +{ + struct dm_target *uninitialized_var(ti); + unsigned i = 0, num_devices = 0; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (!ti->type->iterate_devices) + return false; + + ti->type->iterate_devices(ti, count_device, &num_devices); + if (num_devices) + return false; + } + + return true; +} + /* * Establish the new table's queue_limits and validate them. */ @@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, return q && blk_queue_nonrot(q); } -static bool dm_table_is_nonrot(struct dm_table *t) +static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !blk_queue_add_random(q); +} + +static bool dm_table_all_devices_attribute(struct dm_table *t, + iterate_devices_callout_fn func) { struct dm_target *ti; unsigned i = 0; - /* Ensure that all underlying device are non-rotational. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) + !ti->type->iterate_devices(ti, func, NULL)) return 0; } @@ -1396,7 +1439,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_discard_zeroes_data(t)) q->limits.discard_zeroes_data = 0; - if (dm_table_is_nonrot(t)) + /* Ensure that all underlying devices are non-rotational. */ + if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); @@ -1404,6 +1448,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_set_integrity(t); /* + * Determine whether or not this queue's I/O timings contribute + * to the entropy pool, Only request-based targets use this. + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not + * have it set. + */ + if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + + /* * QUEUE_FLAG_STACKABLE must be set after all queue settings are * visible to other CPUs because, once the flag is set, incoming bios * are processed by request-based dm, which refers to the queue diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index af1fc3b2c2ad..c29410af1e22 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -509,9 +509,9 @@ enum pool_mode { struct pool_features { enum pool_mode mode; - unsigned zero_new_blocks:1; - unsigned discard_enabled:1; - unsigned discard_passdown:1; + bool zero_new_blocks:1; + bool discard_enabled:1; + bool discard_passdown:1; }; struct thin_c; @@ -580,7 +580,8 @@ struct pool_c { struct dm_target_callbacks callbacks; dm_block_t low_water_blocks; - struct pool_features pf; + struct pool_features requested_pf; /* Features requested during table load */ + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ }; /* @@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool) /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ +static bool data_dev_supports_discard(struct pool_c *pt) +{ + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the data device + * supports discards. Disable discard_passdown if not. + */ +static void disable_passdown_if_not_supported(struct pool_c *pt) +{ + struct pool *pool = pt->pool; + struct block_device *data_bdev = pt->data_dev->bdev; + struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; + sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; + const char *reason = NULL; + char buf[BDEVNAME_SIZE]; + + if (!pt->adjusted_pf.discard_passdown) + return; + + if (!data_dev_supports_discard(pt)) + reason = "discard unsupported"; + + else if (data_limits->max_discard_sectors < pool->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + else if (data_limits->discard_granularity > block_size) + reason = "discard granularity larger than a block"; + + else if (block_size & (data_limits->discard_granularity - 1)) + reason = "discard granularity not a factor of block size"; + + if (reason) { + DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); + pt->adjusted_pf.discard_passdown = false; + } +} + static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; @@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) * We want to make sure that degraded pools are never upgraded. */ enum pool_mode old_mode = pool->pf.mode; - enum pool_mode new_mode = pt->pf.mode; + enum pool_mode new_mode = pt->adjusted_pf.mode; if (old_mode > new_mode) new_mode = old_mode; pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; - pool->pf = pt->pf; - set_pool_mode(pool, new_mode); + pool->pf = pt->adjusted_pf; - /* - * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. - */ - /* FIXME: pull this out into a sep fn. */ - if (pt->pf.discard_passdown) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - if (!q || !blk_queue_discard(q)) { - char buf[BDEVNAME_SIZE]; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); - pool->pf.discard_passdown = 0; - } - } + set_pool_mode(pool, new_mode); return 0; } @@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) static void pool_features_init(struct pool_features *pf) { pf->mode = PM_WRITE; - pf->zero_new_blocks = 1; - pf->discard_enabled = 1; - pf->discard_passdown = 1; + pf->zero_new_blocks = true; + pf->discard_enabled = true; + pf->discard_passdown = true; } static void __pool_destroy(struct pool *pool) @@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, argc--; if (!strcasecmp(arg_name, "skip_block_zeroing")) - pf->zero_new_blocks = 0; + pf->zero_new_blocks = false; else if (!strcasecmp(arg_name, "ignore_discard")) - pf->discard_enabled = 0; + pf->discard_enabled = false; else if (!strcasecmp(arg_name, "no_discard_passdown")) - pf->discard_passdown = 0; + pf->discard_passdown = false; else if (!strcasecmp(arg_name, "read_only")) pf->mode = PM_READ_ONLY; @@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->metadata_dev = metadata_dev; pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; - pt->pf = pf; + pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_requests = 1; + /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) */ if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_requests = 1; + /* * Setting 'discards_supported' circumvents the normal * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; } ti->private = pt; @@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - emit_flags(&pt->pf, result, sz, maxlen); + emit_flags(&pt->requested_pf, result, sz, maxlen); break; } @@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static void set_discard_limits(struct pool *pool, struct queue_limits *limits) +static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { - /* - * FIXME: these limits may be incompatible with the pool's data device - */ + struct pool *pool = pt->pool; + struct queue_limits *data_limits; + limits->max_discard_sectors = pool->sectors_per_block; /* - * This is just a hint, and not enforced. We have to cope with - * bios that cover a block partially. A discard that spans a block - * boundary is not sent to this target. + * discard_granularity is just a hint, and not enforced. */ - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->discard_zeroes_data = pool->pf.zero_new_blocks; + if (pt->adjusted_pf.discard_passdown) { + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; + limits->discard_granularity = data_limits->discard_granularity; + } else + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, 0); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - if (pool->pf.discard_enabled) - set_discard_limits(pool, limits); + + /* + * pt->adjusted_pf is a staging area for the actual features to use. + * They get transferred to the live pool in bind_control_target() + * called from pool_preresume(). + */ + if (!pt->adjusted_pf.discard_enabled) + return; + + disable_passdown_if_not_supported(pt); + + set_discard_limits(pt, limits); } static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +/* + * A thin device always inherits its queue limits from its pool. + */ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; - struct pool *pool = tc->pool; - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - set_discard_limits(pool, limits); + *limits = bdev_get_queue(tc->pool_dev->bdev)->limits; } static struct target_type thin_target = { .name = "thin", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 254d19268ad2..892ae2766aa6 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->hash_dev_block_bits = ffs(num) - 1; if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid data blocks"; r = -EINVAL; goto bad; @@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) + >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid hash start"; r = -EINVAL; goto bad; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e09b6ff5b49..67ffa391edcf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped) { int r = error; struct dm_rq_target_io *tio = clone->end_io_data; - dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; + dm_request_endio_fn rq_end_io = NULL; - if (mapped && rq_end_io) - r = rq_end_io(tio->ti, clone, error, &tio->info); + if (tio->ti) { + rq_end_io = tio->ti->type->rq_end_io; + + if (mapped && rq_end_io) + r = rq_end_io(tio->ti, clone, error, &tio->info); + } if (r <= 0) /* The target wants to complete the I/O */ @@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone, int r, requeued = 0; struct dm_rq_target_io *tio = clone->end_io_data; - /* - * Hold the md reference here for the in-flight I/O. - * We can't rely on the reference count by device opener, - * because the device may be closed during the request completion - * when all bios are completed. - * See the comment in rq_completed() too. - */ - dm_get(md); - tio->ti = ti; r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { @@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone, return requeued; } +static struct request *dm_start_request(struct mapped_device *md, struct request *orig) +{ + struct request *clone; + + blk_start_request(orig); + clone = orig->special; + atomic_inc(&md->pending[rq_data_dir(clone)]); + + /* + * Hold the md reference here for the in-flight I/O. + * We can't rely on the reference count by device opener, + * because the device may be closed during the request completion + * when all bios are completed. + * See the comment in rq_completed() too. + */ + dm_get(md); + + return clone; +} + /* * q->request_fn for request-based dm. * Called with the queue lock held. @@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q) pos = blk_rq_pos(rq); ti = dm_table_find_target(map, pos); - BUG_ON(!dm_target_is_valid(ti)); + if (!dm_target_is_valid(ti)) { + /* + * Must perform setup, that dm_done() requires, + * before calling dm_kill_unmapped_request + */ + DMERR_LIMIT("request attempted access beyond the end of device"); + clone = dm_start_request(md, rq); + dm_kill_unmapped_request(clone, -EIO); + continue; + } if (ti->type->busy && ti->type->busy(ti)) goto delay_and_out; - blk_start_request(rq); - clone = rq->special; - atomic_inc(&md->pending[rq_data_dir(clone)]); + clone = dm_start_request(md, rq); spin_unlock(q->queue_lock); if (map_request(ti, clone, md)) @@ -1684,8 +1706,6 @@ delay_and_out: blk_delay_queue(q, HZ / 10); out: dm_table_put(map); - - return; } int dm_underlying_device_busy(struct request_queue *q) @@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *map = ERR_PTR(-EINVAL); + struct dm_table *live_map, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) if (!dm_suspended_md(md)) goto out; + /* + * If the new table has no data devices, retain the existing limits. + * This helps multipath with queue_if_no_path if all paths disappear, + * then new I/O is queued based on these limits, and then some paths + * reappear. + */ + if (dm_table_has_no_data_devices(table)) { + live_map = dm_get_live_table(md); + if (live_map) + limits = md->queue->limits; + dm_table_put(live_map); + } + r = dm_calculate_queue_limits(table, &limits); if (r) { map = ERR_PTR(r); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 52eef493d266..6a99fefaa743 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); +bool dm_table_has_no_data_devices(struct dm_table *table); int dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index f2f482bec573..a6e74514e662 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1123,6 +1123,33 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file, } #endif +static inline unsigned long get_vm_size(struct vm_area_struct *vma) +{ + return vma->vm_end - vma->vm_start; +} + +static inline resource_size_t get_vm_offset(struct vm_area_struct *vma) +{ + return (resource_size_t) vma->vm_pgoff << PAGE_SHIFT; +} + +/* + * Set a new vm offset. + * + * Verify that the incoming offset really works as a page offset, + * and that the offset and size fit in a resource_size_t. + */ +static inline int set_vm_offset(struct vm_area_struct *vma, resource_size_t off) +{ + pgoff_t pgoff = off >> PAGE_SHIFT; + if (off != (resource_size_t) pgoff << PAGE_SHIFT) + return -EINVAL; + if (off + get_vm_size(vma) - 1 < off) + return -EINVAL; + vma->vm_pgoff = pgoff; + return 0; +} + /* * set up a mapping for shared memory segments */ @@ -1132,20 +1159,29 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) struct mtd_file_info *mfi = file->private_data; struct mtd_info *mtd = mfi->mtd; struct map_info *map = mtd->priv; - unsigned long start; - unsigned long off; - u32 len; + resource_size_t start, off; + unsigned long len, vma_len; if (mtd->type == MTD_RAM || mtd->type == MTD_ROM) { - off = vma->vm_pgoff << PAGE_SHIFT; + off = get_vm_offset(vma); start = map->phys; len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size); start &= PAGE_MASK; - if ((vma->vm_end - vma->vm_start + off) > len) + vma_len = get_vm_size(vma); + + /* Overflow in off+len? */ + if (vma_len + off < off) + return -EINVAL; + /* Does it fit in the mapping? */ + if (vma_len + off > len) return -EINVAL; off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; + /* Did that overflow? */ + if (off < start) + return -EINVAL; + if (set_vm_offset(vma, off) < 0) + return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED; #ifdef pgprot_noncached diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 79cebd8525ce..e48312f2305d 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8564,7 +8564,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: - iounmap(bp->regview); + pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/octeon/octeon_mgmt.c b/drivers/net/ethernet/octeon/octeon_mgmt.c index c42bbb16cdae..a688a2ddcfd6 100644 --- a/drivers/net/ethernet/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/octeon/octeon_mgmt.c @@ -722,10 +722,8 @@ static int octeon_mgmt_init_phy(struct net_device *netdev) octeon_mgmt_adjust_link, 0, PHY_INTERFACE_MODE_MII); - if (IS_ERR(p->phydev)) { - p->phydev = NULL; + if (!p->phydev) return -1; - } phy_start_aneg(p->phydev); diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index e559dfa06d6a..6fa74d530e44 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1101,9 +1101,9 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); - if (IS_ERR(phydev)) { + if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); - return PTR_ERR(phydev); + return -ENODEV; } mac->phydev = phydev; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index b8ead696141e..2a179d087207 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -15,7 +15,7 @@ qlcnic_poll_rsp(struct qlcnic_adapter *adapter) do { /* give atleast 1ms for firmware to respond */ - msleep(1); + mdelay(1); if (++timeout > QLCNIC_OS_CRB_RETRY_COUNT) return QLCNIC_CDRP_RSP_TIMEOUT; @@ -601,7 +601,7 @@ void qlcnic_fw_destroy_ctx(struct qlcnic_adapter *adapter) qlcnic_fw_cmd_destroy_tx_ctx(adapter); /* Allow dma queues to drain after context reset */ - msleep(20); + mdelay(20); } } diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 6d6192316b30..88e3991464e7 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -56,6 +56,32 @@ static int smsc_phy_config_init(struct phy_device *phydev) return smsc_phy_ack_interrupt (phydev); } +static int lan87xx_config_init(struct phy_device *phydev) +{ + /* + * Make sure the EDPWRDOWN bit is NOT set. Setting this bit on + * LAN8710/LAN8720 PHY causes the PHY to misbehave, likely due + * to a bug on the chip. + * + * When the system is powered on with the network cable being + * disconnected all the way until after ifconfig ethX up is + * issued for the LAN port with this PHY, connecting the cable + * afterwards does not cause LINK change detection, while the + * expected behavior is the Link UP being detected. + */ + int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + + rc &= ~MII_LAN83C185_EDPWRDOWN; + + rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, rc); + if (rc < 0) + return rc; + + return smsc_phy_ack_interrupt(phydev); +} + static int lan911x_config_init(struct phy_device *phydev) { return smsc_phy_ack_interrupt(phydev); @@ -162,7 +188,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_config_init, /* IRQ related */ .ack_interrupt = smsc_phy_ack_interrupt, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 3ffe8a6e3c8b..f8cd61f449a4 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1653,8 +1653,8 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &team_nl_family, 0, TEAM_CMD_NOOP); - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); + if (!hdr) { + err = -EMSGSIZE; goto err_msg_put; } @@ -1848,8 +1848,8 @@ start_again: hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2068,8 +2068,8 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb, hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags, TEAM_CMD_PORT_LIST_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index f5ab6e613ec8..376143e8a1aa 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1253,6 +1253,7 @@ static struct usb_driver smsc75xx_driver = { .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, + .reset_resume = usbnet_resume, .disconnect = usbnet_disconnect, .disable_hub_initiated_lpm = 1, }; diff --git a/fs/dcache.c b/fs/dcache.c index 0364af2311f4..693f95bf1cae 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1134,6 +1134,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1236,6 +1238,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -3035,6 +3039,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fb1a2bedbe97..8d80c990dffd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref) dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ - mutex_lock(&file->f_mutex); list_del_init(&block->b_flist); mutex_unlock(&file->f_mutex); @@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref) static void nlmsvc_release_block(struct nlm_block *block) { if (block != NULL) - kref_put(&block->b_count, nlmsvc_free_block); + kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex); } /* diff --git a/fs/namespace.c b/fs/namespace.c index 4d31f73e2561..7bdf7907413f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; err = -EINVAL; - if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) - goto unlock; + if (unlikely(!check_mnt(real_mount(path->mnt)))) { + /* that's acceptable only for automounts done in private ns */ + if (!(mnt_flags & MNT_SHRINKABLE)) + goto unlock; + /* ... and for those we'd better have mountpoint still alive */ + if (!real_mount(path->mnt)->mnt_ns) + goto unlock; + } /* Refuse the same filesystem on the same mount point */ err = -EBUSY; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7e83370e6fd2..f3b99e1c1042 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -256,72 +256,78 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } -int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) +static inline int iommu_attach_group(struct iommu_domain *domain, + struct iommu_group *group) { return -ENODEV; } -void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) +static inline void iommu_detach_group(struct iommu_domain *domain, + struct iommu_group *group) { } -struct iommu_group *iommu_group_alloc(void) +static inline struct iommu_group *iommu_group_alloc(void) { return ERR_PTR(-ENODEV); } -void *iommu_group_get_iommudata(struct iommu_group *group) +static inline void *iommu_group_get_iommudata(struct iommu_group *group) { return NULL; } -void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, - void (*release)(void *iommu_data)) +static inline void iommu_group_set_iommudata(struct iommu_group *group, + void *iommu_data, + void (*release)(void *iommu_data)) { } -int iommu_group_set_name(struct iommu_group *group, const char *name) +static inline int iommu_group_set_name(struct iommu_group *group, + const char *name) { return -ENODEV; } -int iommu_group_add_device(struct iommu_group *group, struct device *dev) +static inline int iommu_group_add_device(struct iommu_group *group, + struct device *dev) { return -ENODEV; } -void iommu_group_remove_device(struct device *dev) +static inline void iommu_group_remove_device(struct device *dev) { } -int iommu_group_for_each_dev(struct iommu_group *group, void *data, - int (*fn)(struct device *, void *)) +static inline int iommu_group_for_each_dev(struct iommu_group *group, + void *data, + int (*fn)(struct device *, void *)) { return -ENODEV; } -struct iommu_group *iommu_group_get(struct device *dev) +static inline struct iommu_group *iommu_group_get(struct device *dev) { return NULL; } -void iommu_group_put(struct iommu_group *group) +static inline void iommu_group_put(struct iommu_group *group) { } -int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) +static inline int iommu_group_register_notifier(struct iommu_group *group, + struct notifier_block *nb) { return -ENODEV; } -int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb) +static inline int iommu_group_unregister_notifier(struct iommu_group *group, + struct notifier_block *nb) { return 0; } -int iommu_group_id(struct iommu_group *group) +static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9490a00529f4..c25cccaa555a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -35,8 +35,10 @@ struct nvme_bar { __u64 acq; /* Admin CQ Base Address */ }; +#define NVME_CAP_MQES(cap) ((cap) & 0xffff) #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) +#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) enum { NVME_CC_ENABLE = 1 << 0, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 57c4b9309015..141dbb695097 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1811,7 +1811,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); VM_BUG_ON(page_mapcount(src_page) != 1); - VM_BUG_ON(page_count(src_page) != 2); release_pte_page(src_page); /* * ptl mostly unnecessary, but preempt has to diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd34..c7527f6b9ad9 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6f936358d664 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -80,8 +80,8 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (!hdr) { + ret = -EMSGSIZE; goto err_out; } @@ -250,8 +250,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || @@ -617,8 +617,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8ab309..a4c1e4528cac 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } |