From e2c75fcae4d72b229830ae7e7438d544bb71bf89 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 May 2017 15:20:37 -0700 Subject: module: Fix pr_fmt() bug for header use of printk This commit removes the pr_fmt() macro, replacing it with mod_err() and mod_debug() macros to avoid errors when using printk() from header files. Signed-off-by: Joe Perches Signed-off-by: Paul E. McKenney --- arch/blackfin/kernel/module.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c index 0188c933b155..15af5768c403 100644 --- a/arch/blackfin/kernel/module.c +++ b/arch/blackfin/kernel/module.c @@ -4,8 +4,6 @@ * Licensed under the GPL-2 or later */ -#define pr_fmt(fmt) "module %s: " fmt, mod->name - #include #include #include @@ -16,6 +14,11 @@ #include #include +#define mod_err(mod, fmt, ...) \ + pr_err("module %s: " fmt, (mod)->name, ##__VA_ARGS__) +#define mod_debug(mod, fmt, ...) \ + pr_debug("module %s: " fmt, (mod)->name, ##__VA_ARGS__) + /* Transfer the section to the L1 memory */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -44,7 +47,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l1_inst_sram_alloc(s->sh_size); mod->arch.text_l1 = dest; if (dest == NULL) { - pr_err("L1 inst memory allocation failed\n"); + mod_err(mod, "L1 inst memory allocation failed\n"); return -1; } dma_memcpy(dest, (void *)s->sh_addr, s->sh_size); @@ -56,7 +59,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l1_data_sram_alloc(s->sh_size); mod->arch.data_a_l1 = dest; if (dest == NULL) { - pr_err("L1 data memory allocation failed\n"); + mod_err(mod, "L1 data memory allocation failed\n"); return -1; } memcpy(dest, (void *)s->sh_addr, s->sh_size); @@ -68,7 +71,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l1_data_sram_zalloc(s->sh_size); mod->arch.bss_a_l1 = dest; if (dest == NULL) { - pr_err("L1 data memory allocation failed\n"); + mod_err(mod, "L1 data memory allocation failed\n"); return -1; } @@ -77,7 +80,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l1_data_B_sram_alloc(s->sh_size); mod->arch.data_b_l1 = dest; if (dest == NULL) { - pr_err("L1 data memory allocation failed\n"); + mod_err(mod, "L1 data memory allocation failed\n"); return -1; } memcpy(dest, (void *)s->sh_addr, s->sh_size); @@ -87,7 +90,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l1_data_B_sram_alloc(s->sh_size); mod->arch.bss_b_l1 = dest; if (dest == NULL) { - pr_err("L1 data memory allocation failed\n"); + mod_err(mod, "L1 data memory allocation failed\n"); return -1; } memset(dest, 0, s->sh_size); @@ -99,7 +102,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l2_sram_alloc(s->sh_size); mod->arch.text_l2 = dest; if (dest == NULL) { - pr_err("L2 SRAM allocation failed\n"); + mod_err(mod, "L2 SRAM allocation failed\n"); return -1; } memcpy(dest, (void *)s->sh_addr, s->sh_size); @@ -111,7 +114,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l2_sram_alloc(s->sh_size); mod->arch.data_l2 = dest; if (dest == NULL) { - pr_err("L2 SRAM allocation failed\n"); + mod_err(mod, "L2 SRAM allocation failed\n"); return -1; } memcpy(dest, (void *)s->sh_addr, s->sh_size); @@ -123,7 +126,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, dest = l2_sram_zalloc(s->sh_size); mod->arch.bss_l2 = dest; if (dest == NULL) { - pr_err("L2 SRAM allocation failed\n"); + mod_err(mod, "L2 SRAM allocation failed\n"); return -1; } @@ -157,8 +160,8 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf32_Sym *sym; unsigned long location, value, size; - pr_debug("applying relocate section %u to %u\n", - relsec, sechdrs[relsec].sh_info); + mod_debug(mod, "applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ @@ -174,14 +177,14 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, #ifdef CONFIG_SMP if (location >= COREB_L1_DATA_A_START) { - pr_err("cannot relocate in L1: %u (SMP kernel)\n", + mod_err(mod, "cannot relocate in L1: %u (SMP kernel)\n", ELF32_R_TYPE(rel[i].r_info)); return -ENOEXEC; } #endif - pr_debug("location is %lx, value is %lx type is %d\n", - location, value, ELF32_R_TYPE(rel[i].r_info)); + mod_debug(mod, "location is %lx, value is %lx type is %d\n", + location, value, ELF32_R_TYPE(rel[i].r_info)); switch (ELF32_R_TYPE(rel[i].r_info)) { @@ -200,12 +203,12 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, case R_BFIN_PCREL12_JUMP: case R_BFIN_PCREL12_JUMP_S: case R_BFIN_PCREL10: - pr_err("unsupported relocation: %u (no -mlong-calls?)\n", + mod_err(mod, "unsupported relocation: %u (no -mlong-calls?)\n", ELF32_R_TYPE(rel[i].r_info)); return -ENOEXEC; default: - pr_err("unknown relocation: %u\n", + mod_err(mod, "unknown relocation: %u\n", ELF32_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -222,7 +225,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, isram_memcpy((void *)location, &value, size); break; default: - pr_err("invalid relocation for %#lx\n", location); + mod_err(mod, "invalid relocation for %#lx\n", location); return -ENOEXEC; } } -- cgit v1.2.3 From 22e4ebb975822833b083533035233d128b30e98f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 28 Jul 2017 16:40:40 -0400 Subject: membarrier: Provide expedited private command Implement MEMBARRIER_CMD_PRIVATE_EXPEDITED with IPIs using cpumask built from all runqueues for which current thread's mm is the same as the thread calling sys_membarrier. It executes faster than the non-expedited variant (no blocking). It also works on NOHZ_FULL configurations. Scheduler-wise, it requires a memory barrier before and after context switching between processes (which have different mm). The memory barrier before context switch is already present. For the barrier after context switch: * Our TSO archs can do RELEASE without being a full barrier. Look at x86 spin_unlock() being a regular STORE for example. But for those archs, all atomics imply smp_mb and all of them have atomic ops in switch_mm() for mm_cpumask(), and on x86 the CR3 load acts as a full barrier. * From all weakly ordered machines, only ARM64 and PPC can do RELEASE, the rest does indeed do smp_mb(), so there the spin_unlock() is a full barrier and we're good. * ARM64 has a very heavy barrier in switch_to(), which suffices. * PPC just removed its barrier from switch_to(), but appears to be talking about adding something to switch_mm(). So add a smp_mb__after_unlock_lock() for now, until this is settled on the PPC side. Changes since v3: - Properly document the memory barriers provided by each architecture. Changes since v2: - Address comments from Peter Zijlstra, - Add smp_mb__after_unlock_lock() after finish_lock_switch() in finish_task_switch() to add the memory barrier we need after storing to rq->curr. This is much simpler than the previous approach relying on atomic_dec_and_test() in mmdrop(), which actually added a memory barrier in the common case of switching between userspace processes. - Return -EINVAL when MEMBARRIER_CMD_SHARED is used on a nohz_full kernel, rather than having the whole membarrier system call returning -ENOSYS. Indeed, CMD_PRIVATE_EXPEDITED is compatible with nohz_full. Adapt the CMD_QUERY mask accordingly. Changes since v1: - move membarrier code under kernel/sched/ because it uses the scheduler runqueue, - only add the barrier when we switch from a kernel thread. The case where we switch from a user-space thread is already handled by the atomic_dec_and_test() in mmdrop(). - add a comment to mmdrop() documenting the requirement on the implicit memory barrier. CC: Peter Zijlstra CC: Paul E. McKenney CC: Boqun Feng CC: Andrew Hunter CC: Maged Michael CC: gromer@google.com CC: Avi Kivity CC: Benjamin Herrenschmidt CC: Paul Mackerras CC: Michael Ellerman Signed-off-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Tested-by: Dave Watson --- MAINTAINERS | 2 +- arch/arm64/kernel/process.c | 2 + include/uapi/linux/membarrier.h | 23 +++++- kernel/Makefile | 1 - kernel/membarrier.c | 70 ------------------ kernel/sched/Makefile | 1 + kernel/sched/core.c | 25 +++++++ kernel/sched/membarrier.c | 152 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 202 insertions(+), 74 deletions(-) delete mode 100644 kernel/membarrier.c create mode 100644 kernel/sched/membarrier.c (limited to 'arch') diff --git a/MAINTAINERS b/MAINTAINERS index f66488dfdbc9..3b035584272f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8621,7 +8621,7 @@ M: Mathieu Desnoyers M: "Paul E. McKenney" L: linux-kernel@vger.kernel.org S: Supported -F: kernel/membarrier.c +F: kernel/sched/membarrier.c F: include/uapi/linux/membarrier.h MEMORY MANAGEMENT diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 659ae8094ed5..c8f7d98d8cb9 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -360,6 +360,8 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, /* * Complete any pending TLB or cache maintenance on this CPU in case * the thread migrates to a different CPU. + * This full barrier is also required by the membarrier system + * call. */ dsb(ish); diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index e0b108bd2624..6d47b3249d8a 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -40,14 +40,33 @@ * (non-running threads are de facto in such a * state). This covers threads from all processes * running on the system. This command returns 0. + * @MEMBARRIER_CMD_PRIVATE_EXPEDITED: + * Execute a memory barrier on each running + * thread belonging to the same process as the current + * thread. Upon return from system call, the + * caller thread is ensured that all its running + * threads siblings have passed through a state + * where all memory accesses to user-space + * addresses match program order between entry + * to and return from the system call + * (non-running threads are de facto in such a + * state). This only covers threads from the + * same processes as the caller thread. This + * command returns 0. The "expedited" commands + * complete faster than the non-expedited ones, + * they never block, but have the downside of + * causing extra overhead. * * Command to be passed to the membarrier system call. The commands need to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * the value 0. */ enum membarrier_cmd { - MEMBARRIER_CMD_QUERY = 0, - MEMBARRIER_CMD_SHARED = (1 << 0), + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), + /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ + /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), }; #endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 4cb8e8b23c6e..9c323a6daa46 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -108,7 +108,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o obj-$(CONFIG_TORTURE_TEST) += torture.o -obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_HAS_IOMEM) += memremap.o diff --git a/kernel/membarrier.c b/kernel/membarrier.c deleted file mode 100644 index 9f9284f37f8d..000000000000 --- a/kernel/membarrier.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (C) 2010, 2015 Mathieu Desnoyers - * - * membarrier system call - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include - -/* - * Bitmask made from a "or" of all commands within enum membarrier_cmd, - * except MEMBARRIER_CMD_QUERY. - */ -#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED) - -/** - * sys_membarrier - issue memory barriers on a set of threads - * @cmd: Takes command values defined in enum membarrier_cmd. - * @flags: Currently needs to be 0. For future extensions. - * - * If this system call is not implemented, -ENOSYS is returned. If the - * command specified does not exist, or if the command argument is invalid, - * this system call returns -EINVAL. For a given command, with flags argument - * set to 0, this system call is guaranteed to always return the same value - * until reboot. - * - * All memory accesses performed in program order from each targeted thread - * is guaranteed to be ordered with respect to sys_membarrier(). If we use - * the semantic "barrier()" to represent a compiler barrier forcing memory - * accesses to be performed in program order across the barrier, and - * smp_mb() to represent explicit memory barriers forcing full memory - * ordering across the barrier, we have the following ordering table for - * each pair of barrier(), sys_membarrier() and smp_mb(): - * - * The pair ordering is detailed as (O: ordered, X: not ordered): - * - * barrier() smp_mb() sys_membarrier() - * barrier() X X O - * smp_mb() X O O - * sys_membarrier() O O O - */ -SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) -{ - /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ - if (tick_nohz_full_enabled()) - return -ENOSYS; - if (unlikely(flags)) - return -EINVAL; - switch (cmd) { - case MEMBARRIER_CMD_QUERY: - return MEMBARRIER_CMD_BITMASK; - case MEMBARRIER_CMD_SHARED: - if (num_online_cpus() > 1) - synchronize_sched(); - return 0; - default: - return -EINVAL; - } -} diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 53f0164ed362..78f54932ea1d 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o +obj-$(CONFIG_MEMBARRIER) += membarrier.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bfee6ea7db49..f77269c6c2f8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2640,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev) prev_state = prev->state; vtime_task_switch(prev); perf_event_task_sched_in(prev, current); + /* + * The membarrier system call requires a full memory barrier + * after storing to rq->curr, before going back to user-space. + * + * TODO: This smp_mb__after_unlock_lock can go away if PPC end + * up adding a full barrier to switch_mm(), or we should figure + * out if a smp_mb__after_unlock_lock is really the proper API + * to use. + */ + smp_mb__after_unlock_lock(); finish_lock_switch(rq, prev); finish_arch_post_lock_switch(); @@ -3329,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt) if (likely(prev != next)) { rq->nr_switches++; rq->curr = next; + /* + * The membarrier system call requires each architecture + * to have a full memory barrier after updating + * rq->curr, before returning to user-space. For TSO + * (e.g. x86), the architecture must provide its own + * barrier in switch_mm(). For weakly ordered machines + * for which spin_unlock() acts as a full memory + * barrier, finish_lock_switch() in common code takes + * care of this barrier. For weakly ordered machines for + * which spin_unlock() acts as a RELEASE barrier (only + * arm64 and PowerPC), arm64 has a full barrier in + * switch_to(), and PowerPC has + * smp_mb__after_unlock_lock() before + * finish_lock_switch(). + */ ++*switch_count; trace_sched_switch(preempt, prev, next); diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c new file mode 100644 index 000000000000..a92fddc22747 --- /dev/null +++ b/kernel/sched/membarrier.c @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2010-2017 Mathieu Desnoyers + * + * membarrier system call + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#include "sched.h" /* for cpu_rq(). */ + +/* + * Bitmask made from a "or" of all commands within enum membarrier_cmd, + * except MEMBARRIER_CMD_QUERY. + */ +#define MEMBARRIER_CMD_BITMASK \ + (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) + +static void ipi_mb(void *info) +{ + smp_mb(); /* IPIs should be serializing but paranoid. */ +} + +static void membarrier_private_expedited(void) +{ + int cpu; + bool fallback = false; + cpumask_var_t tmpmask; + + if (num_online_cpus() == 1) + return; + + /* + * Matches memory barriers around rq->curr modification in + * scheduler. + */ + smp_mb(); /* system call entry is not a mb. */ + + /* + * Expedited membarrier commands guarantee that they won't + * block, hence the GFP_NOWAIT allocation flag and fallback + * implementation. + */ + if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) { + /* Fallback for OOM. */ + fallback = true; + } + + cpus_read_lock(); + for_each_online_cpu(cpu) { + struct task_struct *p; + + /* + * Skipping the current CPU is OK even through we can be + * migrated at any point. The current CPU, at the point + * where we read raw_smp_processor_id(), is ensured to + * be in program order with respect to the caller + * thread. Therefore, we can skip this CPU from the + * iteration. + */ + if (cpu == raw_smp_processor_id()) + continue; + rcu_read_lock(); + p = task_rcu_dereference(&cpu_rq(cpu)->curr); + if (p && p->mm == current->mm) { + if (!fallback) + __cpumask_set_cpu(cpu, tmpmask); + else + smp_call_function_single(cpu, ipi_mb, NULL, 1); + } + rcu_read_unlock(); + } + if (!fallback) { + smp_call_function_many(tmpmask, ipi_mb, NULL, 1); + free_cpumask_var(tmpmask); + } + cpus_read_unlock(); + + /* + * Memory barrier on the caller thread _after_ we finished + * waiting for the last IPI. Matches memory barriers around + * rq->curr modification in scheduler. + */ + smp_mb(); /* exit from system call is not a mb */ +} + +/** + * sys_membarrier - issue memory barriers on a set of threads + * @cmd: Takes command values defined in enum membarrier_cmd. + * @flags: Currently needs to be 0. For future extensions. + * + * If this system call is not implemented, -ENOSYS is returned. If the + * command specified does not exist, not available on the running + * kernel, or if the command argument is invalid, this system call + * returns -EINVAL. For a given command, with flags argument set to 0, + * this system call is guaranteed to always return the same value until + * reboot. + * + * All memory accesses performed in program order from each targeted thread + * is guaranteed to be ordered with respect to sys_membarrier(). If we use + * the semantic "barrier()" to represent a compiler barrier forcing memory + * accesses to be performed in program order across the barrier, and + * smp_mb() to represent explicit memory barriers forcing full memory + * ordering across the barrier, we have the following ordering table for + * each pair of barrier(), sys_membarrier() and smp_mb(): + * + * The pair ordering is detailed as (O: ordered, X: not ordered): + * + * barrier() smp_mb() sys_membarrier() + * barrier() X X O + * smp_mb() X O O + * sys_membarrier() O O O + */ +SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) +{ + if (unlikely(flags)) + return -EINVAL; + switch (cmd) { + case MEMBARRIER_CMD_QUERY: + { + int cmd_mask = MEMBARRIER_CMD_BITMASK; + + if (tick_nohz_full_enabled()) + cmd_mask &= ~MEMBARRIER_CMD_SHARED; + return cmd_mask; + } + case MEMBARRIER_CMD_SHARED: + /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ + if (tick_nohz_full_enabled()) + return -EINVAL; + if (num_online_cpus() > 1) + synchronize_sched(); + return 0; + case MEMBARRIER_CMD_PRIVATE_EXPEDITED: + membarrier_private_expedited(); + return 0; + default: + return -EINVAL; + } +} -- cgit v1.2.3 From 952111d7db02573e7165e338de8d4871fa447b21 Mon Sep 17 00:00:00 2001 From: Paul E. McKenney Date: Thu, 29 Jun 2017 15:53:02 -0700 Subject: arch: Remove spin_unlock_wait() arch-specific definitions There is no agreed-upon definition of spin_unlock_wait()'s semantics, and it appears that all callers could do just as well with a lock/unlock pair. This commit therefore removes the underlying arch-specific arch_spin_unlock_wait() for all architectures providing them. Signed-off-by: Paul E. McKenney Cc: Cc: Peter Zijlstra Cc: Alan Stern Cc: Andrea Parri Cc: Linus Torvalds Acked-by: Will Deacon Acked-by: Boqun Feng --- arch/alpha/include/asm/spinlock.h | 5 ---- arch/arc/include/asm/spinlock.h | 5 ---- arch/arm/include/asm/spinlock.h | 16 ---------- arch/arm64/include/asm/spinlock.h | 58 ++++-------------------------------- arch/blackfin/include/asm/spinlock.h | 5 ---- arch/hexagon/include/asm/spinlock.h | 5 ---- arch/ia64/include/asm/spinlock.h | 21 ------------- arch/m32r/include/asm/spinlock.h | 5 ---- arch/metag/include/asm/spinlock.h | 5 ---- arch/mn10300/include/asm/spinlock.h | 5 ---- arch/parisc/include/asm/spinlock.h | 7 ----- arch/powerpc/include/asm/spinlock.h | 33 -------------------- arch/s390/include/asm/spinlock.h | 7 ----- arch/sh/include/asm/spinlock-cas.h | 5 ---- arch/sh/include/asm/spinlock-llsc.h | 5 ---- arch/sparc/include/asm/spinlock_32.h | 5 ---- arch/tile/include/asm/spinlock_32.h | 2 -- arch/tile/include/asm/spinlock_64.h | 2 -- arch/tile/lib/spinlock_32.c | 23 -------------- arch/tile/lib/spinlock_64.c | 22 -------------- arch/xtensa/include/asm/spinlock.h | 5 ---- 21 files changed, 5 insertions(+), 241 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index a40b9fc0c6c3..718ac0b64adf 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -16,11 +16,6 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, !VAL); -} - static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { return lock.lock == 0; diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 233d5ffe6ec7..a325e6a36523 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -16,11 +16,6 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->slock, !VAL); -} - #ifdef CONFIG_ARC_HAS_LLSC static inline void arch_spin_lock(arch_spinlock_t *lock) diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 4bec45442072..c030143c18c6 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -52,22 +52,6 @@ static inline void dsb_sev(void) * memory. */ -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - u16 owner = READ_ONCE(lock->tickets.owner); - - for (;;) { - arch_spinlock_t tmp = READ_ONCE(*lock); - - if (tmp.tickets.owner == tmp.tickets.next || - tmp.tickets.owner != owner) - break; - - wfe(); - } - smp_acquire__after_ctrl_dep(); -} - #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) static inline void arch_spin_lock(arch_spinlock_t *lock) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index cae331d553f8..f445bd7f2b9f 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -26,58 +26,6 @@ * The memory barriers are implicit with the load-acquire and store-release * instructions. */ -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - unsigned int tmp; - arch_spinlock_t lockval; - u32 owner; - - /* - * Ensure prior spin_lock operations to other locks have completed - * on this CPU before we test whether "lock" is locked. - */ - smp_mb(); - owner = READ_ONCE(lock->owner) << 16; - - asm volatile( -" sevl\n" -"1: wfe\n" -"2: ldaxr %w0, %2\n" - /* Is the lock free? */ -" eor %w1, %w0, %w0, ror #16\n" -" cbz %w1, 3f\n" - /* Lock taken -- has there been a subsequent unlock->lock transition? */ -" eor %w1, %w3, %w0, lsl #16\n" -" cbz %w1, 1b\n" - /* - * The owner has been updated, so there was an unlock->lock - * transition that we missed. That means we can rely on the - * store-release of the unlock operation paired with the - * load-acquire of the lock operation to publish any of our - * previous stores to the new lock owner and therefore don't - * need to bother with the writeback below. - */ -" b 4f\n" -"3:\n" - /* - * Serialise against any concurrent lockers by writing back the - * unlocked lock value - */ - ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ -" stxr %w1, %w0, %2\n" - __nops(2), - /* LSE atomics */ -" mov %w1, %w0\n" -" cas %w0, %w0, %2\n" -" eor %w1, %w1, %w0\n") - /* Somebody else wrote to the lock, GOTO 10 and reload the value */ -" cbnz %w1, 2b\n" -"4:" - : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) - : "r" (owner) - : "memory"); -} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) @@ -176,7 +124,11 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - smp_mb(); /* See arch_spin_unlock_wait */ + /* + * Ensure prior spin_lock operations to other locks have completed + * on this CPU before we test whether "lock" is locked. + */ + smp_mb(); /* ^^^ */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index c58f4a83ed6f..f6431439d15d 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -48,11 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) __raw_spin_unlock_asm(&lock->lock); } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, !VAL); -} - static inline int arch_read_can_lock(arch_rwlock_t *rw) { return __raw_uncached_fetch_asm(&rw->lock) > 0; diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index a1c55788c5d6..53a8d5885887 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -179,11 +179,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, !VAL); -} - #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index ca9e76149a4a..df2c121164b8 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -76,22 +76,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) ACCESS_ONCE(*p) = (tmp + 2) & ~1; } -static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) -{ - int *p = (int *)&lock->lock, ticket; - - ia64_invala(); - - for (;;) { - asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory"); - if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) - return; - cpu_relax(); - } - - smp_acquire__after_ctrl_dep(); -} - static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { long tmp = ACCESS_ONCE(lock->lock); @@ -143,11 +127,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, arch_spin_lock(lock); } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - __ticket_spin_unlock_wait(lock); -} - #define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0) #define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0) diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 323c7fc953cd..a56825592b90 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -30,11 +30,6 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->slock, VAL > 0); -} - /** * arch_spin_trylock - Try spin lock and return a result * @lock: Pointer to the lock variable diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index c0c7a22be1ae..ddf7fe5708a6 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -15,11 +15,6 @@ * locked. */ -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, !VAL); -} - #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 9c7b8f7942d8..fe413b41df6c 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -26,11 +26,6 @@ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->slock, !VAL); -} - static inline void arch_spin_unlock(arch_spinlock_t *lock) { asm volatile( diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index e32936cd7f10..55bfe4affca3 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -14,13 +14,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x) #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) -static inline void arch_spin_unlock_wait(arch_spinlock_t *x) -{ - volatile unsigned int *a = __ldcw_align(x); - - smp_cond_load_acquire(a, VAL); -} - static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) { diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 8c1b913de6d7..d256e448ea49 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -170,39 +170,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) lock->slock = 0; } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - arch_spinlock_t lock_val; - - smp_mb(); - - /* - * Atomically load and store back the lock value (unchanged). This - * ensures that our observation of the lock value is ordered with - * respect to other lock operations. - */ - __asm__ __volatile__( -"1: " PPC_LWARX(%0, 0, %2, 0) "\n" -" stwcx. %0, 0, %2\n" -" bne- 1b\n" - : "=&r" (lock_val), "+m" (*lock) - : "r" (lock) - : "cr0", "xer"); - - if (arch_spin_value_unlocked(lock_val)) - goto out; - - while (lock->slock) { - HMT_low(); - if (SHARED_PROCESSOR) - __spin_yield(lock); - } - HMT_medium(); - -out: - smp_mb(); -} - /* * Read-write spinlocks, allowing multiple readers * but only one writer. diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f7838ecd83c6..217ee5210c32 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -98,13 +98,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) : "cc", "memory"); } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - while (arch_spin_is_locked(lock)) - arch_spin_relax(lock); - smp_acquire__after_ctrl_dep(); -} - /* * Read-write spinlocks, allowing multiple readers * but only one writer. diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h index c46e8cc7b515..5ed7dbbd94ff 100644 --- a/arch/sh/include/asm/spinlock-cas.h +++ b/arch/sh/include/asm/spinlock-cas.h @@ -29,11 +29,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, VAL > 0); -} - static inline void arch_spin_lock(arch_spinlock_t *lock) { while (!__sl_cas(&lock->lock, 1, 0)); diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h index cec78143fa83..f77263aae760 100644 --- a/arch/sh/include/asm/spinlock-llsc.h +++ b/arch/sh/include/asm/spinlock-llsc.h @@ -21,11 +21,6 @@ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, VAL > 0); -} - /* * Simple spin lock operations. There are two variants, one clears IRQ's * on the local processor, one does not. diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 8011e79f59c9..67345b2dc408 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -14,11 +14,6 @@ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, !VAL); -} - static inline void arch_spin_lock(arch_spinlock_t *lock) { __asm__ __volatile__( diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h index b14b1ba5bf9c..cba8ba9b8da6 100644 --- a/arch/tile/include/asm/spinlock_32.h +++ b/arch/tile/include/asm/spinlock_32.h @@ -64,8 +64,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) lock->current_ticket = old_ticket + TICKET_QUANTUM; } -void arch_spin_unlock_wait(arch_spinlock_t *lock); - /* * Read-write spinlocks, allowing multiple readers * but only one writer. diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index b9718fb4e74a..9a2c2d605752 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -58,8 +58,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) __insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT); } -void arch_spin_unlock_wait(arch_spinlock_t *lock); - void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val); /* Grab the "next" ticket number and bump it atomically. diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 076c6cc43113..db9333f2447c 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -62,29 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock) } EXPORT_SYMBOL(arch_spin_trylock); -void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - u32 iterations = 0; - int curr = READ_ONCE(lock->current_ticket); - int next = READ_ONCE(lock->next_ticket); - - /* Return immediately if unlocked. */ - if (next == curr) - return; - - /* Wait until the current locker has released the lock. */ - do { - delay_backoff(iterations++); - } while (READ_ONCE(lock->current_ticket) == curr); - - /* - * The TILE architecture doesn't do read speculation; therefore - * a control dependency guarantees a LOAD->{LOAD,STORE} order. - */ - barrier(); -} -EXPORT_SYMBOL(arch_spin_unlock_wait); - /* * The low byte is always reserved to be the marker for a "tns" operation * since the low bit is set to "1" by a tns. The next seven bits are diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index a4b5b2cbce93..de414c22892f 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -62,28 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock) } EXPORT_SYMBOL(arch_spin_trylock); -void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - u32 iterations = 0; - u32 val = READ_ONCE(lock->lock); - u32 curr = arch_spin_current(val); - - /* Return immediately if unlocked. */ - if (arch_spin_next(val) == curr) - return; - - /* Wait until the current locker has released the lock. */ - do { - delay_backoff(iterations++); - } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); - - /* - * The TILE architecture doesn't do read speculation; therefore - * a control dependency guarantees a LOAD->{LOAD,STORE} order. - */ - barrier(); -} -EXPORT_SYMBOL(arch_spin_unlock_wait); /* * If the read lock fails due to a writer, we retry periodically diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index a36221cf6363..3bb49681ee24 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -33,11 +33,6 @@ #define arch_spin_is_locked(x) ((x)->slock != 0) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->slock, !VAL); -} - #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) static inline void arch_spin_lock(arch_spinlock_t *lock) -- cgit v1.2.3