aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/locking/ww-mutex-design.txt12
-rw-r--r--arch/um/drivers/random.c2
-rw-r--r--arch/x86/include/asm/spinlock.h3
-rw-r--r--arch/x86/kernel/jump_label.c3
-rw-r--r--arch/x86/kernel/kvm.c14
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c3
-rw-r--r--arch/x86/xen/spinlock.c19
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_main.c7
-rw-r--r--drivers/block/drbd/drbd_req.c31
-rw-r--r--drivers/block/rbd.c8
-rw-r--r--drivers/block/virtio_blk.c2
-rw-r--r--drivers/gpu/drm/drm_gem_cma_helper.c2
-rw-r--r--drivers/gpu/drm/drm_info.c2
-rw-r--r--drivers/gpu/drm/drm_mode_object.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_object.h2
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c2
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c63
-rw-r--r--drivers/gpu/drm/ttm/ttm_execbuf_util.c4
-rw-r--r--drivers/gpu/drm/ttm/ttm_object.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h6
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c4
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-crypt.c4
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c4
-rw-r--r--drivers/misc/genwqe/card_dev.c2
-rw-r--r--drivers/misc/lkdtm.h8
-rw-r--r--drivers/misc/lkdtm_bugs.c87
-rw-r--r--drivers/misc/lkdtm_core.c8
-rw-r--r--drivers/misc/mei/debugfs.c2
-rw-r--r--drivers/pci/hotplug/pnv_php.c2
-rw-r--r--drivers/pci/slot.c2
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_io.c8
-rw-r--r--drivers/scsi/cxgbi/libcxgbi.h4
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c40
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c3
-rw-r--r--drivers/scsi/qla2xxx/tcm_qla2xxx.c4
-rw-r--r--drivers/staging/android/ion/ion.c2
-rw-r--r--drivers/staging/comedi/comedi_buf.c2
-rw-r--r--drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c2
-rw-r--r--drivers/target/target_core_pr.c10
-rw-r--r--drivers/target/tcm_fc/tfc_sess.c2
-rw-r--r--drivers/tty/tty_ldsem.c18
-rw-r--r--drivers/usb/gadget/function/f_fs.c2
-rw-r--r--drivers/usb/mon/mon_main.c2
-rw-r--r--fs/exofs/sys.c2
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/ocfs2/cluster/netdebug.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c12
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c8
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c2
-rw-r--r--include/asm-generic/rwsem.h13
-rw-r--r--include/drm/drm_framebuffer.h2
-rw-r--r--include/drm/ttm/ttm_bo_api.h15
-rw-r--r--include/drm/ttm/ttm_bo_driver.h4
-rw-r--r--include/linux/jump_label.h4
-rw-r--r--include/linux/kref.h78
-rw-r--r--include/linux/mutex.h5
-rw-r--r--include/linux/percpu-rwsem.h8
-rw-r--r--include/linux/poison.h1
-rw-r--r--include/linux/rcuwait.h63
-rw-r--r--include/linux/refcount.h294
-rw-r--r--include/linux/sched.h37
-rw-r--r--include/linux/spinlock.h8
-rw-r--r--include/linux/spinlock_api_smp.h2
-rw-r--r--include/linux/spinlock_api_up.h1
-rw-r--r--include/linux/sunrpc/cache.h2
-rw-r--r--include/linux/ww_mutex.h32
-rw-r--r--include/net/bluetooth/hci_core.h4
-rw-r--r--init/version.c4
-rw-r--r--kernel/exit.c52
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/locking/Makefile1
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/locking/locktorture.c73
-rw-r--r--kernel/locking/mutex-debug.h17
-rw-r--r--kernel/locking/mutex.c516
-rw-r--r--kernel/locking/mutex.h4
-rw-r--r--kernel/locking/percpu-rwsem.c7
-rw-r--r--kernel/locking/qspinlock_paravirt.h2
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/locking/rwsem-spinlock.c18
-rw-r--r--kernel/locking/rwsem-xadd.c14
-rw-r--r--kernel/locking/semaphore.c7
-rw-r--r--kernel/locking/spinlock.c8
-rw-r--r--kernel/locking/spinlock_debug.c86
-rw-r--r--kernel/locking/test-ww_mutex.c646
-rw-r--r--kernel/pid.c4
-rw-r--r--lib/Kconfig.debug25
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/bluetooth/a2mp.c4
-rw-r--r--net/bluetooth/amp.c4
-rw-r--r--net/bluetooth/l2cap_core.c4
-rw-r--r--net/ceph/messenger.c4
-rw-r--r--net/ceph/osd_client.c10
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/svc_xprt.c6
-rw-r--r--net/sunrpc/svcauth.c15
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c4
-rw-r--r--security/apparmor/include/apparmor.h6
-rw-r--r--security/apparmor/include/policy.h4
-rw-r--r--tools/testing/selftests/locking/ww_mutex.sh10
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFLIST1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK076
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot1
116 files changed, 1864 insertions, 761 deletions
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
index 8a112dc304c3..34c3a1b50b9a 100644
--- a/Documentation/locking/ww-mutex-design.txt
+++ b/Documentation/locking/ww-mutex-design.txt
@@ -309,11 +309,15 @@ Design:
normal mutex locks, which are far more common. As such there is only a small
increase in code size if wait/wound mutexes are not used.
+ We maintain the following invariants for the wait list:
+ (1) Waiters with an acquire context are sorted by stamp order; waiters
+ without an acquire context are interspersed in FIFO order.
+ (2) Among waiters with contexts, only the first one can have other locks
+ acquired already (ctx->acquired > 0). Note that this waiter may come
+ after other waiters without contexts in the list.
+
In general, not much contention is expected. The locks are typically used to
- serialize access to resources for devices. The only way to make wakeups
- smarter would be at the cost of adding a field to struct mutex_waiter. This
- would add overhead to all cases where normal mutexes are used, and
- ww_mutexes are generally less performance sensitive.
+ serialize access to resources for devices.
Lockdep:
Special care has been taken to warn for as many cases of api abuse
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 05523f14d7b2..57f03050c850 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -76,7 +76,7 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
add_sigio_fd(random_fd);
add_wait_queue(&host_read_wait, &wait);
- set_task_state(current, TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);
schedule();
remove_wait_queue(&host_read_wait, &wait);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 921bea7a2708..6d391909e864 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -23,9 +23,6 @@
/* How long a lock should spin before we consider blocking */
#define SPIN_THRESHOLD (1 << 15)
-extern struct static_key paravirt_ticketlocks_enabled;
-static __always_inline bool static_key_false(struct static_key *key);
-
#include <asm/qspinlock.h>
/*
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index fc25f698d792..c37bd0f39c70 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -32,8 +32,7 @@ static void bug_at(unsigned char *ip, int line)
* Something went wrong. Crash the box, as something could be
* corrupting the kernel.
*/
- pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
- ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
+ pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line);
BUG();
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 36bc66416021..099fcba4981d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -620,18 +620,4 @@ void __init kvm_spinlock_init(void)
}
}
-static __init int kvm_spinlock_init_jump(void)
-{
- if (!kvm_para_available())
- return 0;
- if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
- return 0;
-
- static_key_slow_inc(&paravirt_ticketlocks_enabled);
- printk(KERN_INFO "KVM setup paravirtual spinlock\n");
-
- return 0;
-}
-early_initcall(kvm_spinlock_init_jump);
-
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 6d4bf812af45..6259327f3454 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -42,6 +42,3 @@ struct pv_lock_ops pv_lock_ops = {
#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
-
-struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
-EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index e8a9ea7d7a21..25a7c4302ce7 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -141,25 +141,6 @@ void __init xen_init_spinlocks(void)
pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
}
-/*
- * While the jump_label init code needs to happend _after_ the jump labels are
- * enabled and before SMP is started. Hence we use pre-SMP initcall level
- * init. We cannot do it in xen_init_spinlocks as that is done before
- * jump labels are activated.
- */
-static __init int xen_init_spinlocks_jump(void)
-{
- if (!xen_pvspin)
- return 0;
-
- if (!xen_domain())
- return 0;
-
- static_key_slow_inc(&paravirt_ticketlocks_enabled);
- return 0;
-}
-early_initcall(xen_init_spinlocks_jump);
-
static __init int xen_parse_nopvspin(char *arg)
{
xen_pvspin = false;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index ab62b81c2ca7..dece26f119d4 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1070,7 +1070,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
.done = 0,
.flags = flags,
.error = 0,
- .kref = { ATOMIC_INIT(2) },
+ .kref = KREF_INIT(2),
};
if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 83482721bc01..c3ff60c30dde 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2948,7 +2948,6 @@ void drbd_delete_device(struct drbd_device *device)
struct drbd_resource *resource = device->resource;
struct drbd_connection *connection;
struct drbd_peer_device *peer_device;
- int refs = 3;
/* move to free_peer_device() */
for_each_peer_device(peer_device, device)
@@ -2956,13 +2955,15 @@ void drbd_delete_device(struct drbd_device *device)
drbd_debugfs_device_cleanup(device);
for_each_connection(connection, resource) {
idr_remove(&connection->peer_devices, device->vnr);
- refs++;
+ kref_put(&device->kref, drbd_destroy_device);
}
idr_remove(&resource->devices, device->vnr);
+ kref_put(&device->kref, drbd_destroy_device);
idr_remove(&drbd_devices, device_to_minor(device));
+ kref_put(&device->kref, drbd_destroy_device);
del_gendisk(device->vdisk);
synchronize_rcu();
- kref_sub(&device->kref, refs, drbd_destroy_device);
+ kref_put(&device->kref, drbd_destroy_device);
}
static int __init drbd_init(void)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index de279fe4e4fd..b489ac2e9c44 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -421,7 +421,6 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
struct drbd_peer_device *peer_device = first_peer_device(device);
unsigned s = req->rq_state;
int c_put = 0;
- int k_put = 0;
if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP))
set |= RQ_COMPLETION_SUSP;
@@ -437,6 +436,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
/* intent: get references */
+ kref_get(&req->kref);
+
if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
atomic_inc(&req->completion_ref);
@@ -473,15 +474,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING);
- /* local completion may still come in later,
- * we need to keep the req object around. */
- kref_get(&req->kref);
++c_put;
}
if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
if (req->rq_state & RQ_LOCAL_ABORTED)
- ++k_put;
+ kref_put(&req->kref, drbd_req_destroy);
else
++c_put;
list_del_init(&req->req_pending_local);
@@ -503,7 +501,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
if (s & RQ_NET_SENT)
atomic_sub(req->i.size >> 9, &device->ap_in_flight);
if (s & RQ_EXP_BARR_ACK)
- ++k_put;
+ kref_put(&req->kref, drbd_req_destroy);
req->net_done_jif = jiffies;
/* in ahead/behind mode, or just in case,
@@ -516,25 +514,16 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
/* potentially complete and destroy */
- if (k_put || c_put) {
- /* Completion does it's own kref_put. If we are going to
- * kref_sub below, we need req to be still around then. */
- int at_least = k_put + !!c_put;
- int refcount = atomic_read(&req->kref.refcount);
- if (refcount < at_least)
- drbd_err(device,
- "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n",
- s, req->rq_state, refcount, at_least);
- }
-
/* If we made progress, retry conflicting peer requests, if any. */
if (req->i.waiting)
wake_up(&device->misc_wait);
- if (c_put)
- k_put += drbd_req_put_completion_ref(req, m, c_put);
- if (k_put)
- kref_sub(&req->kref, k_put, drbd_req_destroy);
+ if (c_put) {
+ if (drbd_req_put_completion_ref(req, m, c_put))
+ kref_put(&req->kref, drbd_req_destroy);
+ } else {
+ kref_put(&req->kref, drbd_req_destroy);
+ }
}
static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 36d2b9f4e836..436baa66f701 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1535,7 +1535,7 @@ static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request)
static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
{
dout("%s: obj %p (was %d)\n", __func__, obj_request,
- atomic_read(&obj_request->kref.refcount));
+ kref_read(&obj_request->kref));
kref_get(&obj_request->kref);
}
@@ -1544,14 +1544,14 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
{
rbd_assert(obj_request != NULL);
dout("%s: obj %p (was %d)\n", __func__, obj_request,
- atomic_read(&obj_request->kref.refcount));
+ kref_read(&obj_request->kref));
kref_put(&obj_request->kref, rbd_obj_request_destroy);
}
static void rbd_img_request_get(struct rbd_img_request *img_request)
{
dout("%s: img %p (was %d)\n", __func__, img_request,
- atomic_read(&img_request->kref.refcount));
+ kref_read(&img_request->kref));
kref_get(&img_request->kref);
}
@@ -1562,7 +1562,7 @@ static void rbd_img_request_put(struct rbd_img_request *img_request)
{
rbd_assert(img_request != NULL);
dout("%s: img %p (was %d)\n", __func__, img_request,
- atomic_read(&img_request->kref.refcount));
+ kref_read(&img_request->kref));
if (img_request_child_test(img_request))
kref_put(&img_request->kref, rbd_parent_request_destroy);
else
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 10332c24f961..264c5eac12b0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -770,7 +770,7 @@ static void virtblk_remove(struct virtio_device *vdev)
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
- refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
+ refc = kref_read(&disk_to_dev(vblk->disk)->kobj.kref);
put_disk(vblk->disk);
vdev->config->del_vqs(vdev);
kfree(vblk->vqs);
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index 1d6c335584ec..33cd51632721 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -376,7 +376,7 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj,
off = drm_vma_node_start(&obj->vma_node);
seq_printf(m, "%2d (%2d) %08llx %pad %p %zu",
- obj->name, obj->refcount.refcount.counter,
+ obj->name, kref_read(&obj->refcount),
off, &cma_obj->paddr, cma_obj->vaddr, obj->size);
seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index ffb2ab389d1d..6b68e9088436 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -118,7 +118,7 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
seq_printf(m, "%6d %8zd %7d %8d\n",
obj->name, obj->size,
obj->handle_count,
- atomic_read(&obj->refcount.refcount));
+ kref_read(&obj->refcount));
return 0;
}
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index 9f17085b1fdd..c6885a4911c0 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -159,7 +159,7 @@ EXPORT_SYMBOL(drm_mode_object_find);
void drm_mode_object_unreference(struct drm_mode_object *obj)
{
if (obj->free_cb) {
- DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount));
+ DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount));
kref_put(&obj->refcount, obj->free_cb);
}
}
@@ -176,7 +176,7 @@ EXPORT_SYMBOL(drm_mode_object_unreference);
void drm_mode_object_reference(struct drm_mode_object *obj)
{
if (obj->free_cb) {
- DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount));
+ DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount));
kref_get(&obj->refcount);
}
}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 114dddbd297b..aa6e35ddc87f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -486,7 +486,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n",
etnaviv_obj->flags, is_active(etnaviv_obj) ? 'A' : 'I',
- obj->name, obj->refcount.refcount.counter,
+ obj->name, kref_read(&obj->refcount),
off, etnaviv_obj->vaddr, obj->size);
rcu_read_lock();
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 6a368de9d81e..ecfefb9d42e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -256,7 +256,7 @@ extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
static inline bool
i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
{
- return atomic_read(&obj->base.refcount.refcount) == 0;
+ return kref_read(&obj->base.refcount) == 0;
}
static inline bool
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 8098677a3916..1974ccb781de 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -642,7 +642,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
seq_printf(m, "%08x: %c %2d (%2d) %08llx %p\t",
msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
- obj->name, obj->refcount.refcount.counter,
+ obj->name, kref_read(&obj->refcount),
off, msm_obj->vaddr);
for (id = 0; id < priv->num_aspaces; id++)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index a6126c93f215..88ee60d1b907 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -527,7 +527,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f)
* caller should have a reference on the fence,
* else fence could get freed here
*/
- WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1);
+ WARN_ON(kref_read(&fence->base.refcount) <= 1);
/*
* This needs uevents to work correctly, but dma_fence_add_callback relies on
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 4a90c690f09e..74a9968df421 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -1033,7 +1033,7 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
off = drm_vma_node_start(&obj->vma_node);
seq_printf(m, "%08x: %2d (%2d) %08llx %pad (%2d) %p %4d",
- omap_obj->flags, obj->name, obj->refcount.refcount.counter,
+ omap_obj->flags, obj->name, kref_read(&obj->refcount),
off, &omap_obj->paddr, omap_obj->paddr_cnt,
omap_obj->vaddr, omap_obj->roll);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d5063618efa7..ffc6cb55c78c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -140,8 +140,8 @@ static void ttm_bo_release_list(struct kref *list_kref)
struct ttm_bo_device *bdev = bo->bdev;
size_t acc_size = bo->acc_size;
- BUG_ON(atomic_read(&bo->list_kref.refcount));
- BUG_ON(atomic_read(&bo->kref.refcount));
+ BUG_ON(kref_read(&bo->list_kref));
+ BUG_ON(kref_read(&bo->kref));
BUG_ON(atomic_read(&bo->cpu_writers));
BUG_ON(bo->mem.mm_node != NULL);
BUG_ON(!list_empty(&bo->lru));
@@ -181,61 +181,46 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
}
EXPORT_SYMBOL(ttm_bo_add_to_lru);
-int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+static void ttm_bo_ref_bug(struct kref *list_kref)
+{
+ BUG();
+}
+
+void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
- int put_count = 0;
if (bdev->driver->lru_removal)
bdev->driver->lru_removal(bo);
if (!list_empty(&bo->swap)) {
list_del_init(&bo->swap);
- ++put_count;
+ kref_put(&bo->list_kref, ttm_bo_ref_bug);
}
if (!list_empty(&bo->lru)) {
list_del_init(&bo->lru);
- ++put_count;
+ kref_put(&bo->list_kref, ttm_bo_ref_bug);
}
-
- return put_count;
-}
-
-static void ttm_bo_ref_bug(struct kref *list_kref)
-{
- BUG();
-}
-
-void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
- bool never_free)
-{
- kref_sub(&bo->list_kref, count,
- (never_free) ? ttm_bo_ref_bug : ttm_bo_release_list);
}
void ttm_bo_del_sub_from_lru(struct ttm_buffer_object *bo)
{
- int put_count;
-
spin_lock(&bo->glob->lru_lock);
- put_count = ttm_bo_del_from_lru(bo);
+ ttm_bo_del_from_lru(bo);
spin_unlock(&bo->glob->lru_lock);
- ttm_bo_list_ref_sub(bo, put_count, true);
}
EXPORT_SYMBOL(ttm_bo_del_sub_from_lru);
void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
- int put_count = 0;
lockdep_assert_held(&bo->resv->lock.base);
if (bdev->driver->lru_removal)
bdev->driver->lru_removal(bo);
- put_count = ttm_bo_del_from_lru(bo);
- ttm_bo_list_ref_sub(bo, put_count, true);
+ ttm_bo_del_from_lru(bo);
ttm_bo_add_to_lru(bo);
}
EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
@@ -447,7 +432,6 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_global *glob = bo->glob;
- int put_count;
int ret;
spin_lock(&glob->lru_lock);
@@ -455,13 +439,10 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
if (!ret) {
if (!ttm_bo_wait(bo, false, true)) {
- put_count = ttm_bo_del_from_lru(bo);
-
+ ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
- ttm_bo_list_ref_sub(bo, put_count, true);
-
return;
} else
ttm_bo_flush_all_fences(bo);
@@ -504,7 +485,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
bool no_wait_gpu)
{
struct ttm_bo_global *glob = bo->glob;
- int put_count;
int ret;
ret = ttm_bo_wait(bo, false, true);
@@ -554,15 +534,13 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
return ret;
}
- put_count = ttm_bo_del_from_lru(bo);
+ ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- ++put_count;
+ kref_put(&bo->list_kref, ttm_bo_ref_bug);
spin_unlock(&glob->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
- ttm_bo_list_ref_sub(bo, put_count, true);
-
return 0;
}
@@ -740,7 +718,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
struct ttm_bo_global *glob = bdev->glob;
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct ttm_buffer_object *bo;
- int ret = -EBUSY, put_count;
+ int ret = -EBUSY;
spin_lock(&glob->lru_lock);
list_for_each_entry(bo, &man->lru, lru) {
@@ -771,13 +749,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
return ret;
}
- put_count = ttm_bo_del_from_lru(bo);
+ ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
BUG_ON(ret != 0);
- ttm_bo_list_ref_sub(bo, put_count, true);
-
ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
ttm_bo_unreserve(bo);
@@ -1669,7 +1645,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
container_of(shrink, struct ttm_bo_global, shrink);
struct ttm_buffer_object *bo;
int ret = -EBUSY;
- int put_count;
uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM);
spin_lock(&glob->lru_lock);
@@ -1692,11 +1667,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
return ret;
}
- put_count = ttm_bo_del_from_lru(bo);
+ ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
- ttm_bo_list_ref_sub(bo, put_count, true);
-
/**
* Move to system cached
*/
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index d35bc491e8de..5e1bcabffef5 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -48,9 +48,7 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list)
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- unsigned put_count = ttm_bo_del_from_lru(bo);
-
- ttm_bo_list_ref_sub(bo, put_count, true);
+ ttm_bo_del_from_lru(bo);
}
}
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index 4f5fa8d65fe9..fdb451e3ec01 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -304,7 +304,7 @@ bool ttm_ref_object_exists(struct ttm_object_file *tfile,
* Verify that the ref->obj pointer was actually valid!
*/
rmb();
- if (unlikely(atomic_read(&ref->kref.refcount) == 0))
+ if (unlikely(kref_read(&ref->kref) == 0))
goto out_false;
rcu_read_unlock();
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index b9efadfffb4f..e66e75921797 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -55,14 +55,14 @@
#define put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
kref_put(&((ep)->kref), __free_ep); \
}
#define get_ep(ep) { \
PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
+ ep, kref_read(&((ep)->kref))); \
kref_get(&((ep)->kref)); \
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index d939980a708f..a9194db7f9b8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -961,7 +961,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
case IWCH_QP_STATE_RTS:
switch (attrs->next_state) {
case IWCH_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
qhp->attr.state = IWCH_QP_STATE_CLOSING;
if (!internal) {
abort=0;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 8cd4d054a87e..d19662f635b1 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -672,14 +672,14 @@ enum c4iw_mmid_state {
#define c4iw_put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
kref_put(&((ep)->kref), _c4iw_free_ep); \
}
#define c4iw_get_ep(ep) { \
PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
+ ep, kref_read(&((ep)->kref))); \
kref_get(&((ep)->kref)); \
}
void _c4iw_free_ep(struct kref *kref);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 04c1c382dedb..d4fd2f5c8326 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1580,7 +1580,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
case C4IW_QP_STATE_RTS:
switch (attrs->next_state) {
case C4IW_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_CLOSING);
ep = qhp->ep;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 80ef3f8998c8..04443242e258 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -80,7 +80,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
left = PAGE_SIZE;
mutex_lock(&us_ibdev->usdev_lock);
- if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+ if (kref_read(&us_ibdev->vf_cnt) > 0) {
char *busname;
/*
@@ -99,7 +99,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
PCI_FUNC(us_ibdev->pdev->devfn),
netdev_name(us_ibdev->netdev),
us_ibdev->ufdev->mac,
- atomic_read(&us_ibdev->vf_cnt.refcount));
+ kref_read(&us_ibdev->vf_cnt));
UPDATE_PTR_LEFT(n, ptr, left);
for (res_type = USNIC_VNIC_RES_TYPE_EOL;
@@ -147,7 +147,7 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
return scnprintf(buf, PAGE_SIZE, "%u\n",
- atomic_read(&us_ibdev->vf_cnt.refcount));
+ kref_read(&us_ibdev->vf_cnt));
}
static ssize_t
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 74819a7951e2..69df8e353123 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -291,11 +291,11 @@ int usnic_ib_query_device(struct ib_device *ibdev,
qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
props->max_qp = qp_per_vf *
- atomic_read(&us_ibdev->vf_cnt.refcount);
+ kref_read(&us_ibdev->vf_cnt);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
- atomic_read(&us_ibdev->vf_cnt.refcount);
+ kref_read(&us_ibdev->vf_cnt);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
props->max_mr = USNIC_UIOM_MAX_MR_CNT;
props->local_ca_ack_delay = 0;
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 84d2f0e4c754..d36d427a9efb 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -794,7 +794,7 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&c->free_buffer_wait, &wait);
- set_task_state(current, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
dm_bufio_unlock(c);
io_schedule();
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8a9f742d8ed7..1cb2ca9dfae3 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1210,14 +1210,14 @@ continue_locked:
spin_unlock_irq(&cc->write_thread_wait.lock);
if (unlikely(kthread_should_stop())) {
- set_task_state(current, TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
remove_wait_queue(&cc->write_thread_wait, &wait);
break;
}
schedule();
- set_task_state(current, TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
spin_lock_irq(&cc->write_thread_wait.lock);
__remove_wait_queue(&cc->write_thread_wait, &wait);
goto continue_locked;
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index a6dde7cab458..758d90cc2733 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -120,7 +120,7 @@ static int __check_holder(struct block_lock *lock)
static void __wait(struct waiter *w)
{
for (;;) {
- set_task_state(current, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!w->task)
break;
@@ -128,7 +128,7 @@ static void __wait(struct waiter *w)
schedule();
}
- set_task_state(current, TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
}
static void __wake_waiter(struct waiter *w)
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index 7f1b282d7d96..cb290b8ca0c8 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -1396,7 +1396,7 @@ int genwqe_device_remove(struct genwqe_dev *cd)
* application which will decrease this reference from
* 1/unused to 0/illegal and not from 2/used 1/empty.
*/
- rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount);
+ rc = kref_read(&cd->cdev_genwqe.kobj.kref);
if (rc != 1) {
dev_err(&pci_dev->dev,
"[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h
index cfa1039c62e7..67d27be60405 100644
--- a/drivers/misc/lkdtm.h
+++ b/drivers/misc/lkdtm.h
@@ -19,8 +19,12 @@ void lkdtm_SOFTLOCKUP(void);
void lkdtm_HARDLOCKUP(void);
void lkdtm_SPINLOCKUP(void);
void lkdtm_HUNG_TASK(void);
-void lkdtm_ATOMIC_UNDERFLOW(void);
-void lkdtm_ATOMIC_OVERFLOW(void);
+void lkdtm_REFCOUNT_SATURATE_INC(void);
+void lkdtm_REFCOUNT_SATURATE_ADD(void);
+void lkdtm_REFCOUNT_ZERO_DEC(void);
+void lkdtm_REFCOUNT_ZERO_INC(void);
+void lkdtm_REFCOUNT_ZERO_SUB(void);
+void lkdtm_REFCOUNT_ZERO_ADD(void);
void lkdtm_CORRUPT_LIST_ADD(void);
void lkdtm_CORRUPT_LIST_DEL(void);
diff --git a/drivers/misc/lkdtm_bugs.c b/drivers/misc/lkdtm_bugs.c
index 91edd0b55e5c..cba0837aee2e 100644
--- a/drivers/misc/lkdtm_bugs.c
+++ b/drivers/misc/lkdtm_bugs.c
@@ -6,6 +6,7 @@
*/
#include "lkdtm.h"
#include <linux/list.h>
+#include <linux/refcount.h>
#include <linux/sched.h>
struct lkdtm_list {
@@ -129,28 +130,86 @@ void lkdtm_HUNG_TASK(void)
schedule();
}
-void lkdtm_ATOMIC_UNDERFLOW(void)
+void lkdtm_REFCOUNT_SATURATE_INC(void)
{
- atomic_t under = ATOMIC_INIT(INT_MIN);
+ refcount_t over = REFCOUNT_INIT(UINT_MAX - 1);
- pr_info("attempting good atomic increment\n");
- atomic_inc(&under);
- atomic_dec(&under);
+ pr_info("attempting good refcount decrement\n");
+ refcount_dec(&over);
+ refcount_inc(&over);
- pr_info("attempting bad atomic underflow\n");
- atomic_dec(&under);
+ pr_info("attempting bad refcount inc overflow\n");
+ refcount_inc(&over);
+ refcount_inc(&over);
+ if (refcount_read(&over) == UINT_MAX)
+ pr_err("Correctly stayed saturated, but no BUG?!\n");
+ else
+ pr_err("Fail: refcount wrapped\n");
+}
+
+void lkdtm_REFCOUNT_SATURATE_ADD(void)
+{
+ refcount_t over = REFCOUNT_INIT(UINT_MAX - 1);
+
+ pr_info("attempting good refcount decrement\n");
+ refcount_dec(&over);
+ refcount_inc(&over);
+
+ pr_info("attempting bad refcount add overflow\n");
+ refcount_add(2, &over);
+ if (refcount_read(&over) == UINT_MAX)
+ pr_err("Correctly stayed saturated, but no BUG?!\n");
+ else
+ pr_err("Fail: refcount wrapped\n");
+}
+
+void lkdtm_REFCOUNT_ZERO_DEC(void)
+{
+ refcount_t zero = REFCOUNT_INIT(1);
+
+ pr_info("attempting bad refcount decrement to zero\n");
+ refcount_dec(&zero);
+ if (refcount_read(&zero) == 0)
+ pr_err("Stayed at zero, but no BUG?!\n");
+ else
+ pr_err("Fail: refcount went crazy\n");
}
-void lkdtm_ATOMIC_OVERFLOW(void)
+void lkdtm_REFCOUNT_ZERO_SUB(void)
{
- atomic_t over = ATOMIC_INIT(INT_MAX);
+ refcount_t zero = REFCOUNT_INIT(1);
+
+ pr_info("attempting bad refcount subtract past zero\n");
+ if (!refcount_sub_and_test(2, &zero))
+ pr_info("wrap attempt was noticed\n");
+ if (refcount_read(&zero) == 1)
+ pr_err("Correctly stayed above 0, but no BUG?!\n");
+ else
+ pr_err("Fail: refcount wrapped\n");
+}
- pr_info("attempting good atomic decrement\n");
- atomic_dec(&over);
- atomic_inc(&over);
+void lkdtm_REFCOUNT_ZERO_INC(void)
+{
+ refcount_t zero = REFCOUNT_INIT(0);
- pr_info("attempting bad atomic overflow\n");
- atomic_inc(&over);
+ pr_info("attempting bad refcount increment from zero\n");
+ refcount_inc(&zero);
+ if (refcount_read(&zero) == 0)
+ pr_err("Stayed at zero, but no BUG?!\n");
+ else
+ pr_err("Fail: refcount went past zero\n");
+}
+
+void lkdtm_REFCOUNT_ZERO_ADD(void)
+{
+ refcount_t zero = REFCOUNT_INIT(0);
+
+ pr_info("attempting bad refcount addition from zero\n");
+ refcount_add(2, &zero);
+ if (refcount_read(&zero) == 0)
+ pr_err("Stayed at zero, but no BUG?!\n");
+ else
+ pr_err("Fail: refcount went past zero\n");
}
void lkdtm_CORRUPT_LIST_ADD(void)
diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c
index 7eeb71a75549..16e4cf110930 100644
--- a/drivers/misc/lkdtm_core.c
+++ b/drivers/misc/lkdtm_core.c
@@ -220,8 +220,12 @@ struct crashtype crashtypes[] = {
CRASHTYPE(WRITE_RO),
CRASHTYPE(WRITE_RO_AFTER_INIT),
CRASHTYPE(WRITE_KERN),
- CRASHTYPE(ATOMIC_UNDERFLOW),
- CRASHTYPE(ATOMIC_OVERFLOW),
+ CRASHTYPE(REFCOUNT_SATURATE_INC),
+ CRASHTYPE(REFCOUNT_SATURATE_ADD),
+ CRASHTYPE(REFCOUNT_ZERO_DEC),
+ CRASHTYPE(REFCOUNT_ZERO_INC),
+ CRASHTYPE(REFCOUNT_ZERO_SUB),
+ CRASHTYPE(REFCOUNT_ZERO_ADD),
CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
CRASHTYPE(USERCOPY_HEAP_FLAG_TO),
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index c6217a4993ad..a617aa5a3ad8 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -67,7 +67,7 @@ static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf,
me_cl->props.max_number_of_connections,
me_cl->props.max_msg_length,
me_cl->props.single_recv_buf,
- atomic_read(&me_cl->refcnt.refcount));
+ kref_read(&me_cl->refcnt));
mei_me_cl_put(me_cl);
}
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 56efaf72d08e..d2961ef39a3a 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -155,7 +155,7 @@ static void pnv_php_detach_device_nodes(struct device_node *parent)
pnv_php_detach_device_nodes(dn);
of_node_put(dn);
- refcount = atomic_read(&dn->kobj.kref.refcount);
+ refcount = kref_read(&dn->kobj.kref);
if (refcount != 1)
pr_warn("Invalid refcount %d on <%s>\n",
refcount, of_node_full_name(dn));
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 429d34c348b9..e42909524dee 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -345,7 +345,7 @@ EXPORT_SYMBOL_GPL(pci_create_slot);
void pci_destroy_slot(struct pci_slot *slot)
{
dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
- slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
+ slot->number, kref_read(&slot->kobj.kref) - 1);
mutex_lock(&pci_slot_mutex);
kobject_put(&slot->kobj);
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index f501095f91ac..898461b146cc 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -74,7 +74,7 @@ static void bnx2fc_cmd_timeout(struct work_struct *work)
&io_req->req_flags)) {
/* Handle internally generated ABTS timeout */
BNX2FC_IO_DBG(io_req, "ABTS timed out refcnt = %d\n",
- io_req->refcount.refcount.counter);
+ kref_read(&io_req->refcount));
if (!(test_and_set_bit(BNX2FC_FLAG_ABTS_DONE,
&io_req->req_flags))) {
/*
@@ -1141,7 +1141,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
return SUCCESS;
}
BNX2FC_IO_DBG(io_req, "eh_abort - refcnt = %d\n",
- io_req->refcount.refcount.counter);
+ kref_read(&io_req->refcount));
/* Hold IO request across abort processing */
kref_get(&io_req->refcount);
@@ -1299,7 +1299,7 @@ void bnx2fc_process_cleanup_compl(struct bnx2fc_cmd *io_req,
{
BNX2FC_IO_DBG(io_req, "Entered process_cleanup_compl "
"refcnt = %d, cmd_type = %d\n",
- io_req->refcount.refcount.counter, io_req->cmd_type);
+ kref_read(&io_req->refcount), io_req->cmd_type);
bnx2fc_scsi_done(io_req, DID_ERROR);
kref_put(&io_req->refcount, bnx2fc_cmd_release);
if (io_req->wait_for_comp)
@@ -1318,7 +1318,7 @@ void bnx2fc_process_abts_compl(struct bnx2fc_cmd *io_req,
BNX2FC_IO_DBG(io_req, "Entered process_abts_compl xid = 0x%x"
"refcnt = %d, cmd_type = %d\n",
io_req->xid,
- io_req->refcount.refcount.counter, io_req->cmd_type);
+ kref_read(&io_req->refcount), io_req->cmd_type);
if (test_and_set_bit(BNX2FC_FLAG_ABTS_DONE,
&io_req->req_flags)) {
diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h
index 95ba99044c3e..18e0ea83d361 100644
--- a/drivers/scsi/cxgbi/libcxgbi.h
+++ b/drivers/scsi/cxgbi/libcxgbi.h
@@ -301,7 +301,7 @@ static inline void __cxgbi_sock_put(const char *fn, struct cxgbi_sock *csk)
{
log_debug(1 << CXGBI_DBG_SOCK,
"%s, put csk 0x%p, ref %u-1.\n",
- fn, csk, atomic_read(&csk->refcnt.refcount));
+ fn, csk, kref_read(&csk->refcnt));
kref_put(&csk->refcnt, cxgbi_sock_free);
}
#define cxgbi_sock_put(csk) __cxgbi_sock_put(__func__, csk)
@@ -310,7 +310,7 @@ static inline void __cxgbi_sock_get(const char *fn, struct cxgbi_sock *csk)
{
log_debug(1 << CXGBI_DBG_SOCK,
"%s, get csk 0x%p, ref %u+1.\n",
- fn, csk, atomic_read(&csk->refcnt.refcount));
+ fn, csk, kref_read(&csk->refcnt));
kref_get(&csk->refcnt);
}
#define cxgbi_sock_get(csk) __cxgbi_sock_get(__func__, csk)
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index a63542bac153..caa7a7b0ec53 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -607,7 +607,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
len += snprintf(buf+len, size-len, "usgmap:%x ",
ndlp->nlp_usg_map);
len += snprintf(buf+len, size-len, "refcnt:%x",
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
len += snprintf(buf+len, size-len, "\n");
}
spin_unlock_irq(shost->host_lock);
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 7b6bd8ed0d0b..63bef4566548 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -3690,7 +3690,7 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
"0006 rpi%x DID:%x flg:%x %d map:%x %p\n",
ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
if (NLP_CHK_NODE_ACT(ndlp)) {
lpfc_nlp_put(ndlp);
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index ed223937798a..82047070cdc9 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -3440,7 +3440,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
"0002 rpi:%x DID:%x flg:%x %d map:%x %p\n",
ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND)
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
@@ -3861,7 +3861,7 @@ out:
lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
"0003 rpi:%x DID:%x flg:%x %d map%x %p\n",
ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
if (vport->port_state < LPFC_VPORT_READY) {
@@ -4238,7 +4238,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
"0277 lpfc_enable_node: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
return NULL;
}
/* The ndlp should not already be in active mode */
@@ -4248,7 +4248,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
"0278 lpfc_enable_node: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
return NULL;
}
@@ -4272,7 +4272,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
"0008 rpi:%x DID:%x flg:%x refcnt:%d "
"map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
}
@@ -4546,7 +4546,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
(bf_get(lpfc_sli_intf_if_type,
&phba->sli4_hba.sli_intf) ==
LPFC_SLI_INTF_IF_TYPE_2) &&
- (atomic_read(&ndlp->kref.refcount) > 0)) {
+ (kref_read(&ndlp->kref) > 0)) {
mbox->context1 = lpfc_nlp_get(ndlp);
mbox->mbox_cmpl =
lpfc_sli4_unreg_rpi_cmpl_clr;
@@ -4695,14 +4695,14 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
"0280 lpfc_cleanup_node: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
lpfc_dequeue_node(vport, ndlp);
} else {
lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
"0281 lpfc_cleanup_node: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
lpfc_disable_node(vport, ndlp);
}
@@ -4791,7 +4791,7 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
"0005 rpi:%x DID:%x flg:%x %d map:%x %p\n",
ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))
!= NULL) {
@@ -5557,7 +5557,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
"0004 rpi:%x DID:%x flg:%x %d map:%x %p\n",
ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
/*
* Start issuing Fabric-Device Management Interface (FDMI) command to
@@ -5728,7 +5728,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
"0007 rpi:%x DID:%x flg:%x refcnt:%d "
"map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount),
+ kref_read(&ndlp->kref),
ndlp->nlp_usg_map, ndlp);
ndlp->active_rrqs_xri_bitmap =
@@ -5767,7 +5767,7 @@ lpfc_nlp_release(struct kref *kref)
"0279 lpfc_nlp_release: ndlp:x%p did %x "
"usgmap:x%x refcnt:%d rpi:%x\n",
(void *)ndlp, ndlp->nlp_DID, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount), ndlp->nlp_rpi);
+ kref_read(&ndlp->kref), ndlp->nlp_rpi);
/* remove ndlp from action. */
lpfc_nlp_remove(ndlp->vport, ndlp);
@@ -5804,7 +5804,7 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp)
lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
"node get: did:x%x flg:x%x refcnt:x%x",
ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
/* The check of ndlp usage to prevent incrementing the
* ndlp reference count that is in the process of being
* released.
@@ -5817,7 +5817,7 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp)
"0276 lpfc_nlp_get: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
return NULL;
} else
kref_get(&ndlp->kref);
@@ -5844,7 +5844,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
"node put: did:x%x flg:x%x refcnt:x%x",
ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
phba = ndlp->phba;
spin_lock_irqsave(&phba->ndlp_lock, flags);
/* Check the ndlp memory free acknowledge flag to avoid the
@@ -5857,7 +5857,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
"0274 lpfc_nlp_put: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
return 1;
}
/* Check the ndlp inactivate log flag to avoid the possible
@@ -5870,7 +5870,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
"0275 lpfc_nlp_put: ndlp:x%p "
"usgmap:x%x refcnt:%d\n",
(void *)ndlp, ndlp->nlp_usg_map,
- atomic_read(&ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
return 1;
}
/* For last put, mark the ndlp usage flags to make sure no
@@ -5878,7 +5878,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
* in between the process when the final kref_put has been
* invoked on this ndlp.
*/
- if (atomic_read(&ndlp->kref.refcount) == 1) {
+ if (kref_read(&ndlp->kref) == 1) {
/* Indicate ndlp is put to inactive state. */
NLP_SET_IACT_REQ(ndlp);
/* Acknowledge ndlp memory free has been seen. */
@@ -5906,8 +5906,8 @@ lpfc_nlp_not_used(struct lpfc_nodelist *ndlp)
lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
"node not used: did:x%x flg:x%x refcnt:x%x",
ndlp->nlp_DID, ndlp->nlp_flag,
- atomic_read(&ndlp->kref.refcount));
- if (atomic_read(&ndlp->kref.refcount) == 1)
+ kref_read(&ndlp->kref));
+ if (kref_read(&ndlp->kref) == 1)
if (lpfc_nlp_put(ndlp))
return 1;
return 0;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4776fd85514f..64717c171b15 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -2660,8 +2660,7 @@ lpfc_cleanup(struct lpfc_vport *vport)
"usgmap:x%x refcnt:%d\n",
ndlp->nlp_DID, (void *)ndlp,
ndlp->nlp_usg_map,
- atomic_read(
- &ndlp->kref.refcount));
+ kref_read(&ndlp->kref));
}
break;
}
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index d925910be761..3084983c1287 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -371,7 +371,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd)
*/
pr_debug("write_pending aborted cmd[%p] refcount %d "
"transport_state %x, t_state %x, se_cmd_flags %x\n",
- cmd,cmd->se_cmd.cmd_kref.refcount.counter,
+ cmd, kref_read(&cmd->se_cmd.cmd_kref),
cmd->se_cmd.transport_state,
cmd->se_cmd.t_state,
cmd->se_cmd.se_cmd_flags);
@@ -584,7 +584,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
*/
pr_debug("queue_data_in aborted cmd[%p] refcount %d "
"transport_state %x, t_state %x, se_cmd_flags %x\n",
- cmd,cmd->se_cmd.cmd_kref.refcount.counter,
+ cmd, kref_read(&cmd->se_cmd.cmd_kref),
cmd->se_cmd.transport_state,
cmd->se_cmd.t_state,
cmd->se_cmd.se_cmd_flags);
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index b653451843c8..937c2d5d7ec3 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -1300,7 +1300,7 @@ static int ion_debug_heap_show(struct seq_file *s, void *unused)
seq_printf(s, "%16s %16u %16zu %d %d\n",
buffer->task_comm, buffer->pid,
buffer->size, buffer->kmap_cnt,
- atomic_read(&buffer->ref.refcount));
+ kref_read(&buffer->ref));
total_orphaned_size += buffer->size;
}
}
diff --git a/drivers/staging/comedi/comedi_buf.c b/drivers/staging/comedi/comedi_buf.c
index c7d7682b1412..1e1df89b5018 100644
--- a/drivers/staging/comedi/comedi_buf.c
+++ b/drivers/staging/comedi/comedi_buf.c
@@ -188,7 +188,7 @@ bool comedi_buf_is_mmapped(struct comedi_subdevice *s)
{
struct comedi_buf_map *bm = s->async->buf_map;
- return bm && (atomic_read(&bm->refcount.refcount) > 1);
+ return bm && (kref_read(&bm->refcount) > 1);
}
int comedi_buf_alloc(struct comedi_device *dev, struct comedi_subdevice *s,
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
index 39a72e3f0c18..7035356e56b3 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
@@ -107,7 +107,7 @@ void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
libcfs_debug_dumplog();
if (libcfs_panic_on_lbug)
panic("LBUG");
- set_task_state(current, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
while (1)
schedule();
}
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index d761025144f9..e18051185846 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -788,7 +788,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
* __core_scsi3_add_registration()
*/
dest_lun = rcu_dereference_check(deve_tmp->se_lun,
- atomic_read(&deve_tmp->pr_kref.refcount) != 0);
+ kref_read(&deve_tmp->pr_kref) != 0);
pr_reg_atp = __core_scsi3_do_alloc_registration(dev,
nacl_tmp, dest_lun, deve_tmp,
@@ -1463,7 +1463,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve)
* For nacl->dynamic_node_acl=1
*/
lun_acl = rcu_dereference_check(se_deve->se_lun_acl,
- atomic_read(&se_deve->pr_kref.refcount) != 0);
+ kref_read(&se_deve->pr_kref) != 0);
if (!lun_acl)
return 0;
@@ -1478,7 +1478,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
* For nacl->dynamic_node_acl=1
*/
lun_acl = rcu_dereference_check(se_deve->se_lun_acl,
- atomic_read(&se_deve->pr_kref.refcount) != 0);
+ kref_read(&se_deve->pr_kref) != 0);
if (!lun_acl) {
kref_put(&se_deve->pr_kref, target_pr_kref_release);
return;
@@ -1759,7 +1759,7 @@ core_scsi3_decode_spec_i_port(
* 2nd loop which will never fail.
*/
dest_lun = rcu_dereference_check(dest_se_deve->se_lun,
- atomic_read(&dest_se_deve->pr_kref.refcount) != 0);
+ kref_read(&dest_se_deve->pr_kref) != 0);
dest_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev,
dest_node_acl, dest_lun, dest_se_deve,
@@ -3466,7 +3466,7 @@ after_iport_check:
iport_ptr);
if (!dest_pr_reg) {
struct se_lun *dest_lun = rcu_dereference_check(dest_se_deve->se_lun,
- atomic_read(&dest_se_deve->pr_kref.refcount) != 0);
+ kref_read(&dest_se_deve->pr_kref) != 0);
spin_unlock(&dev->dev_reservation_lock);
if (core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl,
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index fd5c3de79470..c91979c1463d 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -454,7 +454,7 @@ static void ft_sess_free(struct kref *kref)
void ft_sess_put(struct ft_sess *sess)
{
- int sess_held = atomic_read(&sess->kref.refcount);
+ int sess_held = kref_read(&sess->kref);
BUG_ON(!sess_held);
kref_put(&sess->kref, ft_sess_free);
diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c
index 1bf8ed13f827..9229de43e19d 100644
--- a/drivers/tty/tty_ldsem.c
+++ b/drivers/tty/tty_ldsem.c
@@ -200,7 +200,6 @@ static struct ld_semaphore __sched *
down_read_failed(struct ld_semaphore *sem, long count, long timeout)
{
struct ldsem_waiter waiter;
- struct task_struct *tsk = current;
long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;
/* set up my own style of waitqueue */
@@ -221,8 +220,8 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
list_add_tail(&waiter.list, &sem->read_wait);
sem->wait_readers++;
- waiter.task = tsk;
- get_task_struct(tsk);
+ waiter.task = current;
+ get_task_struct(current);
/* if there are no active locks, wake the new lock owner(s) */
if ((count & LDSEM_ACTIVE_MASK) == 0)
@@ -232,7 +231,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
/* wait to be given the lock */
for (;;) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!waiter.task)
break;
@@ -241,7 +240,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
timeout = schedule_timeout(timeout);
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
if (!timeout) {
/* lock timed out but check if this task was just
@@ -268,7 +267,6 @@ static struct ld_semaphore __sched *
down_write_failed(struct ld_semaphore *sem, long count, long timeout)
{
struct ldsem_waiter waiter;
- struct task_struct *tsk = current;
long adjust = -LDSEM_ACTIVE_BIAS;
int locked = 0;
@@ -289,16 +287,16 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout)
list_add_tail(&waiter.list, &sem->write_wait);
- waiter.task = tsk;
+ waiter.task = current;
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
for (;;) {
if (!timeout)
break;
raw_spin_unlock_irq(&sem->wait_lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->wait_lock);
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
locked = writer_trylock(sem);
if (locked)
break;
@@ -309,7 +307,7 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout)
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
/* lock wait may have timed out */
if (!locked)
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index fd80c1b9c823..e6a17455adac 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3698,7 +3698,7 @@ static void ffs_closed(struct ffs_data *ffs)
goto done;
if (opts->no_configfs || !opts->func_inst.group.cg_item.ci_parent
- || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount))
+ || !kref_read(&opts->func_inst.group.cg_item.ci_kref))
goto done;
ci = opts->func_inst.group.cg_item.ci_parent->ci_parent;
diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c
index 33ff49c4cea4..46847340b819 100644
--- a/drivers/usb/mon/mon_main.c
+++ b/drivers/usb/mon/mon_main.c
@@ -409,7 +409,7 @@ static void __exit mon_exit(void)
printk(KERN_ERR TAG
": Outstanding opens (%d) on usb%d, leaking...\n",
mbus->nreaders, mbus->u_bus->busnum);
- atomic_set(&mbus->ref.refcount, 2); /* Force leak */
+ kref_get(&mbus->ref); /* Force leak */
}
mon_dissolve(mbus, mbus->u_bus);
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index 5e6a2c0a1f0b..1f7d5e46cdda 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -122,7 +122,7 @@ void exofs_sysfs_dbg_print(void)
list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
printk(KERN_INFO "%s: name %s ref %d\n",
__func__, kobject_name(k_name),
- (int)atomic_read(&k_name->kref.refcount));
+ (int)kref_read(&k_name->kref));
}
#endif
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 91307940c8ac..052f8d3c41cb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -256,7 +256,7 @@ struct fuse_io_priv {
#define FUSE_IO_PRIV_SYNC(f) \
{ \
- .refcnt = { ATOMIC_INIT(1) }, \
+ .refcnt = KREF_INIT(1), \
.async = 0, \
.file = f, \
}
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 27d1242c8383..564c504d6efd 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -349,7 +349,7 @@ static void sc_show_sock_container(struct seq_file *seq,
" func key: 0x%08x\n"
" func type: %u\n",
sc,
- atomic_read(&sc->sc_kref.refcount),
+ kref_read(&sc->sc_kref),
&saddr, inet ? ntohs(sport) : 0,
&daddr, inet ? ntohs(dport) : 0,
sc->sc_node->nd_name,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index d4b5c81f0445..ec000575e863 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -97,7 +97,7 @@
typeof(sc) __sc = (sc); \
mlog(ML_SOCKET, "[sc %p refs %d sock %p node %u page %p " \
"pg_off %zu] " fmt, __sc, \
- atomic_read(&__sc->sc_kref.refcount), __sc->sc_sock, \
+ kref_read(&__sc->sc_kref), __sc->sc_sock, \
__sc->sc_node->nd_num, __sc->sc_page, __sc->sc_page_off , \
##args); \
} while (0)
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index e7b760deefae..9b984cae4c4e 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -81,7 +81,7 @@ static void __dlm_print_lock(struct dlm_lock *lock)
lock->ml.type, lock->ml.convert_type, lock->ml.node,
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
- atomic_read(&lock->lock_refs.refcount),
+ kref_read(&lock->lock_refs),
(list_empty(&lock->ast_list) ? 'y' : 'n'),
(lock->ast_pending ? 'y' : 'n'),
(list_empty(&lock->bast_list) ? 'y' : 'n'),
@@ -106,7 +106,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
printk("lockres: %s, owner=%u, state=%u\n",
buf, res->owner, res->state);
printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
- res->last_used, atomic_read(&res->refs.refcount),
+ res->last_used, kref_read(&res->refs),
list_empty(&res->purge) ? "no" : "yes");
printk(" on dirty list: %s, on reco list: %s, "
"migrating pending: %s\n",
@@ -298,7 +298,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
mle_type, mle->master, mle->new_master,
!list_empty(&mle->hb_events),
!!mle->inuse,
- atomic_read(&mle->mle_refs.refcount));
+ kref_read(&mle->mle_refs));
out += snprintf(buf + out, len - out, "Maybe=");
out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES,
@@ -494,7 +494,7 @@ static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len)
lock->ast_pending, lock->bast_pending,
lock->convert_pending, lock->lock_pending,
lock->cancel_pending, lock->unlock_pending,
- atomic_read(&lock->lock_refs.refcount));
+ kref_read(&lock->lock_refs));
spin_unlock(&lock->spinlock);
return out;
@@ -521,7 +521,7 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len)
!list_empty(&res->recovering),
res->inflight_locks, res->migration_pending,
atomic_read(&res->asts_reserved),
- atomic_read(&res->refs.refcount));
+ kref_read(&res->refs));
/* refmap */
out += snprintf(buf + out, len - out, "RMAP:");
@@ -777,7 +777,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
/* Purge Count: xxx Refs: xxx */
out += snprintf(buf + out, len - out,
"Purge Count: %d Refs: %d\n", dlm->purge_count,
- atomic_read(&dlm->dlm_refs.refcount));
+ kref_read(&dlm->dlm_refs));
/* Dead Node: xxx */
out += snprintf(buf + out, len - out,
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 733e4e79c8e2..32fd261ae13d 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -2072,7 +2072,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks);
mlog(0, "context init: refcount %u\n",
- atomic_read(&dlm->dlm_refs.refcount));
+ kref_read(&dlm->dlm_refs));
leave:
if (ret < 0 && dlm) {
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a464c8088170..7025d8c27999 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -233,7 +233,7 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle)
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&dlm->master_lock);
- if (!atomic_read(&mle->mle_refs.refcount)) {
+ if (!kref_read(&mle->mle_refs)) {
/* this may or may not crash, but who cares.
* it's a BUG. */
mlog(ML_ERROR, "bad mle: %p\n", mle);
@@ -1124,9 +1124,9 @@ recheck:
unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS);
/*
- if (atomic_read(&mle->mle_refs.refcount) < 2)
+ if (kref_read(&mle->mle_refs) < 2)
mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle,
- atomic_read(&mle->mle_refs.refcount),
+ kref_read(&mle->mle_refs),
res->lockname.len, res->lockname.name);
*/
atomic_set(&mle->woken, 0);
@@ -1979,7 +1979,7 @@ ok:
* on this mle. */
spin_lock(&dlm->master_lock);
- rr = atomic_read(&mle->mle_refs.refcount);
+ rr = kref_read(&mle->mle_refs);
if (mle->inuse > 0) {
if (extra_ref && rr < 3)
err = 1;
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 1082b2c3014b..63d701cd1e2e 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -251,7 +251,7 @@ leave:
mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
- atomic_read(&lock->lock_refs.refcount)-1);
+ kref_read(&lock->lock_refs)-1);
dlm_lock_put(lock);
}
if (actions & DLM_UNLOCK_CALL_AST)
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index 5be122e3d326..6c6a2141f271 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -33,7 +33,7 @@
*/
static inline void __down_read(struct rw_semaphore *sem)
{
- if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0))
+ if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
rwsem_down_read_failed(sem);
}
@@ -58,7 +58,7 @@ static inline void __down_write(struct rw_semaphore *sem)
long tmp;
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_long_t *)&sem->count);
+ &sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
rwsem_down_write_failed(sem);
}
@@ -68,7 +68,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
long tmp;
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_long_t *)&sem->count);
+ &sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
if (IS_ERR(rwsem_down_write_failed_killable(sem)))
return -EINTR;
@@ -91,7 +91,7 @@ static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
- tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count);
+ tmp = atomic_long_dec_return_release(&sem->count);
if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
rwsem_wake(sem);
}
@@ -102,7 +102,7 @@ static inline void __up_read(struct rw_semaphore *sem)
static inline void __up_write(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_long_t *)&sem->count) < 0))
+ &sem->count) < 0))
rwsem_wake(sem);
}
@@ -120,8 +120,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
* read-locked region is ok to be re-ordered into the
* write side. As such, rely on RELEASE semantics.
*/
- tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS,
- (atomic_long_t *)&sem->count);
+ tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
if (tmp < 0)
rwsem_downgrade_wake(sem);
}
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index 1ddfa2928802..a232e7f0c869 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -247,7 +247,7 @@ static inline void drm_framebuffer_unreference(struct drm_framebuffer *fb)
*/
static inline uint32_t drm_framebuffer_read_refcount(struct drm_framebuffer *fb)
{
- return atomic_read(&fb->base.refcount.refcount);
+ return kref_read(&fb->base.refcount);
}
/**
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 652e45be97c8..9a465314572c 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -332,19 +332,6 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo,
*/
extern void ttm_bo_unref(struct ttm_buffer_object **bo);
-
-/**
- * ttm_bo_list_ref_sub
- *
- * @bo: The buffer object.
- * @count: The number of references with which to decrease @bo::list_kref;
- * @never_free: The refcount should not reach zero with this operation.
- *
- * Release @count lru list references to this buffer object.
- */
-extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
- bool never_free);
-
/**
* ttm_bo_add_to_lru
*
@@ -367,7 +354,7 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
* and is usually called just immediately after the bo has been reserved to
* avoid recursive reservation from lru lists.
*/
-extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
+extern void ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
/**
* ttm_bo_move_to_lru_tail
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cdbdb40eb5bd..feecf33a1212 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -878,7 +878,7 @@ static inline int ttm_bo_reserve(struct ttm_buffer_object *bo,
{
int ret;
- WARN_ON(!atomic_read(&bo->kref.refcount));
+ WARN_ON(!kref_read(&bo->kref));
ret = __ttm_bo_reserve(bo, interruptible, no_wait, ticket);
if (likely(ret == 0))
@@ -903,7 +903,7 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
{
int ret = 0;
- WARN_ON(!atomic_read(&bo->kref.refcount));
+ WARN_ON(!kref_read(&bo->kref));
if (interruptible)
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a0547c571800..b63d6b7b0db0 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -402,6 +402,6 @@ extern bool ____wrong_branch_error(void);
#define static_branch_enable(x) static_key_enable(&(x)->key)
#define static_branch_disable(x) static_key_disable(&(x)->key)
-#endif /* _LINUX_JUMP_LABEL_H */
-
#endif /* __ASSEMBLY__ */
+
+#endif /* _LINUX_JUMP_LABEL_H */
diff --git a/include/linux/kref.h b/include/linux/kref.h
index e15828fd71f1..f4156f88f557 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -15,22 +15,27 @@
#ifndef _KREF_H_
#define _KREF_H_
-#include <linux/bug.h>
-#include <linux/atomic.h>
-#include <linux/kernel.h>
-#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/refcount.h>
struct kref {
- atomic_t refcount;
+ refcount_t refcount;
};
+#define KREF_INIT(n) { .refcount = REFCOUNT_INIT(n), }
+
/**
* kref_init - initialize object.
* @kref: object in question.
*/
static inline void kref_init(struct kref *kref)
{
- atomic_set(&kref->refcount, 1);
+ refcount_set(&kref->refcount, 1);
+}
+
+static inline unsigned int kref_read(const struct kref *kref)
+{
+ return refcount_read(&kref->refcount);
}
/**
@@ -39,17 +44,12 @@ static inline void kref_init(struct kref *kref)
*/
static inline void kref_get(struct kref *kref)
{
- /* If refcount was 0 before incrementing then we have a race
- * condition when this kref is freeing by some other thread right now.
- * In this case one should use kref_get_unless_zero()
- */
- WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2);
+ refcount_inc(&kref->refcount);
}
/**
- * kref_sub - subtract a number of refcounts for object.
+ * kref_put - decrement refcount for object.
* @kref: object.
- * @count: Number of recounts to subtract.
* @release: pointer to the function that will clean up the object when the
* last reference to the object is released.
* This pointer is required, and it is not acceptable to pass kfree
@@ -58,57 +58,43 @@ static inline void kref_get(struct kref *kref)
* maintainer, and anyone else who happens to notice it. You have
* been warned.
*
- * Subtract @count from the refcount, and if 0, call release().
+ * Decrement the refcount, and if 0, call release().
* Return 1 if the object was removed, otherwise return 0. Beware, if this
* function returns 0, you still can not count on the kref from remaining in
* memory. Only use the return value if you want to see if the kref is now
* gone, not present.
*/
-static inline int kref_sub(struct kref *kref, unsigned int count,
- void (*release)(struct kref *kref))
+static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
WARN_ON(release == NULL);
- if (atomic_sub_and_test((int) count, &kref->refcount)) {
+ if (refcount_dec_and_test(&kref->refcount)) {
release(kref);
return 1;
}
return 0;
}
-/**
- * kref_put - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- * last reference to the object is released.
- * This pointer is required, and it is not acceptable to pass kfree
- * in as this function. If the caller does pass kfree to this
- * function, you will be publicly mocked mercilessly by the kref
- * maintainer, and anyone else who happens to notice it. You have
- * been warned.
- *
- * Decrement the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0. Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory. Only use the return value if you want to see if the kref is now
- * gone, not present.
- */
-static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
-{
- return kref_sub(kref, 1, release);
-}
-
static inline int kref_put_mutex(struct kref *kref,
void (*release)(struct kref *kref),
struct mutex *lock)
{
WARN_ON(release == NULL);
- if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) {
- mutex_lock(lock);
- if (unlikely(!atomic_dec_and_test(&kref->refcount))) {
- mutex_unlock(lock);
- return 0;
- }
+
+ if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) {
+ release(kref);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int kref_put_lock(struct kref *kref,
+ void (*release)(struct kref *kref),
+ spinlock_t *lock)
+{
+ WARN_ON(release == NULL);
+
+ if (refcount_dec_and_lock(&kref->refcount, lock)) {
release(kref);
return 1;
}
@@ -133,6 +119,6 @@ static inline int kref_put_mutex(struct kref *kref,
*/
static inline int __must_check kref_get_unless_zero(struct kref *kref)
{
- return atomic_add_unless(&kref->refcount, 1, 0);
+ return refcount_inc_not_zero(&kref->refcount);
}
#endif /* _KREF_H_ */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 7fffbfcd5430..1127fe31645d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,8 @@
#include <linux/osq_lock.h>
#include <linux/debug_locks.h>
+struct ww_acquire_ctx;
+
/*
* Simple, straightforward mutexes with strict semantics:
*
@@ -65,7 +67,7 @@ struct mutex {
static inline struct task_struct *__mutex_owner(struct mutex *lock)
{
- return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+ return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
}
/*
@@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
struct mutex_waiter {
struct list_head list;
struct task_struct *task;
+ struct ww_acquire_ctx *ww_ctx;
#ifdef CONFIG_DEBUG_MUTEXES
void *magic;
#endif
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5b2e6159b744..93664f022ecf 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -4,15 +4,15 @@
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
-#include <linux/wait.h>
+#include <linux/rcuwait.h>
#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
struct percpu_rw_semaphore {
struct rcu_sync rss;
unsigned int __percpu *read_count;
- struct rw_semaphore rw_sem;
- wait_queue_head_t writer;
+ struct rw_semaphore rw_sem; /* slowpath */
+ struct rcuwait writer; /* blocked writer */
int readers_block;
};
@@ -22,7 +22,7 @@ static struct percpu_rw_semaphore name = { \
.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
.read_count = &__percpu_rwsem_rc_##name, \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
- .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \
+ .writer = __RCUWAIT_INITIALIZER(name.writer), \
}
extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 51334edec506..a39540326417 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -80,6 +80,7 @@
/********** kernel/mutexes **********/
#define MUTEX_DEBUG_INIT 0x11
#define MUTEX_DEBUG_FREE 0x22
+#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA)
/********** lib/flex_array.c **********/
#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
new file mode 100644
index 000000000000..a4ede51b3e7c
--- /dev/null
+++ b/include/linux/rcuwait.h
@@ -0,0 +1,63 @@
+#ifndef _LINUX_RCUWAIT_H_
+#define _LINUX_RCUWAIT_H_
+
+#include <linux/rcupdate.h>
+
+/*
+ * rcuwait provides a way of blocking and waking up a single
+ * task in an rcu-safe manner; where it is forbidden to use
+ * after exit_notify(). task_struct is not properly rcu protected,
+ * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ *
+ * Alternatively we have task_rcu_dereference(), but the return
+ * semantics have different implications which would break the
+ * wakeup side. The only time @task is non-nil is when a user is
+ * blocked (or checking if it needs to) on a condition, and reset
+ * as soon as we know that the condition has succeeded and are
+ * awoken.
+ */
+struct rcuwait {
+ struct task_struct *task;
+};
+
+#define __RCUWAIT_INITIALIZER(name) \
+ { .task = NULL, }
+
+static inline void rcuwait_init(struct rcuwait *w)
+{
+ w->task = NULL;
+}
+
+extern void rcuwait_wake_up(struct rcuwait *w);
+
+/*
+ * The caller is responsible for locking around rcuwait_wait_event(),
+ * such that writes to @task are properly serialized.
+ */
+#define rcuwait_wait_event(w, condition) \
+({ \
+ /* \
+ * Complain if we are called after do_exit()/exit_notify(), \
+ * as we cannot rely on the rcu critical region for the \
+ * wakeup side. \
+ */ \
+ WARN_ON(current->exit_state); \
+ \
+ rcu_assign_pointer((w)->task, current); \
+ for (;;) { \
+ /* \
+ * Implicit barrier (A) pairs with (B) in \
+ * rcuwait_wake_up(). \
+ */ \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ \
+ schedule(); \
+ } \
+ \
+ WRITE_ONCE((w)->task, NULL); \
+ __set_current_state(TASK_RUNNING); \
+})
+
+#endif /* _LINUX_RCUWAIT_H_ */
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
new file mode 100644
index 000000000000..600aadf9cca4
--- /dev/null
+++ b/include/linux/refcount.h
@@ -0,0 +1,294 @@
+#ifndef _LINUX_REFCOUNT_H
+#define _LINUX_REFCOUNT_H
+
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * its the lock acquire, for RCU/lockless data structures its the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_REFCOUNT
+#define REFCOUNT_WARN(cond, str) WARN_ON(cond)
+#define __refcount_check __must_check
+#else
+#define REFCOUNT_WARN(cond, str) (void)(cond)
+#define __refcount_check
+#endif
+
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
+
+static inline void refcount_set(refcount_t *r, unsigned int n)
+{
+ atomic_set(&r->refs, n);
+}
+
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+ return atomic_read(&r->refs);
+}
+
+static inline __refcount_check
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+ unsigned int old, new, val = atomic_read(&r->refs);
+
+ for (;;) {
+ if (!val)
+ return false;
+
+ if (unlikely(val == UINT_MAX))
+ return true;
+
+ new = val + i;
+ if (new < val)
+ new = UINT_MAX;
+ old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+ return true;
+}
+
+static inline void refcount_add(unsigned int i, refcount_t *r)
+{
+ REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_inc_not_zero(refcount_t *r)
+{
+ unsigned int old, new, val = atomic_read(&r->refs);
+
+ for (;;) {
+ new = val + 1;
+
+ if (!val)
+ return false;
+
+ if (unlikely(!new))
+ return true;
+
+ old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+ return true;
+}
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object, will WARN when this is not so.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+ REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+ unsigned int old, new, val = atomic_read(&r->refs);
+
+ for (;;) {
+ if (unlikely(val == UINT_MAX))
+ return false;
+
+ new = val - i;
+ if (new > val) {
+ REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+ return false;
+ }
+
+ old = atomic_cmpxchg_release(&r->refs, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ return !new;
+}
+
+static inline __refcount_check
+bool refcount_dec_and_test(refcount_t *r)
+{
+ return refcount_sub_and_test(1, r);
+}
+
+/*
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
+static inline
+void refcount_dec(refcount_t *r)
+{
+ REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+}
+
+/*
+ * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the
+ * success thereof.
+ *
+ * Like all decrement operations, it provides release memory order and provides
+ * a control dependency.
+ *
+ * It can be used like a try-delete operator; this explicit case is provided
+ * and not cmpxchg in generic, because that would allow implementing unsafe
+ * operations.
+ */
+static inline __refcount_check
+bool refcount_dec_if_one(refcount_t *r)
+{
+ return atomic_cmpxchg_release(&r->refs, 1, 0) == 1;
+}
+
+/*
+ * No atomic_t counterpart, it decrements unless the value is 1, in which case
+ * it will return false.
+ *
+ * Was often done like: atomic_add_unless(&var, -1, 1)
+ */
+static inline __refcount_check
+bool refcount_dec_not_one(refcount_t *r)
+{
+ unsigned int old, new, val = atomic_read(&r->refs);
+
+ for (;;) {
+ if (unlikely(val == UINT_MAX))
+ return true;
+
+ if (val == 1)
+ return false;
+
+ new = val - 1;
+ if (new > val) {
+ REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+ return true;
+ }
+
+ old = atomic_cmpxchg_release(&r->refs, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ return true;
+}
+
+/*
+ * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
+ * to decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
+{
+ if (refcount_dec_not_one(r))
+ return false;
+
+ mutex_lock(lock);
+ if (!refcount_dec_and_test(r)) {
+ mutex_unlock(lock);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
+{
+ if (refcount_dec_not_one(r))
+ return false;
+
+ spin_lock(lock);
+ if (!refcount_dec_and_test(r)) {
+ spin_unlock(lock);
+ return false;
+ }
+
+ return true;
+}
+
+#endif /* _LINUX_REFCOUNT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c89b7fdec41e..c8e519d0b4a3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -226,7 +226,7 @@ extern void proc_sched_set_task(struct task_struct *p);
extern char ___assert_task_state[1 - 2*!!(
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
-/* Convenience macros for the sake of set_task_state */
+/* Convenience macros for the sake of set_current_state */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
@@ -253,17 +253,6 @@ extern char ___assert_task_state[1 - 2*!!(
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-#define __set_task_state(tsk, state_value) \
- do { \
- (tsk)->task_state_change = _THIS_IP_; \
- (tsk)->state = (state_value); \
- } while (0)
-#define set_task_state(tsk, state_value) \
- do { \
- (tsk)->task_state_change = _THIS_IP_; \
- smp_store_mb((tsk)->state, (state_value)); \
- } while (0)
-
#define __set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
@@ -276,20 +265,6 @@ extern char ___assert_task_state[1 - 2*!!(
} while (0)
#else
-
-/*
- * @tsk had better be current, or you get to keep the pieces.
- *
- * The only reason is that computing current can be more expensive than
- * using a pointer that's already available.
- *
- * Therefore, see set_current_state().
- */
-#define __set_task_state(tsk, state_value) \
- do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value) \
- smp_store_mb((tsk)->state, (state_value))
-
/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
@@ -1018,8 +993,8 @@ enum cpu_idle_type {
*
* The DEFINE_WAKE_Q macro declares and initializes the list head.
* wake_up_q() does NOT reinitialize the list; it's expected to be
- * called near the end of a function, where the fact that the queue is
- * not used again will be easy to see by inspection.
+ * called near the end of a function. Otherwise, the list can be
+ * re-initialized for later re-use by wake_q_init().
*
* Note that this can cause spurious wakeups. schedule() callers
* must ensure the call is done inside a loop, confirming that the
@@ -1039,6 +1014,12 @@ struct wake_q_head {
#define DEFINE_WAKE_Q(name) \
struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+static inline void wake_q_init(struct wake_q_head *head)
+{
+ head->first = WAKE_Q_TAIL;
+ head->lastp = &head->first;
+}
+
extern void wake_q_add(struct wake_q_head *head,
struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 47dd0cebd204..59248dcc6ef3 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -180,8 +180,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define raw_spin_lock_nested(lock, subclass) \
_raw_spin_lock_nested(lock, subclass)
-# define raw_spin_lock_bh_nested(lock, subclass) \
- _raw_spin_lock_bh_nested(lock, subclass)
# define raw_spin_lock_nest_lock(lock, nest_lock) \
do { \
@@ -197,7 +195,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
# define raw_spin_lock_nested(lock, subclass) \
_raw_spin_lock(((void)(subclass), (lock)))
# define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock)
-# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock)
#endif
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
@@ -317,11 +314,6 @@ do { \
raw_spin_lock_nested(spinlock_check(lock), subclass); \
} while (0)
-#define spin_lock_bh_nested(lock, subclass) \
-do { \
- raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\
-} while (0)
-
#define spin_lock_nest_lock(lock, nest_lock) \
do { \
raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 5344268e6e62..42dfab89e740 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -22,8 +22,6 @@ int in_lock_functions(unsigned long addr);
void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
__acquires(lock);
-void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
- __acquires(lock);
void __lockfunc
_raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
__acquires(lock);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d3afef9d8dbe..d0d188861ad6 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -57,7 +57,6 @@
#define _raw_spin_lock(lock) __LOCK(lock)
#define _raw_spin_lock_nested(lock, subclass) __LOCK(lock)
-#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock)
#define _raw_read_lock(lock) __LOCK(lock)
#define _raw_write_lock(lock) __LOCK(lock)
#define _raw_spin_lock_bh(lock) __LOCK_BH(lock)
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 62a60eeacb0a..8a511c0985aa 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -198,7 +198,7 @@ static inline struct cache_head *cache_get(struct cache_head *h)
static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
{
- if (atomic_read(&h->ref.refcount) <= 2 &&
+ if (kref_read(&h->ref) <= 2 &&
h->expiry_time < cd->nextcheck)
cd->nextcheck = h->expiry_time;
kref_put(&h->ref, cd->cache_put);
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 7b0066814fa0..5dd9a7682227 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -51,10 +51,10 @@ struct ww_mutex {
};
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \
- , .ww_class = &ww_class
+# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \
+ , .ww_class = class
#else
-# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class)
+# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
#endif
#define __WW_CLASS_INITIALIZER(ww_class) \
@@ -63,7 +63,7 @@ struct ww_mutex {
, .mutex_name = #ww_class "_mutex" }
#define __WW_MUTEX_INITIALIZER(lockname, class) \
- { .base = { \__MUTEX_INITIALIZER(lockname) } \
+ { .base = __MUTEX_INITIALIZER(lockname.base) \
__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
#define DEFINE_WW_CLASS(classname) \
@@ -186,11 +186,6 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
#endif
}
-extern int __must_check __ww_mutex_lock(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx);
-extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx);
-
/**
* ww_mutex_lock - acquire the w/w mutex
* @lock: the mutex to be acquired
@@ -220,14 +215,7 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock,
*
* A mutex acquired with this function must be released with ww_mutex_unlock.
*/
-static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
-{
- if (ctx)
- return __ww_mutex_lock(lock, ctx);
-
- mutex_lock(&lock->base);
- return 0;
-}
+extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx);
/**
* ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible
@@ -259,14 +247,8 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct
*
* A mutex acquired with this function must be released with ww_mutex_unlock.
*/
-static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx)
-{
- if (ctx)
- return __ww_mutex_lock_interruptible(lock, ctx);
- else
- return mutex_lock_interruptible(&lock->base);
-}
+extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx);
/**
* ww_mutex_lock_slow - slowpath acquiring of the w/w mutex
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 554671c81f4a..90708f68cc02 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -987,7 +987,7 @@ static inline void hci_conn_drop(struct hci_conn *conn)
static inline void hci_dev_put(struct hci_dev *d)
{
BT_DBG("%s orig refcnt %d", d->name,
- atomic_read(&d->dev.kobj.kref.refcount));
+ kref_read(&d->dev.kobj.kref));
put_device(&d->dev);
}
@@ -995,7 +995,7 @@ static inline void hci_dev_put(struct hci_dev *d)
static inline struct hci_dev *hci_dev_hold(struct hci_dev *d)
{
BT_DBG("%s orig refcnt %d", d->name,
- atomic_read(&d->dev.kobj.kref.refcount));
+ kref_read(&d->dev.kobj.kref));
get_device(&d->dev);
return d;
diff --git a/init/version.c b/init/version.c
index fe41a63efed6..5606341e9efd 100644
--- a/init/version.c
+++ b/init/version.c
@@ -23,9 +23,7 @@ int version_string(LINUX_VERSION_CODE);
#endif
struct uts_namespace init_uts_ns = {
- .kref = {
- .refcount = ATOMIC_INIT(2),
- },
+ .kref = KREF_INIT(2),
.name = {
.sysname = UTS_SYSNAME,
.nodename = UTS_NODENAME,
diff --git a/kernel/exit.c b/kernel/exit.c
index 8e5e21338b3a..b67c57faa705 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -55,6 +55,7 @@
#include <linux/shm.h>
#include <linux/kcov.h>
#include <linux/random.h>
+#include <linux/rcuwait.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -282,6 +283,35 @@ retry:
return task;
}
+void rcuwait_wake_up(struct rcuwait *w)
+{
+ struct task_struct *task;
+
+ rcu_read_lock();
+
+ /*
+ * Order condition vs @task, such that everything prior to the load
+ * of @task is visible. This is the condition as to why the user called
+ * rcuwait_trywake() in the first place. Pairs with set_current_state()
+ * barrier (A) in rcuwait_wait_event().
+ *
+ * WAIT WAKE
+ * [S] tsk = current [S] cond = true
+ * MB (A) MB (B)
+ * [L] cond [L] tsk
+ */
+ smp_rmb(); /* (B) */
+
+ /*
+ * Avoid using task_rcu_dereference() magic as long as we are careful,
+ * see comment in rcuwait_wait_event() regarding ->exit_state.
+ */
+ task = rcu_dereference(w->task);
+ if (task)
+ wake_up_process(task);
+ rcu_read_unlock();
+}
+
struct task_struct *try_get_task_struct(struct task_struct **ptask)
{
struct task_struct *task;
@@ -468,12 +498,12 @@ assign_new_owner:
* Turn us into a lazy TLB process if we
* aren't already..
*/
-static void exit_mm(struct task_struct *tsk)
+static void exit_mm(void)
{
- struct mm_struct *mm = tsk->mm;
+ struct mm_struct *mm = current->mm;
struct core_state *core_state;
- mm_release(tsk, mm);
+ mm_release(current, mm);
if (!mm)
return;
sync_mm_rss(mm);
@@ -491,7 +521,7 @@ static void exit_mm(struct task_struct *tsk)
up_read(&mm->mmap_sem);
- self.task = tsk;
+ self.task = current;
self.next = xchg(&core_state->dumper.next, &self);
/*
* Implies mb(), the result of xchg() must be visible
@@ -501,22 +531,22 @@ static void exit_mm(struct task_struct *tsk)
complete(&core_state->startup);
for (;;) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!self.task) /* see coredump_finish() */
break;
freezable_schedule();
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
- BUG_ON(mm != tsk->active_mm);
+ BUG_ON(mm != current->active_mm);
/* more a memory barrier than a real lock */
- task_lock(tsk);
- tsk->mm = NULL;
+ task_lock(current);
+ current->mm = NULL;
up_read(&mm->mmap_sem);
enter_lazy_tlb(mm, current);
- task_unlock(tsk);
+ task_unlock(current);
mm_update_next_owner(mm);
mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
@@ -823,7 +853,7 @@ void __noreturn do_exit(long code)
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
- exit_mm(tsk);
+ exit_mm();
if (group_dead)
acct_process();
diff --git a/kernel/fork.c b/kernel/fork.c
index f6995cdfe714..ff82e24573b6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -432,11 +432,13 @@ void __init fork_init(void)
int i;
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
-#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
+#define ARCH_MIN_TASKALIGN 0
#endif
+ int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+
/* create a slab on which task_structs can be allocated */
task_struct_cachep = kmem_cache_create("task_struct",
- arch_task_struct_size, ARCH_MIN_TASKALIGN,
+ arch_task_struct_size, align,
SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
#endif
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 6f88e352cd4f..760158d9d98d 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
+obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d9a698e8458f..9812e5dd409e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2203,7 +2203,7 @@ cache_hit:
* Important for check_no_collision().
*/
if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
- if (debug_locks_off_graph_unlock())
+ if (!debug_locks_off_graph_unlock())
return 0;
print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index d3de04b12f8c..28350dc8ecbb 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -372,6 +372,78 @@ static struct lock_torture_ops mutex_lock_ops = {
.name = "mutex_lock"
};
+#include <linux/ww_mutex.h>
+static DEFINE_WW_CLASS(torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
+
+static int torture_ww_mutex_lock(void)
+__acquires(torture_ww_mutex_0)
+__acquires(torture_ww_mutex_1)
+__acquires(torture_ww_mutex_2)
+{
+ LIST_HEAD(list);
+ struct reorder_lock {
+ struct list_head link;
+ struct ww_mutex *lock;
+ } locks[3], *ll, *ln;
+ struct ww_acquire_ctx ctx;
+
+ locks[0].lock = &torture_ww_mutex_0;
+ list_add(&locks[0].link, &list);
+
+ locks[1].lock = &torture_ww_mutex_1;
+ list_add(&locks[1].link, &list);
+
+ locks[2].lock = &torture_ww_mutex_2;
+ list_add(&locks[2].link, &list);
+
+ ww_acquire_init(&ctx, &torture_ww_class);
+
+ list_for_each_entry(ll, &list, link) {
+ int err;
+
+ err = ww_mutex_lock(ll->lock, &ctx);
+ if (!err)
+ continue;
+
+ ln = ll;
+ list_for_each_entry_continue_reverse(ln, &list, link)
+ ww_mutex_unlock(ln->lock);
+
+ if (err != -EDEADLK)
+ return err;
+
+ ww_mutex_lock_slow(ll->lock, &ctx);
+ list_move(&ll->link, &list);
+ }
+
+ ww_acquire_fini(&ctx);
+ return 0;
+}
+
+static void torture_ww_mutex_unlock(void)
+__releases(torture_ww_mutex_0)
+__releases(torture_ww_mutex_1)
+__releases(torture_ww_mutex_2)
+{
+ ww_mutex_unlock(&torture_ww_mutex_0);
+ ww_mutex_unlock(&torture_ww_mutex_1);
+ ww_mutex_unlock(&torture_ww_mutex_2);
+}
+
+static struct lock_torture_ops ww_mutex_lock_ops = {
+ .writelock = torture_ww_mutex_lock,
+ .write_delay = torture_mutex_delay,
+ .task_boost = torture_boost_dummy,
+ .writeunlock = torture_ww_mutex_unlock,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "ww_mutex_lock"
+};
+
#ifdef CONFIG_RT_MUTEXES
static DEFINE_RT_MUTEX(torture_rtmutex);
@@ -797,6 +869,7 @@ static int __init lock_torture_init(void)
&spin_lock_ops, &spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
+ &ww_mutex_lock_ops,
#ifdef CONFIG_RT_MUTEXES
&rtmutex_lock_ops,
#endif
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index a459faa48987..4174417d5309 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -26,20 +26,3 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
-
-#define spin_lock_mutex(lock, flags) \
- do { \
- struct mutex *l = container_of(lock, struct mutex, wait_lock); \
- \
- DEBUG_LOCKS_WARN_ON(in_interrupt()); \
- local_irq_save(flags); \
- arch_spin_lock(&(lock)->rlock.raw_lock);\
- DEBUG_LOCKS_WARN_ON(l->magic != l); \
- } while (0)
-
-#define spin_unlock_mutex(lock, flags) \
- do { \
- arch_spin_unlock(&(lock)->rlock.raw_lock); \
- local_irq_restore(flags); \
- preempt_check_resched(); \
- } while (0)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 8464a5cbab97..ad2d9e22697b 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init);
/*
* @owner: contains: 'struct task_struct *' to the current lock owner,
* NULL means not owned. Since task_struct pointers are aligned at
- * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
- * bits to store extra state.
+ * at least L1_CACHE_BYTES, we have low bits to store extra state.
*
* Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
* Bit1 indicates unlock needs to hand the lock to the top-waiter
+ * Bit2 indicates handoff has been done and we're waiting for pickup.
*/
#define MUTEX_FLAG_WAITERS 0x01
#define MUTEX_FLAG_HANDOFF 0x02
+#define MUTEX_FLAG_PICKUP 0x04
-#define MUTEX_FLAGS 0x03
+#define MUTEX_FLAGS 0x07
static inline struct task_struct *__owner_task(unsigned long owner)
{
@@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
}
/*
- * Actual trylock that will work on any unlocked state.
- *
- * When setting the owner field, we must preserve the low flag bits.
- *
- * Be careful with @handoff, only set that in a wait-loop (where you set
- * HANDOFF) to avoid recursive lock attempts.
+ * Trylock variant that retuns the owning task on failure.
*/
-static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
+static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
{
unsigned long owner, curr = (unsigned long)current;
owner = atomic_long_read(&lock->owner);
for (;;) { /* must loop, can race against a flag */
unsigned long old, flags = __owner_flags(owner);
+ unsigned long task = owner & ~MUTEX_FLAGS;
- if (__owner_task(owner)) {
- if (handoff && unlikely(__owner_task(owner) == current)) {
- /*
- * Provide ACQUIRE semantics for the lock-handoff.
- *
- * We cannot easily use load-acquire here, since
- * the actual load is a failed cmpxchg, which
- * doesn't imply any barriers.
- *
- * Also, this is a fairly unlikely scenario, and
- * this contains the cost.
- */
- smp_mb(); /* ACQUIRE */
- return true;
- }
+ if (task) {
+ if (likely(task != curr))
+ break;
- return false;
+ if (likely(!(flags & MUTEX_FLAG_PICKUP)))
+ break;
+
+ flags &= ~MUTEX_FLAG_PICKUP;
+ } else {
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
+#endif
}
/*
@@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
* past the point where we acquire it. This would be possible
* if we (accidentally) set the bit on an unlocked mutex.
*/
- if (handoff)
- flags &= ~MUTEX_FLAG_HANDOFF;
+ flags &= ~MUTEX_FLAG_HANDOFF;
old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
if (old == owner)
- return true;
+ return NULL;
owner = old;
}
+
+ return __owner_task(owner);
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+ return !__mutex_trylock_or_owner(lock);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
/*
* Give up ownership to a specific task, when @task = NULL, this is equivalent
- * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
- * semantics like a regular unlock, the __mutex_trylock() provides matching
- * ACQUIRE semantics for the handoff.
+ * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves
+ * WAITERS. Provides RELEASE semantics like a regular unlock, the
+ * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff.
*/
static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
{
@@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+ DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif
new = (owner & MUTEX_FLAG_WAITERS);
new |= (unsigned long)task;
+ if (task)
+ new |= MUTEX_FLAG_PICKUP;
old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
if (old == owner)
@@ -237,8 +241,8 @@ void __sched mutex_lock(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock);
#endif
-static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
- struct ww_acquire_ctx *ww_ctx)
+static __always_inline void
+ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
/*
@@ -277,17 +281,50 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
ww_ctx->acquired++;
}
+static inline bool __sched
+__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
+{
+ return a->stamp - b->stamp <= LONG_MAX &&
+ (a->stamp != b->stamp || a > b);
+}
+
+/*
+ * Wake up any waiters that may have to back off when the lock is held by the
+ * given context.
+ *
+ * Due to the invariants on the wait list, this can only affect the first
+ * waiter with a context.
+ *
+ * The current task must not be on the wait list.
+ */
+static void __sched
+__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter *cur;
+
+ lockdep_assert_held(&lock->wait_lock);
+
+ list_for_each_entry(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ if (cur->ww_ctx->acquired > 0 &&
+ __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
+ debug_mutex_wake_waiter(lock, cur);
+ wake_up_process(cur->task);
+ }
+
+ break;
+ }
+}
+
/*
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*/
static __always_inline void
-ww_mutex_set_context_fastpath(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx)
+ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- unsigned long flags;
- struct mutex_waiter *cur;
-
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
@@ -311,46 +348,79 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
* Uh oh, we raced in fastpath, wake up everyone in this case,
* so they can see the new lock->ctx.
*/
- spin_lock_mutex(&lock->base.wait_lock, flags);
- list_for_each_entry(cur, &lock->base.wait_list, list) {
- debug_mutex_wake_waiter(&lock->base, cur);
- wake_up_process(cur->task);
- }
- spin_unlock_mutex(&lock->base.wait_lock, flags);
+ spin_lock(&lock->base.wait_lock);
+ __ww_mutex_wakeup_for_backoff(&lock->base, ctx);
+ spin_unlock(&lock->base.wait_lock);
}
/*
- * After acquiring lock in the slowpath set ctx and wake up any
- * waiters so they can recheck.
+ * After acquiring lock in the slowpath set ctx.
+ *
+ * Unlike for the fast path, the caller ensures that waiters are woken up where
+ * necessary.
*
* Callers must hold the mutex wait_lock.
*/
static __always_inline void
-ww_mutex_set_context_slowpath(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx)
+ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- struct mutex_waiter *cur;
-
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
+}
+
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+
+static inline
+bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ struct mutex_waiter *waiter)
+{
+ struct ww_mutex *ww;
+
+ ww = container_of(lock, struct ww_mutex, base);
/*
- * Give any possible sleeping processes the chance to wake up,
- * so they can recheck if they have to back off.
+ * If ww->ctx is set the contents are undefined, only
+ * by acquiring wait_lock there is a guarantee that
+ * they are not invalid when reading.
+ *
+ * As such, when deadlock detection needs to be
+ * performed the optimistic spinning cannot be done.
+ *
+ * Check this in every inner iteration because we may
+ * be racing against another thread's ww_mutex_lock.
*/
- list_for_each_entry(cur, &lock->base.wait_list, list) {
- debug_mutex_wake_waiter(&lock->base, cur);
- wake_up_process(cur->task);
- }
+ if (ww_ctx->acquired > 0 && READ_ONCE(ww->ctx))
+ return false;
+
+ /*
+ * If we aren't on the wait list yet, cancel the spin
+ * if there are waiters. We want to avoid stealing the
+ * lock from a waiter with an earlier stamp, since the
+ * other thread may already own a lock that we also
+ * need.
+ */
+ if (!waiter && (atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS))
+ return false;
+
+ /*
+ * Similarly, stop spinning if we are no longer the
+ * first waiter.
+ */
+ if (waiter && !__mutex_waiter_is_first(lock, waiter))
+ return false;
+
+ return true;
}
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
+ * Look out! "owner" is an entirely speculative pointer access and not
+ * reliable.
+ *
+ * "noinline" so that this function shows up on perf profiles.
*/
static noinline
-bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
+ struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter)
{
bool ret = true;
@@ -373,6 +443,11 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
break;
}
+ if (ww_ctx && !ww_mutex_spin_on_owner(lock, ww_ctx, waiter)) {
+ ret = false;
+ break;
+ }
+
cpu_relax();
}
rcu_read_unlock();
@@ -431,12 +506,10 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
* with the spinner at the head of the OSQ, if present, until the owner is
* changed to itself.
*/
-static bool mutex_optimistic_spin(struct mutex *lock,
- struct ww_acquire_ctx *ww_ctx,
- const bool use_ww_ctx, const bool waiter)
+static __always_inline bool
+mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ const bool use_ww_ctx, struct mutex_waiter *waiter)
{
- struct task_struct *task = current;
-
if (!waiter) {
/*
* The purpose of the mutex_can_spin_on_owner() function is
@@ -460,40 +533,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
for (;;) {
struct task_struct *owner;
- if (use_ww_ctx && ww_ctx->acquired > 0) {
- struct ww_mutex *ww;
-
- ww = container_of(lock, struct ww_mutex, base);
- /*
- * If ww->ctx is set the contents are undefined, only
- * by acquiring wait_lock there is a guarantee that
- * they are not invalid when reading.
- *
- * As such, when deadlock detection needs to be
- * performed the optimistic spinning cannot be done.
- */
- if (READ_ONCE(ww->ctx))
- goto fail_unlock;
- }
+ /* Try to acquire the mutex... */
+ owner = __mutex_trylock_or_owner(lock);
+ if (!owner)
+ break;
/*
- * If there's an owner, wait for it to either
+ * There's an owner, wait for it to either
* release the lock or go to sleep.
*/
- owner = __mutex_owner(lock);
- if (owner) {
- if (waiter && owner == task) {
- smp_mb(); /* ACQUIRE */
- break;
- }
-
- if (!mutex_spin_on_owner(lock, owner))
- goto fail_unlock;
- }
-
- /* Try to acquire the mutex if it is unlocked. */
- if (__mutex_trylock(lock, waiter))
- break;
+ if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter))
+ goto fail_unlock;
/*
* The cpu_relax() call is a compiler barrier which forces
@@ -532,9 +582,9 @@ fail:
return false;
}
#else
-static bool mutex_optimistic_spin(struct mutex *lock,
- struct ww_acquire_ctx *ww_ctx,
- const bool use_ww_ctx, const bool waiter)
+static __always_inline bool
+mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ const bool use_ww_ctx, struct mutex_waiter *waiter)
{
return false;
}
@@ -594,23 +644,88 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
EXPORT_SYMBOL(ww_mutex_unlock);
static inline int __sched
-__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
+__ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
+ struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
+ struct mutex_waiter *cur;
+
+ if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
+ goto deadlock;
- if (!hold_ctx)
+ /*
+ * If there is a waiter in front of us that has a context, then its
+ * stamp is earlier than ours and we must back off.
+ */
+ cur = waiter;
+ list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) {
+ if (cur->ww_ctx)
+ goto deadlock;
+ }
+
+ return 0;
+
+deadlock:
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
+ ctx->contending_lock = ww;
+#endif
+ return -EDEADLK;
+}
+
+static inline int __sched
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+ struct mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter *cur;
+ struct list_head *pos;
+
+ if (!ww_ctx) {
+ list_add_tail(&waiter->list, &lock->wait_list);
return 0;
+ }
- if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
- (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them.
+ */
+ pos = &lock->wait_list;
+ list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+ /* Back off immediately if necessary. */
+ if (ww_ctx->acquired > 0) {
#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
- ctx->contending_lock = ww;
+ struct ww_mutex *ww;
+
+ ww = container_of(lock, struct ww_mutex, base);
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+ ww_ctx->contending_lock = ww;
#endif
- return -EDEADLK;
+ return -EDEADLK;
+ }
+
+ break;
+ }
+
+ pos = &cur->list;
+
+ /*
+ * Wake up the waiter so that it gets a chance to back
+ * off.
+ */
+ if (cur->ww_ctx->acquired > 0) {
+ debug_mutex_wake_waiter(lock, cur);
+ wake_up_process(cur->task);
+ }
}
+ list_add_tail(&waiter->list, pos);
return 0;
}
@@ -622,15 +737,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
- struct task_struct *task = current;
struct mutex_waiter waiter;
- unsigned long flags;
bool first = false;
struct ww_mutex *ww;
int ret;
- if (use_ww_ctx) {
- ww = container_of(lock, struct ww_mutex, base);
+ might_sleep();
+
+ ww = container_of(lock, struct ww_mutex, base);
+ if (use_ww_ctx && ww_ctx) {
if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
return -EALREADY;
}
@@ -638,36 +753,54 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
- if (__mutex_trylock(lock, false) ||
- mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
+ if (__mutex_trylock(lock) ||
+ mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) {
/* got the lock, yay! */
lock_acquired(&lock->dep_map, ip);
- if (use_ww_ctx)
+ if (use_ww_ctx && ww_ctx)
ww_mutex_set_context_fastpath(ww, ww_ctx);
preempt_enable();
return 0;
}
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
/*
* After waiting to acquire the wait_lock, try again.
*/
- if (__mutex_trylock(lock, false))
+ if (__mutex_trylock(lock)) {
+ if (use_ww_ctx && ww_ctx)
+ __ww_mutex_wakeup_for_backoff(lock, ww_ctx);
+
goto skip_wait;
+ }
debug_mutex_lock_common(lock, &waiter);
- debug_mutex_add_waiter(lock, &waiter, task);
+ debug_mutex_add_waiter(lock, &waiter, current);
- /* add waiting tasks to the end of the waitqueue (FIFO): */
- list_add_tail(&waiter.list, &lock->wait_list);
- waiter.task = task;
+ lock_contended(&lock->dep_map, ip);
+
+ if (!use_ww_ctx) {
+ /* add waiting tasks to the end of the waitqueue (FIFO): */
+ list_add_tail(&waiter.list, &lock->wait_list);
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ waiter.ww_ctx = MUTEX_POISON_WW_CTX;
+#endif
+ } else {
+ /* Add in stamp order, waking up waiters that must back off. */
+ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+ if (ret)
+ goto err_early_backoff;
+
+ waiter.ww_ctx = ww_ctx;
+ }
+
+ waiter.task = current;
if (__mutex_waiter_is_first(lock, &waiter))
__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
- lock_contended(&lock->dep_map, ip);
-
- set_task_state(task, state);
+ set_current_state(state);
for (;;) {
/*
* Once we hold wait_lock, we're serialized against
@@ -675,7 +808,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* before testing the error conditions to make sure we pick up
* the handoff.
*/
- if (__mutex_trylock(lock, first))
+ if (__mutex_trylock(lock))
goto acquired;
/*
@@ -683,42 +816,47 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* wait_lock. This ensures the lock cancellation is ordered
* against mutex_unlock() and wake-ups do not go missing.
*/
- if (unlikely(signal_pending_state(state, task))) {
+ if (unlikely(signal_pending_state(state, current))) {
ret = -EINTR;
goto err;
}
- if (use_ww_ctx && ww_ctx->acquired > 0) {
- ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
+ if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) {
+ ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx);
if (ret)
goto err;
}
- spin_unlock_mutex(&lock->wait_lock, flags);
+ spin_unlock(&lock->wait_lock);
schedule_preempt_disabled();
- if (!first && __mutex_waiter_is_first(lock, &waiter)) {
- first = true;
- __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
+ /*
+ * ww_mutex needs to always recheck its position since its waiter
+ * list is not FIFO ordered.
+ */
+ if ((use_ww_ctx && ww_ctx) || !first) {
+ first = __mutex_waiter_is_first(lock, &waiter);
+ if (first)
+ __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
}
- set_task_state(task, state);
+ set_current_state(state);
/*
* Here we order against unlock; we must either see it change
* state back to RUNNING and fall through the next schedule(),
* or we must see its unlock and acquire.
*/
- if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
- __mutex_trylock(lock, first))
+ if (__mutex_trylock(lock) ||
+ (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter)))
break;
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
}
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
acquired:
- __set_task_state(task, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
- mutex_remove_waiter(lock, &waiter, task);
+ mutex_remove_waiter(lock, &waiter, current);
if (likely(list_empty(&lock->wait_list)))
__mutex_clear_flag(lock, MUTEX_FLAGS);
@@ -728,30 +866,44 @@ skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
- if (use_ww_ctx)
+ if (use_ww_ctx && ww_ctx)
ww_mutex_set_context_slowpath(ww, ww_ctx);
- spin_unlock_mutex(&lock->wait_lock, flags);
+ spin_unlock(&lock->wait_lock);
preempt_enable();
return 0;
err:
- __set_task_state(task, TASK_RUNNING);
- mutex_remove_waiter(lock, &waiter, task);
- spin_unlock_mutex(&lock->wait_lock, flags);
+ __set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(lock, &waiter, current);
+err_early_backoff:
+ spin_unlock(&lock->wait_lock);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
preempt_enable();
return ret;
}
+static int __sched
+__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+ struct lockdep_map *nest_lock, unsigned long ip)
+{
+ return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
+}
+
+static int __sched
+__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+ struct lockdep_map *nest_lock, unsigned long ip,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true);
+}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
- might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
- subclass, NULL, _RET_IP_, NULL, 0);
+ __mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -759,27 +911,21 @@ EXPORT_SYMBOL_GPL(mutex_lock_nested);
void __sched
_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
{
- might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
- 0, nest, _RET_IP_, NULL, 0);
+ __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
int __sched
mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
- might_sleep();
- return __mutex_lock_common(lock, TASK_KILLABLE,
- subclass, NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
- might_sleep();
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
- subclass, NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -824,35 +970,37 @@ ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
}
int __sched
-__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
- ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
- 0, &ctx->dep_map, _RET_IP_, ctx, 1);
- if (!ret && ctx->acquired > 1)
+ ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE,
+ 0, ctx ? &ctx->dep_map : NULL, _RET_IP_,
+ ctx);
+ if (!ret && ctx && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock);
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
int __sched
-__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
- ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
- 0, &ctx->dep_map, _RET_IP_, ctx, 1);
+ ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE,
+ 0, ctx ? &ctx->dep_map : NULL, _RET_IP_,
+ ctx);
- if (!ret && ctx->acquired > 1)
+ if (!ret && ctx && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
#endif
@@ -862,8 +1010,8 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
{
struct task_struct *next = NULL;
- unsigned long owner, flags;
DEFINE_WAKE_Q(wake_q);
+ unsigned long owner;
mutex_release(&lock->dep_map, 1, ip);
@@ -880,6 +1028,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+ DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif
if (owner & MUTEX_FLAG_HANDOFF)
@@ -897,7 +1046,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
owner = old;
}
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
debug_mutex_unlock(lock);
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
@@ -914,7 +1063,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
- spin_unlock_mutex(&lock->wait_lock, flags);
+ spin_unlock(&lock->wait_lock);
wake_up_q(&wake_q);
}
@@ -977,37 +1126,34 @@ EXPORT_SYMBOL_GPL(mutex_lock_io);
static noinline void __sched
__mutex_lock_slowpath(struct mutex *lock)
{
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
- NULL, _RET_IP_, NULL, 0);
+ __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock)
{
- return __mutex_lock_common(lock, TASK_KILLABLE, 0,
- NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock)
{
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0,
- NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0,
- NULL, _RET_IP_, ctx, 1);
+ return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL,
+ _RET_IP_, ctx);
}
static noinline int __sched
__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
- return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
- NULL, _RET_IP_, ctx, 1);
+ return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL,
+ _RET_IP_, ctx);
}
#endif
@@ -1028,7 +1174,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
*/
int __sched mutex_trylock(struct mutex *lock)
{
- bool locked = __mutex_trylock(lock, false);
+ bool locked = __mutex_trylock(lock);
if (locked)
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
@@ -1039,32 +1185,34 @@ EXPORT_SYMBOL(mutex_trylock);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
int __sched
-__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
might_sleep();
if (__mutex_trylock_fast(&lock->base)) {
- ww_mutex_set_context_fastpath(lock, ctx);
+ if (ctx)
+ ww_mutex_set_context_fastpath(lock, ctx);
return 0;
}
return __ww_mutex_lock_slowpath(lock, ctx);
}
-EXPORT_SYMBOL(__ww_mutex_lock);
+EXPORT_SYMBOL(ww_mutex_lock);
int __sched
-__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
might_sleep();
if (__mutex_trylock_fast(&lock->base)) {
- ww_mutex_set_context_fastpath(lock, ctx);
+ if (ctx)
+ ww_mutex_set_context_fastpath(lock, ctx);
return 0;
}
return __ww_mutex_lock_interruptible_slowpath(lock, ctx);
}
-EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
+EXPORT_SYMBOL(ww_mutex_lock_interruptible);
#endif
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 4410a4af42a3..6ebc1902f779 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -9,10 +9,6 @@
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
-#define spin_lock_mutex(lock, flags) \
- do { spin_lock(lock); (void)(flags); } while (0)
-#define spin_unlock_mutex(lock, flags) \
- do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, task) \
__list_del((waiter)->list.prev, (waiter)->list.next)
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ce182599cf2e..883cf1b92d90 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,7 +1,6 @@
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
-#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
@@ -18,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
- init_waitqueue_head(&sem->writer);
+ rcuwait_init(&sem->writer);
sem->readers_block = 0;
return 0;
}
@@ -103,7 +102,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
__this_cpu_dec(*sem->read_count);
/* Prod writer to recheck readers_active */
- wake_up(&sem->writer);
+ rcuwait_wake_up(&sem->writer);
}
EXPORT_SYMBOL_GPL(__percpu_up_read);
@@ -160,7 +159,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
*/
/* Wait for all now active readers to complete. */
- wait_event(sem->writer, readers_active_check(sem));
+ rcuwait_wait_event(&sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e3b5520005db..e6b2f7ad3e51 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -263,7 +263,7 @@ pv_wait_early(struct pv_node *prev, int loop)
if ((loop & PV_PREV_CHECK_MASK) != 0)
return false;
- return READ_ONCE(prev->state) != vcpu_running;
+ return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu);
}
/*
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 2f443ed2320a..d340be3a488f 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1179,7 +1179,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
* TASK_INTERRUPTIBLE checks for signals and
* timeout. Ignored otherwise.
*/
- if (unlikely(state == TASK_INTERRUPTIBLE)) {
+ if (likely(state == TASK_INTERRUPTIBLE)) {
/* Signal pending? */
if (signal_pending(current))
ret = -EINTR;
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 1591f6b3539f..5eacab880f67 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -128,7 +128,6 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
void __sched __down_read(struct rw_semaphore *sem)
{
struct rwsem_waiter waiter;
- struct task_struct *tsk;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
@@ -140,13 +139,12 @@ void __sched __down_read(struct rw_semaphore *sem)
goto out;
}
- tsk = current;
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
/* set up my own style of waitqueue */
- waiter.task = tsk;
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
- get_task_struct(tsk);
+ get_task_struct(current);
list_add_tail(&waiter.list, &sem->wait_list);
@@ -158,10 +156,10 @@ void __sched __down_read(struct rw_semaphore *sem)
if (!waiter.task)
break;
schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
out:
;
}
@@ -194,15 +192,13 @@ int __down_read_trylock(struct rw_semaphore *sem)
int __sched __down_write_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
- struct task_struct *tsk;
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* set up my own style of waitqueue */
- tsk = current;
- waiter.task = tsk;
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);
@@ -220,7 +216,7 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
ret = -EINTR;
goto out;
}
- set_task_state(tsk, state);
+ set_current_state(state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 631506004f9e..2ad8d8dc3bb1 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -224,10 +224,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
- struct task_struct *tsk = current;
DEFINE_WAKE_Q(wake_q);
- waiter.task = tsk;
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
raw_spin_lock_irq(&sem->wait_lock);
@@ -254,13 +253,13 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
/* wait to be given the lock */
while (true) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!waiter.task)
break;
schedule();
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
@@ -503,8 +502,6 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
* wake any read locks that were queued ahead of us.
*/
if (count > RWSEM_WAITING_BIAS) {
- DEFINE_WAKE_Q(wake_q);
-
__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
/*
* The wakeup is normally called _after_ the wait_lock
@@ -514,6 +511,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
* for attempting rwsem_try_write_lock().
*/
wake_up_q(&wake_q);
+
+ /*
+ * Reinitialize wake_q after use.
+ */
+ wake_q_init(&wake_q);
}
} else
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index b8120abe594b..9512e37637dc 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -204,19 +204,18 @@ struct semaphore_waiter {
static inline int __sched __down_common(struct semaphore *sem, long state,
long timeout)
{
- struct task_struct *task = current;
struct semaphore_waiter waiter;
list_add_tail(&waiter.list, &sem->wait_list);
- waiter.task = task;
+ waiter.task = current;
waiter.up = false;
for (;;) {
- if (signal_pending_state(state, task))
+ if (signal_pending_state(state, current))
goto interrupted;
if (unlikely(timeout <= 0))
goto timed_out;
- __set_task_state(task, state);
+ __set_current_state(state);
raw_spin_unlock_irq(&sem->lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->lock);
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index db3ccb1dd614..4b082b5cac9e 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -363,14 +363,6 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_raw_spin_lock_nested);
-void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
-{
- __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
- spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
-}
-EXPORT_SYMBOL(_raw_spin_lock_bh_nested);
-
unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
int subclass)
{
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a596cffa..9aa0fccd5d43 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -103,38 +103,14 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
lock->owner_cpu = -1;
}
-static void __spin_lock_debug(raw_spinlock_t *lock)
-{
- u64 i;
- u64 loops = loops_per_jiffy * HZ;
-
- for (i = 0; i < loops; i++) {
- if (arch_spin_trylock(&lock->raw_lock))
- return;
- __delay(1);
- }
- /* lockup suspected: */
- spin_dump(lock, "lockup suspected");
-#ifdef CONFIG_SMP
- trigger_all_cpu_backtrace();
-#endif
-
- /*
- * The trylock above was causing a livelock. Give the lower level arch
- * specific lock code a chance to acquire the lock. We have already
- * printed a warning/backtrace at this point. The non-debug arch
- * specific code might actually succeed in acquiring the lock. If it is
- * not successful, the end-result is the same - there is no forward
- * progress.
- */
- arch_spin_lock(&lock->raw_lock);
-}
-
+/*
+ * We are now relying on the NMI watchdog to detect lockup instead of doing
+ * the detection here with an unfair lock which can cause problem of its own.
+ */
void do_raw_spin_lock(raw_spinlock_t *lock)
{
debug_spin_lock_before(lock);
- if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
- __spin_lock_debug(lock);
+ arch_spin_lock(&lock->raw_lock);
debug_spin_lock_after(lock);
}
@@ -172,32 +148,6 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
-#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
-static void __read_lock_debug(rwlock_t *lock)
-{
- u64 i;
- u64 loops = loops_per_jiffy * HZ;
- int print_once = 1;
-
- for (;;) {
- for (i = 0; i < loops; i++) {
- if (arch_read_trylock(&lock->raw_lock))
- return;
- __delay(1);
- }
- /* lockup suspected: */
- if (print_once) {
- print_once = 0;
- printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
- "%s/%d, %p\n",
- raw_smp_processor_id(), current->comm,
- current->pid, lock);
- dump_stack();
- }
- }
-}
-#endif
-
void do_raw_read_lock(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
@@ -247,32 +197,6 @@ static inline void debug_write_unlock(rwlock_t *lock)
lock->owner_cpu = -1;
}
-#if 0 /* This can cause lockups */
-static void __write_lock_debug(rwlock_t *lock)
-{
- u64 i;
- u64 loops = loops_per_jiffy * HZ;
- int print_once = 1;
-
- for (;;) {
- for (i = 0; i < loops; i++) {
- if (arch_write_trylock(&lock->raw_lock))
- return;
- __delay(1);
- }
- /* lockup suspected: */
- if (print_once) {
- print_once = 0;
- printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
- "%s/%d, %p\n",
- raw_smp_processor_id(), current->comm,
- current->pid, lock);
- dump_stack();
- }
- }
-}
-#endif
-
void do_raw_write_lock(rwlock_t *lock)
{
debug_write_lock_before(lock);
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
new file mode 100644
index 000000000000..da6c9a34f62f
--- /dev/null
+++ b/kernel/locking/test-ww_mutex.c
@@ -0,0 +1,646 @@
+/*
+ * Module-based API test facility for ww_mutexes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ */
+
+#include <linux/kernel.h>
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ww_mutex.h>
+
+static DEFINE_WW_CLASS(ww_class);
+struct workqueue_struct *wq;
+
+struct test_mutex {
+ struct work_struct work;
+ struct ww_mutex mutex;
+ struct completion ready, go, done;
+ unsigned int flags;
+};
+
+#define TEST_MTX_SPIN BIT(0)
+#define TEST_MTX_TRY BIT(1)
+#define TEST_MTX_CTX BIT(2)
+#define __TEST_MTX_LAST BIT(3)
+
+static void test_mutex_work(struct work_struct *work)
+{
+ struct test_mutex *mtx = container_of(work, typeof(*mtx), work);
+
+ complete(&mtx->ready);
+ wait_for_completion(&mtx->go);
+
+ if (mtx->flags & TEST_MTX_TRY) {
+ while (!ww_mutex_trylock(&mtx->mutex))
+ cpu_relax();
+ } else {
+ ww_mutex_lock(&mtx->mutex, NULL);
+ }
+ complete(&mtx->done);
+ ww_mutex_unlock(&mtx->mutex);
+}
+
+static int __test_mutex(unsigned int flags)
+{
+#define TIMEOUT (HZ / 16)
+ struct test_mutex mtx;
+ struct ww_acquire_ctx ctx;
+ int ret;
+
+ ww_mutex_init(&mtx.mutex, &ww_class);
+ ww_acquire_init(&ctx, &ww_class);
+
+ INIT_WORK_ONSTACK(&mtx.work, test_mutex_work);
+ init_completion(&mtx.ready);
+ init_completion(&mtx.go);
+ init_completion(&mtx.done);
+ mtx.flags = flags;
+
+ schedule_work(&mtx.work);
+
+ wait_for_completion(&mtx.ready);
+ ww_mutex_lock(&mtx.mutex, (flags & TEST_MTX_CTX) ? &ctx : NULL);
+ complete(&mtx.go);
+ if (flags & TEST_MTX_SPIN) {
+ unsigned long timeout = jiffies + TIMEOUT;
+
+ ret = 0;
+ do {
+ if (completion_done(&mtx.done)) {
+ ret = -EINVAL;
+ break;
+ }
+ cpu_relax();
+ } while (time_before(jiffies, timeout));
+ } else {
+ ret = wait_for_completion_timeout(&mtx.done, TIMEOUT);
+ }
+ ww_mutex_unlock(&mtx.mutex);
+ ww_acquire_fini(&ctx);
+
+ if (ret) {
+ pr_err("%s(flags=%x): mutual exclusion failure\n",
+ __func__, flags);
+ ret = -EINVAL;
+ }
+
+ flush_work(&mtx.work);
+ destroy_work_on_stack(&mtx.work);
+ return ret;
+#undef TIMEOUT
+}
+
+static int test_mutex(void)
+{
+ int ret;
+ int i;
+
+ for (i = 0; i < __TEST_MTX_LAST; i++) {
+ ret = __test_mutex(i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int test_aa(void)
+{
+ struct ww_mutex mutex;
+ struct ww_acquire_ctx ctx;
+ int ret;
+
+ ww_mutex_init(&mutex, &ww_class);
+ ww_acquire_init(&ctx, &ww_class);
+
+ ww_mutex_lock(&mutex, &ctx);
+
+ if (ww_mutex_trylock(&mutex)) {
+ pr_err("%s: trylocked itself!\n", __func__);
+ ww_mutex_unlock(&mutex);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = ww_mutex_lock(&mutex, &ctx);
+ if (ret != -EALREADY) {
+ pr_err("%s: missed deadlock for recursing, ret=%d\n",
+ __func__, ret);
+ if (!ret)
+ ww_mutex_unlock(&mutex);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ ww_mutex_unlock(&mutex);
+ ww_acquire_fini(&ctx);
+ return ret;
+}
+
+struct test_abba {
+ struct work_struct work;
+ struct ww_mutex a_mutex;
+ struct ww_mutex b_mutex;
+ struct completion a_ready;
+ struct completion b_ready;
+ bool resolve;
+ int result;
+};
+
+static void test_abba_work(struct work_struct *work)
+{
+ struct test_abba *abba = container_of(work, typeof(*abba), work);
+ struct ww_acquire_ctx ctx;
+ int err;
+
+ ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_lock(&abba->b_mutex, &ctx);
+
+ complete(&abba->b_ready);
+ wait_for_completion(&abba->a_ready);
+
+ err = ww_mutex_lock(&abba->a_mutex, &ctx);
+ if (abba->resolve && err == -EDEADLK) {
+ ww_mutex_unlock(&abba->b_mutex);
+ ww_mutex_lock_slow(&abba->a_mutex, &ctx);
+ err = ww_mutex_lock(&abba->b_mutex, &ctx);
+ }
+
+ if (!err)
+ ww_mutex_unlock(&abba->a_mutex);
+ ww_mutex_unlock(&abba->b_mutex);
+ ww_acquire_fini(&ctx);
+
+ abba->result = err;
+}
+
+static int test_abba(bool resolve)
+{
+ struct test_abba abba;
+ struct ww_acquire_ctx ctx;
+ int err, ret;
+
+ ww_mutex_init(&abba.a_mutex, &ww_class);
+ ww_mutex_init(&abba.b_mutex, &ww_class);
+ INIT_WORK_ONSTACK(&abba.work, test_abba_work);
+ init_completion(&abba.a_ready);
+ init_completion(&abba.b_ready);
+ abba.resolve = resolve;
+
+ schedule_work(&abba.work);
+
+ ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_lock(&abba.a_mutex, &ctx);
+
+ complete(&abba.a_ready);
+ wait_for_completion(&abba.b_ready);
+
+ err = ww_mutex_lock(&abba.b_mutex, &ctx);
+ if (resolve && err == -EDEADLK) {
+ ww_mutex_unlock(&abba.a_mutex);
+ ww_mutex_lock_slow(&abba.b_mutex, &ctx);
+ err = ww_mutex_lock(&abba.a_mutex, &ctx);
+ }
+
+ if (!err)
+ ww_mutex_unlock(&abba.b_mutex);
+ ww_mutex_unlock(&abba.a_mutex);
+ ww_acquire_fini(&ctx);
+
+ flush_work(&abba.work);
+ destroy_work_on_stack(&abba.work);
+
+ ret = 0;
+ if (resolve) {
+ if (err || abba.result) {
+ pr_err("%s: failed to resolve ABBA deadlock, A err=%d, B err=%d\n",
+ __func__, err, abba.result);
+ ret = -EINVAL;
+ }
+ } else {
+ if (err != -EDEADLK && abba.result != -EDEADLK) {
+ pr_err("%s: missed ABBA deadlock, A err=%d, B err=%d\n",
+ __func__, err, abba.result);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
+struct test_cycle {
+ struct work_struct work;
+ struct ww_mutex a_mutex;
+ struct ww_mutex *b_mutex;
+ struct completion *a_signal;
+ struct completion b_signal;
+ int result;
+};
+
+static void test_cycle_work(struct work_struct *work)
+{
+ struct test_cycle *cycle = container_of(work, typeof(*cycle), work);
+ struct ww_acquire_ctx ctx;
+ int err;
+
+ ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_lock(&cycle->a_mutex, &ctx);
+
+ complete(cycle->a_signal);
+ wait_for_completion(&cycle->b_signal);
+
+ err = ww_mutex_lock(cycle->b_mutex, &ctx);
+ if (err == -EDEADLK) {
+ ww_mutex_unlock(&cycle->a_mutex);
+ ww_mutex_lock_slow(cycle->b_mutex, &ctx);
+ err = ww_mutex_lock(&cycle->a_mutex, &ctx);
+ }
+
+ if (!err)
+ ww_mutex_unlock(cycle->b_mutex);
+ ww_mutex_unlock(&cycle->a_mutex);
+ ww_acquire_fini(&ctx);
+
+ cycle->result = err;
+}
+
+static int __test_cycle(unsigned int nthreads)
+{
+ struct test_cycle *cycles;
+ unsigned int n, last = nthreads - 1;
+ int ret;
+
+ cycles = kmalloc_array(nthreads, sizeof(*cycles), GFP_KERNEL);
+ if (!cycles)
+ return -ENOMEM;
+
+ for (n = 0; n < nthreads; n++) {
+ struct test_cycle *cycle = &cycles[n];
+
+ ww_mutex_init(&cycle->a_mutex, &ww_class);
+ if (n == last)
+ cycle->b_mutex = &cycles[0].a_mutex;
+ else
+ cycle->b_mutex = &cycles[n + 1].a_mutex;
+
+ if (n == 0)
+ cycle->a_signal = &cycles[last].b_signal;
+ else
+ cycle->a_signal = &cycles[n - 1].b_signal;
+ init_completion(&cycle->b_signal);
+
+ INIT_WORK(&cycle->work, test_cycle_work);
+ cycle->result = 0;
+ }
+
+ for (n = 0; n < nthreads; n++)
+ queue_work(wq, &cycles[n].work);
+
+ flush_workqueue(wq);
+
+ ret = 0;
+ for (n = 0; n < nthreads; n++) {
+ struct test_cycle *cycle = &cycles[n];
+
+ if (!cycle->result)
+ continue;
+
+ pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n",
+ n, nthreads, cycle->result);
+ ret = -EINVAL;
+ break;
+ }
+
+ for (n = 0; n < nthreads; n++)
+ ww_mutex_destroy(&cycles[n].a_mutex);
+ kfree(cycles);
+ return ret;
+}
+
+static int test_cycle(unsigned int ncpus)
+{
+ unsigned int n;
+ int ret;
+
+ for (n = 2; n <= ncpus + 1; n++) {
+ ret = __test_cycle(n);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+struct stress {
+ struct work_struct work;
+ struct ww_mutex *locks;
+ int nlocks;
+ int nloops;
+};
+
+static int *get_random_order(int count)
+{
+ int *order;
+ int n, r, tmp;
+
+ order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY);
+ if (!order)
+ return order;
+
+ for (n = 0; n < count; n++)
+ order[n] = n;
+
+ for (n = count - 1; n > 1; n--) {
+ r = get_random_int() % (n + 1);
+ if (r != n) {
+ tmp = order[n];
+ order[n] = order[r];
+ order[r] = tmp;
+ }
+ }
+
+ return order;
+}
+
+static void dummy_load(struct stress *stress)
+{
+ usleep_range(1000, 2000);
+}
+
+static void stress_inorder_work(struct work_struct *work)
+{
+ struct stress *stress = container_of(work, typeof(*stress), work);
+ const int nlocks = stress->nlocks;
+ struct ww_mutex *locks = stress->locks;
+ struct ww_acquire_ctx ctx;
+ int *order;
+
+ order = get_random_order(nlocks);
+ if (!order)
+ return;
+
+ ww_acquire_init(&ctx, &ww_class);
+
+ do {
+ int contended = -1;
+ int n, err;
+
+retry:
+ err = 0;
+ for (n = 0; n < nlocks; n++) {
+ if (n == contended)
+ continue;
+
+ err = ww_mutex_lock(&locks[order[n]], &ctx);
+ if (err < 0)
+ break;
+ }
+ if (!err)
+ dummy_load(stress);
+
+ if (contended > n)
+ ww_mutex_unlock(&locks[order[contended]]);
+ contended = n;
+ while (n--)
+ ww_mutex_unlock(&locks[order[n]]);
+
+ if (err == -EDEADLK) {
+ ww_mutex_lock_slow(&locks[order[contended]], &ctx);
+ goto retry;
+ }
+
+ if (err) {
+ pr_err_once("stress (%s) failed with %d\n",
+ __func__, err);
+ break;
+ }
+ } while (--stress->nloops);
+
+ ww_acquire_fini(&ctx);
+
+ kfree(order);
+ kfree(stress);
+}
+
+struct reorder_lock {
+ struct list_head link;
+ struct ww_mutex *lock;
+};
+
+static void stress_reorder_work(struct work_struct *work)
+{
+ struct stress *stress = container_of(work, typeof(*stress), work);
+ LIST_HEAD(locks);
+ struct ww_acquire_ctx ctx;
+ struct reorder_lock *ll, *ln;
+ int *order;
+ int n, err;
+
+ order = get_random_order(stress->nlocks);
+ if (!order)
+ return;
+
+ for (n = 0; n < stress->nlocks; n++) {
+ ll = kmalloc(sizeof(*ll), GFP_KERNEL);
+ if (!ll)
+ goto out;
+
+ ll->lock = &stress->locks[order[n]];
+ list_add(&ll->link, &locks);
+ }
+ kfree(order);
+ order = NULL;
+
+ ww_acquire_init(&ctx, &ww_class);
+
+ do {
+ list_for_each_entry(ll, &locks, link) {
+ err = ww_mutex_lock(ll->lock, &ctx);
+ if (!err)
+ continue;
+
+ ln = ll;
+ list_for_each_entry_continue_reverse(ln, &locks, link)
+ ww_mutex_unlock(ln->lock);
+
+ if (err != -EDEADLK) {
+ pr_err_once("stress (%s) failed with %d\n",
+ __func__, err);
+ break;
+ }
+
+ ww_mutex_lock_slow(ll->lock, &ctx);
+ list_move(&ll->link, &locks); /* restarts iteration */
+ }
+
+ dummy_load(stress);
+ list_for_each_entry(ll, &locks, link)
+ ww_mutex_unlock(ll->lock);
+ } while (--stress->nloops);
+
+ ww_acquire_fini(&ctx);
+
+out:
+ list_for_each_entry_safe(ll, ln, &locks, link)
+ kfree(ll);
+ kfree(order);
+ kfree(stress);
+}
+
+static void stress_one_work(struct work_struct *work)
+{
+ struct stress *stress = container_of(work, typeof(*stress), work);
+ const int nlocks = stress->nlocks;
+ struct ww_mutex *lock = stress->locks + (get_random_int() % nlocks);
+ int err;
+
+ do {
+ err = ww_mutex_lock(lock, NULL);
+ if (!err) {
+ dummy_load(stress);
+ ww_mutex_unlock(lock);
+ } else {
+ pr_err_once("stress (%s) failed with %d\n",
+ __func__, err);
+ break;
+ }
+ } while (--stress->nloops);
+
+ kfree(stress);
+}
+
+#define STRESS_INORDER BIT(0)
+#define STRESS_REORDER BIT(1)
+#define STRESS_ONE BIT(2)
+#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE)
+
+static int stress(int nlocks, int nthreads, int nloops, unsigned int flags)
+{
+ struct ww_mutex *locks;
+ int n;
+
+ locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL);
+ if (!locks)
+ return -ENOMEM;
+
+ for (n = 0; n < nlocks; n++)
+ ww_mutex_init(&locks[n], &ww_class);
+
+ for (n = 0; nthreads; n++) {
+ struct stress *stress;
+ void (*fn)(struct work_struct *work);
+
+ fn = NULL;
+ switch (n & 3) {
+ case 0:
+ if (flags & STRESS_INORDER)
+ fn = stress_inorder_work;
+ break;
+ case 1:
+ if (flags & STRESS_REORDER)
+ fn = stress_reorder_work;
+ break;
+ case 2:
+ if (flags & STRESS_ONE)
+ fn = stress_one_work;
+ break;
+ }
+
+ if (!fn)
+ continue;
+
+ stress = kmalloc(sizeof(*stress), GFP_KERNEL);
+ if (!stress)
+ break;
+
+ INIT_WORK(&stress->work, fn);
+ stress->locks = locks;
+ stress->nlocks = nlocks;
+ stress->nloops = nloops;
+
+ queue_work(wq, &stress->work);
+ nthreads--;
+ }
+
+ flush_workqueue(wq);
+
+ for (n = 0; n < nlocks; n++)
+ ww_mutex_destroy(&locks[n]);
+ kfree(locks);
+
+ return 0;
+}
+
+static int __init test_ww_mutex_init(void)
+{
+ int ncpus = num_online_cpus();
+ int ret;
+
+ wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0);
+ if (!wq)
+ return -ENOMEM;
+
+ ret = test_mutex();
+ if (ret)
+ return ret;
+
+ ret = test_aa();
+ if (ret)
+ return ret;
+
+ ret = test_abba(false);
+ if (ret)
+ return ret;
+
+ ret = test_abba(true);
+ if (ret)
+ return ret;
+
+ ret = test_cycle(ncpus);
+ if (ret)
+ return ret;
+
+ ret = stress(16, 2*ncpus, 1<<10, STRESS_INORDER);
+ if (ret)
+ return ret;
+
+ ret = stress(16, 2*ncpus, 1<<10, STRESS_REORDER);
+ if (ret)
+ return ret;
+
+ ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void __exit test_ww_mutex_exit(void)
+{
+ destroy_workqueue(wq);
+}
+
+module_init(test_ww_mutex_init);
+module_exit(test_ww_mutex_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/kernel/pid.c b/kernel/pid.c
index f66162f2359b..0291804151b5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -68,9 +68,7 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
* the scheme scales to up to 4 million PIDs, runtime.
*/
struct pid_namespace init_pid_ns = {
- .kref = {
- .refcount = ATOMIC_INIT(2),
- },
+ .kref = KREF_INIT(2),
.pidmap = {
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
},
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 07c89e4b5d60..acedbe626d47 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -716,6 +716,19 @@ source "lib/Kconfig.kmemcheck"
source "lib/Kconfig.kasan"
+config DEBUG_REFCOUNT
+ bool "Verbose refcount checks"
+ help
+ Say Y here if you want reference counters (refcount_t and kref) to
+ generate WARNs on dubious usage. Without this refcount_t will still
+ be a saturating counter and avoid Use-After-Free by turning it into
+ a resource leak Denial-Of-Service.
+
+ Use of this option will increase kernel text size but will alert the
+ admin of potential abuse.
+
+ If in doubt, say "N".
+
endmenu # "Memory Debugging"
config ARCH_HAS_KCOV
@@ -1166,6 +1179,18 @@ config LOCK_TORTURE_TEST
Say M if you want these torture tests to build as a module.
Say N if you are unsure.
+config WW_MUTEX_SELFTEST
+ tristate "Wait/wound mutex selftests"
+ help
+ This option provides a kernel module that runs tests on the
+ on the struct ww_mutex locking API.
+
+ It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction
+ with this test harness.
+
+ Say M if you want these self tests to build as a module.
+ Say N if you are unsure.
+
endmenu # lock debugging
config TRACE_IRQFLAGS
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 1904a93f47d5..d491529332f4 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -920,7 +920,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
BT_DBG("dev %p removing %speer %p", dev,
last ? "last " : "1 ", peer);
BT_DBG("chan %p orig refcnt %d", chan,
- atomic_read(&chan->kref.refcount));
+ kref_read(&chan->kref));
l2cap_chan_put(chan);
break;
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 5f123c3320a7..f0095fd79818 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -810,7 +810,7 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked)
/* AMP Manager functions */
struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr)
{
- BT_DBG("mgr %p orig refcnt %d", mgr, atomic_read(&mgr->kref.refcount));
+ BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref));
kref_get(&mgr->kref);
@@ -833,7 +833,7 @@ static void amp_mgr_destroy(struct kref *kref)
int amp_mgr_put(struct amp_mgr *mgr)
{
- BT_DBG("mgr %p orig refcnt %d", mgr, atomic_read(&mgr->kref.refcount));
+ BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref));
return kref_put(&mgr->kref, &amp_mgr_destroy);
}
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index e32f34189007..02a4ccc04e1e 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -24,7 +24,7 @@
void amp_ctrl_get(struct amp_ctrl *ctrl)
{
BT_DBG("ctrl %p orig refcnt %d", ctrl,
- atomic_read(&ctrl->kref.refcount));
+ kref_read(&ctrl->kref));
kref_get(&ctrl->kref);
}
@@ -42,7 +42,7 @@ static void amp_ctrl_destroy(struct kref *kref)
int amp_ctrl_put(struct amp_ctrl *ctrl)
{
BT_DBG("ctrl %p orig refcnt %d", ctrl,
- atomic_read(&ctrl->kref.refcount));
+ kref_read(&ctrl->kref));
return kref_put(&ctrl->kref, &amp_ctrl_destroy);
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ce0b5dd01953..fc7f321a3823 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -481,14 +481,14 @@ static void l2cap_chan_destroy(struct kref *kref)
void l2cap_chan_hold(struct l2cap_chan *c)
{
- BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount));
+ BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
kref_get(&c->kref);
}
void l2cap_chan_put(struct l2cap_chan *c)
{
- BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount));
+ BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
kref_put(&c->kref, l2cap_chan_destroy);
}
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 770c52701efa..bad3d4ae43f6 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3425,7 +3425,7 @@ static void ceph_msg_release(struct kref *kref)
struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
{
dout("%s %p (was %d)\n", __func__, msg,
- atomic_read(&msg->kref.refcount));
+ kref_read(&msg->kref));
kref_get(&msg->kref);
return msg;
}
@@ -3434,7 +3434,7 @@ EXPORT_SYMBOL(ceph_msg_get);
void ceph_msg_put(struct ceph_msg *msg)
{
dout("%s %p (was %d)\n", __func__, msg,
- atomic_read(&msg->kref.refcount));
+ kref_read(&msg->kref));
kref_put(&msg->kref, ceph_msg_release);
}
EXPORT_SYMBOL(ceph_msg_put);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 842f049abb86..f3378ba1a828 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -438,7 +438,7 @@ static void ceph_osdc_release_request(struct kref *kref)
void ceph_osdc_get_request(struct ceph_osd_request *req)
{
dout("%s %p (was %d)\n", __func__, req,
- atomic_read(&req->r_kref.refcount));
+ kref_read(&req->r_kref));
kref_get(&req->r_kref);
}
EXPORT_SYMBOL(ceph_osdc_get_request);
@@ -447,7 +447,7 @@ void ceph_osdc_put_request(struct ceph_osd_request *req)
{
if (req) {
dout("%s %p (was %d)\n", __func__, req,
- atomic_read(&req->r_kref.refcount));
+ kref_read(&req->r_kref));
kref_put(&req->r_kref, ceph_osdc_release_request);
}
}
@@ -487,11 +487,11 @@ static void request_reinit(struct ceph_osd_request *req)
struct ceph_msg *reply_msg = req->r_reply;
dout("%s req %p\n", __func__, req);
- WARN_ON(atomic_read(&req->r_kref.refcount) != 1);
+ WARN_ON(kref_read(&req->r_kref) != 1);
request_release_checks(req);
- WARN_ON(atomic_read(&request_msg->kref.refcount) != 1);
- WARN_ON(atomic_read(&reply_msg->kref.refcount) != 1);
+ WARN_ON(kref_read(&request_msg->kref) != 1);
+ WARN_ON(kref_read(&reply_msg->kref) != 1);
target_destroy(&req->r_t);
request_init(req);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8147e8d56eb2..f39e3e11f9aa 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1358,7 +1358,7 @@ static int c_show(struct seq_file *m, void *p)
ifdebug(CACHE)
seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
convert_to_wallclock(cp->expiry_time),
- atomic_read(&cp->ref.refcount), cp->flags);
+ kref_read(&cp->ref), cp->flags);
cache_get(cp);
if (cache_check(cd, cp, NULL))
/* cache_check does a cache_put on failure */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 9c9db55a0c1e..7bfe1fb42add 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -490,7 +490,7 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
svc_xprt_get(xprt);
dprintk("svc: transport %p dequeued, inuse=%d\n",
- xprt, atomic_read(&xprt->xpt_ref.refcount));
+ xprt, kref_read(&xprt->xpt_ref));
}
spin_unlock_bh(&pool->sp_lock);
out:
@@ -822,7 +822,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
- atomic_read(&xprt->xpt_ref.refcount));
+ kref_read(&xprt->xpt_ref));
rqstp->rq_deferred = svc_deferred_dequeue(xprt);
if (rqstp->rq_deferred)
len = svc_deferred_recv(rqstp);
@@ -980,7 +980,7 @@ static void svc_age_temp_xprts(unsigned long closure)
* through, close it. */
if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
continue;
- if (atomic_read(&xprt->xpt_ref.refcount) > 1 ||
+ if (kref_read(&xprt->xpt_ref) > 1 ||
test_bit(XPT_BUSY, &xprt->xpt_flags))
continue;
list_del_init(le);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index e112da8005b5..bb8db3cb8032 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -126,13 +126,18 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister);
static struct hlist_head auth_domain_table[DN_HASHMAX];
static DEFINE_SPINLOCK(auth_domain_lock);
+static void auth_domain_release(struct kref *kref)
+{
+ struct auth_domain *dom = container_of(kref, struct auth_domain, ref);
+
+ hlist_del(&dom->hash);
+ dom->flavour->domain_release(dom);
+ spin_unlock(&auth_domain_lock);
+}
+
void auth_domain_put(struct auth_domain *dom)
{
- if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) {
- hlist_del(&dom->hash);
- dom->flavour->domain_release(dom);
- spin_unlock(&auth_domain_lock);
- }
+ kref_put_lock(&dom->ref, auth_domain_release, &auth_domain_lock);
}
EXPORT_SYMBOL_GPL(auth_domain_put);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ca2799af05a6..39652d390a9c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1201,9 +1201,9 @@ static void __svc_rdma_free(struct work_struct *work)
ib_drain_qp(rdma->sc_qp);
/* We should only be called from kref_put */
- if (atomic_read(&xprt->xpt_ref.refcount) != 0)
+ if (kref_read(&xprt->xpt_ref) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
- atomic_read(&xprt->xpt_ref.refcount));
+ kref_read(&xprt->xpt_ref));
/*
* Destroy queued, but not processed read completions. Note
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 5d721e990876..f067be814626 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -78,12 +78,6 @@ static inline void *kvzalloc(size_t size)
return __aa_kvmalloc(size, __GFP_ZERO);
}
-/* returns 0 if kref not incremented */
-static inline int kref_get_not0(struct kref *kref)
-{
- return atomic_inc_not_zero(&kref->refcount);
-}
-
/**
* aa_strneq - compare null terminated @str to a non null terminated substring
* @str: a null terminated string
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 52275f040a5f..46467aaa557b 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -287,7 +287,7 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p)
*/
static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p)
{
- if (p && kref_get_not0(&p->count))
+ if (p && kref_get_unless_zero(&p->count))
return p;
return NULL;
@@ -307,7 +307,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p)
rcu_read_lock();
do {
c = rcu_dereference(*p);
- } while (c && !kref_get_not0(&c->count));
+ } while (c && !kref_get_unless_zero(&c->count));
rcu_read_unlock();
return c;
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
new file mode 100644
index 000000000000..6905da965f3b
--- /dev/null
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+
+if /sbin/modprobe -q test-ww_mutex; then
+ /sbin/modprobe -q -r test-ww_mutex
+ echo "locking/ww_mutex: ok"
+else
+ echo "locking/ww_mutex: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
index b9611c523723..41bae5824339 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -4,3 +4,4 @@ LOCK03
LOCK04
LOCK05
LOCK06
+LOCK07
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
new file mode 100644
index 000000000000..97dadd1a9e45
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
@@ -0,0 +1 @@
+locktorture.torture_type=ww_mutex_lock