aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds2017-02-21 10:57:33 -0800
committerLinus Torvalds2017-02-21 10:57:33 -0800
commit772c8f6f3bbd3ceb94a89373473083e3e1113554 (patch)
treed2b34e8f1841a169d59adf53074de217a9e0f977 /lib
parentfd4a61e08aa79f2b7835b25c6f94f27bd2d65990 (diff)
parent818551e2b2c662a1b26de6b4f7d6b8411a838d18 (diff)
Merge tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: - blk-mq scheduling framework from me and Omar, with a port of the deadline scheduler for this framework. A port of BFQ from Paolo is in the works, and should be ready for 4.12. - Various fixups and improvements to the above scheduling framework from Omar, Paolo, Bart, me, others. - Cleanup of the exported sysfs blk-mq data into debugfs, from Omar. This allows us to export more information that helps debug hangs or performance issues, without cluttering or abusing the sysfs API. - Fixes for the sbitmap code, the scalable bitmap code that was migrated from blk-mq, from Omar. - Removal of the BLOCK_PC support in struct request, and refactoring of carrying SCSI payloads in the block layer. This cleans up the code nicely, and enables us to kill the SCSI specific parts of struct request, shrinking it down nicely. From Christoph mainly, with help from Hannes. - Support for ranged discard requests and discard merging, also from Christoph. - Support for OPAL in the block layer, and for NVMe as well. Mainly from Scott Bauer, with fixes/updates from various others folks. - Error code fixup for gdrom from Christophe. - cciss pci irq allocation cleanup from Christoph. - Making the cdrom device operations read only, from Kees Cook. - Fixes for duplicate bdi registrations and bdi/queue life time problems from Jan and Dan. - Set of fixes and updates for lightnvm, from Matias and Javier. - A few fixes for nbd from Josef, using idr to name devices and a workqueue deadlock fix on receive. Also marks Josef as the current maintainer of nbd. - Fix from Josef, overwriting queue settings when the number of hardware queues is updated for a blk-mq device. - NVMe fix from Keith, ensuring that we don't repeatedly mark and IO aborted, if we didn't end up aborting it. - SG gap merging fix from Ming Lei for block. - Loop fix also from Ming, fixing a race and crash between setting loop status and IO. - Two block race fixes from Tahsin, fixing request list iteration and fixing a race between device registration and udev device add notifiations. - Double free fix from cgroup writeback, from Tejun. - Another double free fix in blkcg, from Hou Tao. - Partition overflow fix for EFI from Alden Tondettar. * tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block: (156 commits) nvme: Check for Security send/recv support before issuing commands. block/sed-opal: allocate struct opal_dev dynamically block/sed-opal: tone down not supported warnings block: don't defer flushes on blk-mq + scheduling blk-mq-sched: ask scheduler for work, if we failed dispatching leftovers blk-mq: don't special case flush inserts for blk-mq-sched blk-mq-sched: don't add flushes to the head of requeue queue blk-mq: have blk_mq_dispatch_rq_list() return if we queued IO or not block: do not allow updates through sysfs until registration completes lightnvm: set default lun range when no luns are specified lightnvm: fix off-by-one error on target initialization Maintainers: Modify SED list from nvme to block Move stack parameters for sed_ioctl to prevent oversized stack with CONFIG_KASAN uapi: sed-opal fix IOW for activate lsp to use correct struct cdrom: Make device operations read-only elevator: fix loading wrong elevator type for blk-mq devices cciss: switch to pci_irq_alloc_vectors block/loop: fix race between I/O and set_status blk-mq-sched: don't hold queue_lock when calling exit_icq block: set make_request_fn manually in blk_mq_update_nr_hw_queues ...
Diffstat (limited to 'lib')
-rw-r--r--lib/sbitmap.c139
1 files changed, 131 insertions, 8 deletions
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2cecf05c82fd..55e11c4b2f3b 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -17,6 +17,7 @@
#include <linux/random.h>
#include <linux/sbitmap.h>
+#include <linux/seq_file.h>
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
gfp_t flags, int node)
@@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb)
}
EXPORT_SYMBOL_GPL(sbitmap_weight);
+void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+ seq_printf(m, "depth=%u\n", sb->depth);
+ seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+ seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
+ seq_printf(m, "map_nr=%u\n", sb->map_nr);
+}
+EXPORT_SYMBOL_GPL(sbitmap_show);
+
+static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
+{
+ if ((offset & 0xf) == 0) {
+ if (offset != 0)
+ seq_putc(m, '\n');
+ seq_printf(m, "%08x:", offset);
+ }
+ if ((offset & 0x1) == 0)
+ seq_putc(m, ' ');
+ seq_printf(m, "%02x", byte);
+}
+
+void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+ u8 byte = 0;
+ unsigned int byte_bits = 0;
+ unsigned int offset = 0;
+ int i;
+
+ for (i = 0; i < sb->map_nr; i++) {
+ unsigned long word = READ_ONCE(sb->map[i].word);
+ unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+
+ while (word_bits > 0) {
+ unsigned int bits = min(8 - byte_bits, word_bits);
+
+ byte |= (word & (BIT(bits) - 1)) << byte_bits;
+ byte_bits += bits;
+ if (byte_bits == 8) {
+ emit_byte(m, offset, byte);
+ byte = 0;
+ byte_bits = 0;
+ offset++;
+ }
+ word >>= bits;
+ word_bits -= bits;
+ }
+ }
+ if (byte_bits) {
+ emit_byte(m, offset, byte);
+ offset++;
+ }
+ if (offset)
+ seq_putc(m, '\n');
+}
+EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
+
static unsigned int sbq_calc_wake_batch(unsigned int depth)
{
unsigned int wake_batch;
@@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
- sbq->wake_batch = sbq_calc_wake_batch(depth);
+ unsigned int wake_batch = sbq_calc_wake_batch(depth);
+ int i;
+
+ if (sbq->wake_batch != wake_batch) {
+ WRITE_ONCE(sbq->wake_batch, wake_batch);
+ /*
+ * Pairs with the memory barrier in sbq_wake_up() to ensure that
+ * the batch size is updated before the wait counts.
+ */
+ smp_mb__before_atomic();
+ for (i = 0; i < SBQ_WAIT_QUEUES; i++)
+ atomic_set(&sbq->ws[i].wait_cnt, 1);
+ }
sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
static void sbq_wake_up(struct sbitmap_queue *sbq)
{
struct sbq_wait_state *ws;
+ unsigned int wake_batch;
int wait_cnt;
- /* Ensure that the wait list checks occur after clear_bit(). */
- smp_mb();
+ /*
+ * Pairs with the memory barrier in set_current_state() to ensure the
+ * proper ordering of clear_bit()/waitqueue_active() in the waker and
+ * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
+ * the comment on waitqueue_active(). This is __after_atomic because we
+ * just did clear_bit() in the caller.
+ */
+ smp_mb__after_atomic();
ws = sbq_wake_ptr(sbq);
if (!ws)
return;
wait_cnt = atomic_dec_return(&ws->wait_cnt);
- if (unlikely(wait_cnt < 0))
- wait_cnt = atomic_inc_return(&ws->wait_cnt);
- if (wait_cnt == 0) {
- atomic_add(sbq->wake_batch, &ws->wait_cnt);
+ if (wait_cnt <= 0) {
+ wake_batch = READ_ONCE(sbq->wake_batch);
+ /*
+ * Pairs with the memory barrier in sbitmap_queue_resize() to
+ * ensure that we see the batch size update before the wait
+ * count is reset.
+ */
+ smp_mb__before_atomic();
+ /*
+ * If there are concurrent callers to sbq_wake_up(), the last
+ * one to decrement the wait count below zero will bump it back
+ * up. If there is a concurrent resize, the count reset will
+ * either cause the cmpxchg to fail or overwrite after the
+ * cmpxchg.
+ */
+ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
sbq_index_atomic_inc(&sbq->wake_index);
wake_up(&ws->wait);
}
@@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
int i, wake_index;
/*
- * Make sure all changes prior to this are visible from other CPUs.
+ * Pairs with the memory barrier in set_current_state() like in
+ * sbq_wake_up().
*/
smp_mb();
wake_index = atomic_read(&sbq->wake_index);
@@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
}
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
+
+void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
+{
+ bool first;
+ int i;
+
+ sbitmap_show(&sbq->sb, m);
+
+ seq_puts(m, "alloc_hint={");
+ first = true;
+ for_each_possible_cpu(i) {
+ if (!first)
+ seq_puts(m, ", ");
+ first = false;
+ seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
+ }
+ seq_puts(m, "}\n");
+
+ seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
+ seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+
+ seq_puts(m, "ws={\n");
+ for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
+ struct sbq_wait_state *ws = &sbq->ws[i];
+
+ seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
+ atomic_read(&ws->wait_cnt),
+ waitqueue_active(&ws->wait) ? "active" : "inactive");
+ }
+ seq_puts(m, "}\n");
+
+ seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_show);