about | summary | refs | log | tree | commit | diff
path: root/block/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/bio.c')
-rw-r--r-- block/bio.c 580
1 files changed, 65 insertions, 515 deletions
diff --git a/block/bio.c b/block/bio.c
index 94d697217887..21cbaa6a1c20 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -17,6 +17,7 @@
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
+#include <linux/sched/sysctl.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -588,6 +589,49 @@ void bio_truncate(struct bio *bio, unsigned new_size)
}
/**
+ * guard_bio_eod - truncate a BIO to fit the block device
+ * @bio: bio to truncate
+ *
+ * This allows us to do IO even on the odd last sectors of a device, even if the
+ * block size is some multiple of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device, and clear the end of
+ * the buffer head manually. Truly out-of-range accesses will turn into actual
+ * I/O errors, this only handles the "we need to be able to do I/O at the final
+ * sector" case.
+ *
+ * The limit is the sector count of the partition when __disk_get_part() finds
+ * one for bio->bi_partno, otherwise the capacity of the whole disk. A limit
+ * of zero leaves the bio untouched. Only a bio that starts below the limit
+ * but extends past it is shortened, via bio_truncate().
+ */
+void guard_bio_eod(struct bio *bio)
+{
+ sector_t maxsector;
+ struct hd_struct *part;
+
+ rcu_read_lock(); /* partition lookup and size read are bracketed by RCU */
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
+ if (part)
+ maxsector = part_nr_sects_read(part);
+ else
+ maxsector = get_capacity(bio->bi_disk); /* no partition found: use the disk's capacity */
+ rcu_read_unlock();
+
+ if (!maxsector) /* limit of zero: leave the bio as is */
+ return;
+
+ /*
+ * If the *whole* IO is past the end of the device,
+ * let it through, and the IO layer will turn it into
+ * an EIO.
+ */
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
+ return;
+
+ maxsector -= bio->bi_iter.bi_sector; /* from here on: sectors left between the bio's start and the limit */
+ if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) /* bi_size >> 9: presumably bytes to 512-byte sectors - confirm */
+ return;
+
+ bio_truncate(bio, maxsector << 9); /* new size is the remaining sectors shifted back by 9 */
+}
+
+/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
@@ -679,6 +723,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_clone_fast);
+const char *bio_devname(struct bio *bio, char *buf) /* name lookup for the disk/partition a bio refers to */
+{
+ return disk_name(bio->bi_disk, bio->bi_partno, buf); /* buf is passed straight to disk_name(); its required size is not visible here - confirm at the caller */
+}
+EXPORT_SYMBOL(bio_devname);
+
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
bool *same_page)
@@ -730,7 +780,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
*
* This should only be used by passthrough bios.
*/
-static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
bool *same_page)
{
@@ -1019,12 +1069,21 @@ static void submit_bio_wait_endio(struct bio *bio)
int submit_bio_wait(struct bio *bio)
{
DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map); /* on-stack completion this function waits on */
+ unsigned long hang_check;
bio->bi_private = &done; /* presumably how submit_bio_wait_endio() finds the completion - confirm */
bio->bi_end_io = submit_bio_wait_endio;
bio->bi_opf |= REQ_SYNC; /* set REQ_SYNC on the bio before submitting */
submit_bio(bio);
- wait_for_completion_io(&done);
+
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ hang_check = sysctl_hung_task_timeout_secs; /* value in seconds going by the name; 0 selects the untimed wait below */
+ if (hang_check)
+ while (!wait_for_completion_io_timeout(&done,
+ hang_check * (HZ/2))) /* each wait is bounded to roughly half of hang_check seconds, in jiffies */
+ ; /* the timed wait returned 0 (presumably a timeout - confirm): wait again */
+ else
+ wait_for_completion_io(&done); /* hang_check is 0: wait with no timeout */
return blk_status_to_errno(bio->bi_status); /* convert the bio's final status to an errno value */
}
@@ -1135,90 +1194,6 @@ void bio_list_copy_data(struct bio *dst, struct bio *src)
}
EXPORT_SYMBOL(bio_list_copy_data);
-struct bio_map_data {
- int is_our_pages;
- struct iov_iter iter;
- struct iovec iov[];
-};
-
-static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- if (data->nr_segs > UIO_MAXIOV)
- return NULL;
-
- bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
- if (!bmd)
- return NULL;
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
- bmd->iter = *data;
- bmd->iter.iov = bmd->iov;
- return bmd;
-}
-
-/**
- * bio_copy_from_iter - copy all pages from iov_iter to bio
- * @bio: The &struct bio which describes the I/O as destination
- * @iter: iov_iter as source
- *
- * Copy all pages from iov_iter to bio.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_from_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- iter);
-
- if (!iov_iter_count(iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/**
- * bio_copy_to_iter - copy all pages from bio to iov_iter
- * @bio: The &struct bio which describes the I/O as source
- * @iter: iov_iter as destination
- *
- * Copy all pages from bio to iov_iter.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_to_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- &iter);
-
- if (!iov_iter_count(&iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
void bio_free_pages(struct bio *bio)
{
struct bio_vec *bvec;
@@ -1229,430 +1204,6 @@ void bio_free_pages(struct bio *bio)
}
EXPORT_SYMBOL(bio_free_pages);
-/**
- * bio_uncopy_user - finish previously mapped bio
- * @bio: bio being terminated
- *
- * Free pages allocated from bio_copy_user_iov() and write back data
- * to user space in case of a read.
- */
-int bio_uncopy_user(struct bio *bio)
-{
- struct bio_map_data *bmd = bio->bi_private;
- int ret = 0;
-
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
- /*
- * if we're in a workqueue, the request is orphaned, so
- * don't copy into a random user address space, just free
- * and return -EINTR so user space doesn't expect any data.
- */
- if (!current->mm)
- ret = -EINTR;
- else if (bio_data_dir(bio) == READ)
- ret = bio_copy_to_iter(bio, bmd->iter);
- if (bmd->is_our_pages)
- bio_free_pages(bio);
- }
- kfree(bmd);
- bio_put(bio);
- return ret;
-}
-
-/**
- * bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
- */
-struct bio *bio_copy_user_iov(struct request_queue *q,
- struct rq_map_data *map_data,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- struct page *page;
- struct bio *bio;
- int i = 0, ret;
- int nr_pages;
- unsigned int len = iter->count;
- unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
-
- bmd = bio_alloc_map_data(iter, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- /*
- * We need to do a deep copy of the iov_iter including the iovecs.
- * The caller provided iov might point to an on-stack or otherwise
- * shortlived one.
- */
- bmd->is_our_pages = map_data ? 0 : 1;
-
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- if (nr_pages > BIO_MAX_PAGES)
- nr_pages = BIO_MAX_PAGES;
-
- ret = -ENOMEM;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- ret = 0;
-
- if (map_data) {
- nr_pages = 1 << map_data->page_order;
- i = map_data->offset / PAGE_SIZE;
- }
- while (len) {
- unsigned int bytes = PAGE_SIZE;
-
- bytes -= offset;
-
- if (bytes > len)
- bytes = len;
-
- if (map_data) {
- if (i == map_data->nr_entries * nr_pages) {
- ret = -ENOMEM;
- break;
- }
-
- page = map_data->pages[i / nr_pages];
- page += (i % nr_pages);
-
- i++;
- } else {
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
- if (!map_data)
- __free_page(page);
- break;
- }
-
- len -= bytes;
- offset = 0;
- }
-
- if (ret)
- goto cleanup;
-
- if (map_data)
- map_data->offset += bio->bi_iter.bi_size;
-
- /*
- * success
- */
- if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
- (map_data && map_data->from_user)) {
- ret = bio_copy_from_iter(bio, iter);
- if (ret)
- goto cleanup;
- } else {
- if (bmd->is_our_pages)
- zero_fill_bio(bio);
- iov_iter_advance(iter, bio->bi_iter.bi_size);
- }
-
- bio->bi_private = bmd;
- if (map_data && map_data->null_mapped)
- bio_set_flag(bio, BIO_NULL_MAPPED);
- return bio;
-cleanup:
- if (!map_data)
- bio_free_pages(bio);
- bio_put(bio);
-out_bmd:
- kfree(bmd);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user_iov - map user iovec into bio
- * @q: the struct request_queue for the bio
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- int j;
- struct bio *bio;
- int ret;
-
- if (!iov_iter_count(iter))
- return ERR_PTR(-EINVAL);
-
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- ssize_t bytes;
- size_t offs, added = 0;
- int npages;
-
- bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
- if (unlikely(bytes <= 0)) {
- ret = bytes ? bytes : -EFAULT;
- goto out_unmap;
- }
-
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
- if (unlikely(offs & queue_dma_alignment(q))) {
- ret = -EINVAL;
- j = 0;
- } else {
- for (j = 0; j < npages; j++) {
- struct page *page = pages[j];
- unsigned int n = PAGE_SIZE - offs;
- bool same_page = false;
-
- if (n > bytes)
- n = bytes;
-
- if (!__bio_add_pc_page(q, bio, page, n, offs,
- &same_page)) {
- if (same_page)
- put_page(page);
- break;
- }
-
- added += n;
- bytes -= n;
- offs = 0;
- }
- iov_iter_advance(iter, added);
- }
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < npages)
- put_page(pages[j++]);
- kvfree(pages);
- /* couldn't stuff something into bio? */
- if (bytes)
- break;
- }
-
- bio_set_flag(bio, BIO_USER_MAPPED);
-
- /*
- * subtle -- if bio_map_user_iov() ended up bouncing a bio,
- * it would normally disappear when its bi_end_io is run.
- * however, we need it for the unmap, so grab an extra
- * reference to it
- */
- bio_get(bio);
- return bio;
-
- out_unmap:
- bio_release_pages(bio, false);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_unmap_user - unmap a bio
- * @bio: the bio being unmapped
- *
- * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
- * process context.
- *
- * bio_unmap_user() may sleep.
- */
-void bio_unmap_user(struct bio *bio)
-{
- bio_release_pages(bio, bio_data_dir(bio) == READ);
- bio_put(bio);
- bio_put(bio);
-}
-
-static void bio_invalidate_vmalloc_pages(struct bio *bio)
-{
-#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
- if (bio->bi_private && !op_is_write(bio_op(bio))) {
- unsigned long i, len = 0;
-
- for (i = 0; i < bio->bi_vcnt; i++)
- len += bio->bi_io_vec[i].bv_len;
- invalidate_kernel_vmap_range(bio->bi_private, len);
- }
-#endif
-}
-
-static void bio_map_kern_endio(struct bio *bio)
-{
- bio_invalidate_vmalloc_pages(bio);
- bio_put(bio);
-}
-
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- bool is_vmalloc = is_vmalloc_addr(data);
- struct page *page;
- int offset, i;
- struct bio *bio;
-
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- if (is_vmalloc) {
- flush_kernel_vmap_range(data, len);
- bio->bi_private = data;
- }
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (!is_vmalloc)
- page = virt_to_page(data);
- else
- page = vmalloc_to_page(data);
- if (bio_add_pc_page(q, bio, page, bytes,
- offset) < bytes) {
- /* we don't support partial mappings */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
- }
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- bio->bi_end_io = bio_map_kern_endio;
- return bio;
-}
-
-static void bio_copy_kern_endio(struct bio *bio)
-{
- bio_free_pages(bio);
- bio_put(bio);
-}
-
-static void bio_copy_kern_endio_read(struct bio *bio)
-{
- char *p = bio->bi_private;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
- p += bvec->bv_len;
- }
-
- bio_copy_kern_endio(bio);
-}
-
-/**
- * bio_copy_kern - copy kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to copy
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio and page allocation
- * @reading: data direction is READ
- *
- * copy the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask, int reading)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- struct bio *bio;
- void *p = data;
- int nr_pages = 0;
-
- /*
- * Overflow, abort
- */
- if (end < start)
- return ERR_PTR(-EINVAL);
-
- nr_pages = end - start;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (len) {
- struct page *page;
- unsigned int bytes = PAGE_SIZE;
-
- if (bytes > len)
- bytes = len;
-
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page)
- goto cleanup;
-
- if (!reading)
- memcpy(page_address(page), p, bytes);
-
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
- break;
-
- len -= bytes;
- p += bytes;
- }
-
- if (reading) {
- bio->bi_end_io = bio_copy_kern_endio_read;
- bio->bi_private = data;
- } else {
- bio->bi_end_io = bio_copy_kern_endio;
- }
-
- return bio;
-
-cleanup:
- bio_free_pages(bio);
- bio_put(bio);
- return ERR_PTR(-ENOMEM);
-}
-
/*
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
@@ -1752,14 +1303,14 @@ defer:
schedule_work(&bio_dirty_work);
}
-void update_io_ticks(struct hd_struct *part, unsigned long now)
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{
unsigned long stamp;
again:
stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) {
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, 1);
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
}
if (part->partno) {
@@ -1775,7 +1326,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
part_stat_lock();
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_inc_in_flight(q, part, op_is_write(op));
@@ -1793,9 +1344,8 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
part_stat_lock();
- update_io_ticks(part, now);
+ update_io_ticks(part, now, true);
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_add(part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock();