aboutsummaryrefslogtreecommitdiff
path: root/block/genhd.c
diff options
context:
space:
mode:
authorLuis Chamberlain2020-06-19 20:47:25 +0000
committerJens Axboe2020-06-24 09:15:58 -0600
commite8c7d14ac6c37c173ec606907d38802b00302988 (patch)
tree297c86899cbacb7fb30ecc20e2fa03a82d3113dd /block/genhd.c
parent763b58923aeb0a06c5a5f7e5fbb4c654c644d91d (diff)
block: revert back to synchronous request_queue removal
Commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") merged on v4.12 moved the work behind blk_release_queue() into a workqueue after a splat floated around which indicated some work on blk_release_queue() could sleep in blk_exit_rl(). This splat would be possible when a driver called blk_put_queue() or blk_cleanup_queue() (which calls blk_put_queue() as its final call) from an atomic context. blk_put_queue() decrements the refcount for the request_queue kobject, and upon reaching 0 blk_release_queue() is called. Although blk_exit_rl() is now removed through commit db6d99523560 ("block: remove request_list code") on v5.0, we reserve the right to be able to sleep within blk_release_queue() context. The last reference for the request_queue must not be called from atomic context. *When* the last reference to the request_queue reaches 0 varies, and so let's take the opportunity to document when that is expected to happen and also document the context of the related calls as best as possible so we can avoid future issues, and with the hopes that the synchronous request_queue removal sticks. We revert back to synchronous request_queue removal because asynchronous removal creates a regression with expected userspace interaction with several drivers. An example is when removing the loopback driver, one uses ioctls from userspace to do so, but upon return and if successful, one expects the device to be removed. Likewise if one races to add another device the new one may not be added as it is still being removed. This was expected behavior before and it now fails as the device is still present and busy still. Moving to asynchronous request_queue removal could have broken many scripts which relied on the removal to have been completed if there was no error. Document this expectation as well so that this doesn't regress userspace again. Using asynchronous request_queue removal however has helped us find other bugs. In the future we can test what could break with this arrangement by enabling CONFIG_DEBUG_KOBJECT_RELEASE. While at it, update the docs with the context expectations for the request_queue / gendisk refcount decrement, and make these expectations explicit by using might_sleep(). Fixes: dc9edc44de6c ("block: Fix a blk_exit_rl() regression") Suggested-by: Nicolai Stange <nstange@suse.de> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Cc: Bart Van Assche <bvanassche@acm.org> Cc: Omar Sandoval <osandov@fb.com> Cc: Hannes Reinecke <hare@suse.com> Cc: Nicolai Stange <nstange@suse.de> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: yu kuai <yukuai3@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/genhd.c')
-rw-r--r--block/genhd.c17
1 files changed, 17 insertions, 0 deletions
diff --git a/block/genhd.c b/block/genhd.c
index 1be86b1f43ec..60ae4e1b4d38 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -889,12 +889,19 @@ static void invalidate_partition(struct gendisk *disk, int partno)
* The final removal of the struct gendisk happens when its refcount reaches 0
* with put_disk(), which should be called after del_gendisk(), if
* __device_add_disk() was used.
+ *
+ * Drivers exist which depend on the release of the gendisk to be synchronous,
+ * it should not be deferred.
+ *
+ * Context: can sleep
*/
void del_gendisk(struct gendisk *disk)
{
struct disk_part_iter piter;
struct hd_struct *part;
+ might_sleep();
+
blk_integrity_del(disk);
disk_del_events(disk);
@@ -1548,11 +1555,15 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
* drivers we also call blk_put_queue() for them, and we expect the
* request_queue refcount to reach 0 at this point, and so the request_queue
* will also be freed prior to the disk.
+ *
+ * Context: can sleep
*/
static void disk_release(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
+ might_sleep();
+
blk_free_devt(dev->devt);
disk_release_events(disk);
kfree(disk->random);
@@ -1797,6 +1808,9 @@ EXPORT_SYMBOL(get_disk_and_module);
*
* This decrements the refcount for the struct gendisk. When this reaches 0
* we'll have disk_release() called.
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ * atomic context.
*/
void put_disk(struct gendisk *disk)
{
@@ -1811,6 +1825,9 @@ EXPORT_SYMBOL(put_disk);
*
* This is a counterpart of get_disk_and_module() and thus also of
* get_gendisk().
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ * atomic context.
*/
void put_disk_and_module(struct gendisk *disk)
{