aboutsummaryrefslogtreecommitdiff
path: root/mm/filemap.c
diff options
context:
space:
mode:
authorJens Axboe2021-04-29 22:55:18 -0700
committerLinus Torvalds2021-04-30 11:20:36 -0700
commit63135aa3866db99fd923b716c5ff2e468879624a (patch)
tree8591e4cd95b7e4f2d585d7e60b09ff255ae1995b /mm/filemap.c
parentdce44566192ec0b38597fdfd435013c2d54653ff (diff)
mm: provide filemap_range_needs_writeback() helper
Patch series "Improve IOCB_NOWAIT O_DIRECT reads", v3. An internal workload complained because it was using too much CPU, and when I took a look, we had a lot of io_uring workers going to town. For an async buffered read like workload, I am normally expecting _zero_ offloads to a worker thread, but this one had tons of them. I'd drop caches and things would look good again, but then a minute later we'd regress back to using workers. Turns out that every minute something was reading parts of the device, which would add page cache for that inode. I put patches like these in for our kernel, and the problem was solved. Don't -EAGAIN IOCB_NOWAIT dio reads just because we have page cache entries for the given range. This causes unnecessary work from the callers side, when the IO could have been issued totally fine without blocking on writeback when there is none. This patch (of 3): For O_DIRECT reads/writes, we check if we need to issue a call to filemap_write_and_wait_range() to issue and/or wait for writeback for any page in the given range. The existing mechanism just checks for a page in the range, which is suboptimal for IOCB_NOWAIT as we'll fallback to the slow path (and needing retry) if there's just a clean page cache page in the range. Provide filemap_range_needs_writeback() which tries a little harder to check if we actually need to issue and/or wait for writeback in the range. Link: https://lkml.kernel.org/r/20210224164455.1096727-1-axboe@kernel.dk Link: https://lkml.kernel.org/r/20210224164455.1096727-2-axboe@kernel.dk Signed-off-by: Jens Axboe <axboe@kernel.dk> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org> Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c43
1 files changed, 43 insertions, 0 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 151090fdcf29..1750742b5689 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -636,6 +636,49 @@ static bool mapping_needs_writeback(struct address_space *mapping)
}
/**
+ * filemap_range_needs_writeback - check if range potentially needs writeback
+ * @mapping: address space within which to check
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback. Used by O_DIRECT
+ * read/write with IOCB_NOWAIT, to see if the caller needs to do
+ * filemap_write_and_wait_range() before proceeding.
+ *
+ * Return: %true if the caller should do filemap_write_and_wait_range() before
+ * doing O_DIRECT to a page in this range, %false otherwise.
+ */
+bool filemap_range_needs_writeback(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
+ pgoff_t max = end_byte >> PAGE_SHIFT;
+ struct page *page;
+
+ if (!mapping_needs_writeback(mapping))
+ return false;
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+ !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+ return false;
+ if (end_byte < start_byte)
+ return false;
+
+ rcu_read_lock();
+ xas_for_each(&xas, page, max) {
+ if (xas_retry(&xas, page))
+ continue;
+ if (xa_is_value(page))
+ continue;
+ if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
+ break;
+ }
+ rcu_read_unlock();
+ return page != NULL;
+}
+EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+
+/**
* filemap_write_and_wait_range - write out & wait on a file range
* @mapping: the address_space for the pages
* @lstart: offset in bytes where the range starts