diff options
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 105 |
1 files changed, 41 insertions, 64 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 84ae9bf5858a..3f690bae6b78 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2097,34 +2097,25 @@ void __init page_writeback_init(void) * dirty pages in the file (thus it is important for this function to be quick * so that it can tag pages faster than a dirtying process can create them). */ -/* - * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock - * latency. - */ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end) { -#define WRITEBACK_TAG_BATCH 4096 - unsigned long tagged = 0; - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, &mapping->i_pages, start); + unsigned int tagged = 0; + void *page; - xa_lock_irq(&mapping->i_pages); - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, - PAGECACHE_TAG_DIRTY) { - if (iter.index > end) - break; - radix_tree_iter_tag_set(&mapping->i_pages, &iter, - PAGECACHE_TAG_TOWRITE); - tagged++; - if ((tagged % WRITEBACK_TAG_BATCH) != 0) + xas_lock_irq(&xas); + xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) { + xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE); + if (++tagged % XA_CHECK_SCHED) continue; - slot = radix_tree_iter_resume(slot, &iter); - xa_unlock_irq(&mapping->i_pages); + + xas_pause(&xas); + xas_unlock_irq(&xas); cond_resched(); - xa_lock_irq(&mapping->i_pages); + xas_lock_irq(&xas); } - xa_unlock_irq(&mapping->i_pages); + xas_unlock_irq(&xas); } EXPORT_SYMBOL(tag_pages_for_writeback); @@ -2149,6 +2140,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback); * not miss some pages (e.g., because some other process has cleared TOWRITE * tag we set). The rule we follow is that TOWRITE tag can be cleared only * by the process clearing the DIRTY tag (and submitting the page for IO). + * + * To avoid deadlocks between range_cyclic writeback and callers that hold + * pages in PageWriteback to aggregate IO until write_cache_pages() returns, + * we do not loop back to the start of the file. Doing so causes a page + * lock/page writeback access order inversion - we should only ever lock + * multiple pages in ascending page->index order, and looping back to the start + * of the file violates that rule and causes deadlocks. */ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, @@ -2162,31 +2160,24 @@ int write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - int cycled; int range_whole = 0; - int tag; + xa_mark_t tag; pagevec_init(&pvec); if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; - if (index == 0) - cycled = 1; - else - cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - cycled = 1; /* ignore range_cyclic tests */ } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; -retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; @@ -2272,17 +2263,14 @@ continue_unlock: pagevec_release(&pvec); cond_resched(); } - if (!cycled && !done) { - /* - * range_cyclic: - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - cycled = 1; - index = 0; - end = writeback_index - 1; - goto retry; - } + + /* + * If we hit the last page and there is more work to be done: wrap + * back the index back to the start of the file for the next + * time we are called. + */ + if (wbc->range_cyclic && !done) + done_index = 0; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; @@ -2445,7 +2433,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, /* * For address_spaces which do not use buffers. Just tag the page as dirty in - * its radix tree. + * the xarray. * * This is also used when a single buffer is being dirtied: we want to set the * page dirty in that case, but not all the buffers. This is a "bottom-up" @@ -2471,7 +2459,7 @@ int __set_page_dirty_nobuffers(struct page *page) BUG_ON(page_mapping(page) != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->i_pages, page_index(page), + __xa_set_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); xa_unlock_irqrestore(&mapping->i_pages, flags); unlock_page_memcg(page); @@ -2634,13 +2622,13 @@ EXPORT_SYMBOL(__cancel_dirty_page); * Returns true if the page was previously dirty. * * This is for preparing to put the page under writeout. We leave the page - * tagged as dirty in the radix tree so that a concurrent write-for-sync + * tagged as dirty in the xarray so that a concurrent write-for-sync * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage * implementation will run either set_page_writeback() or set_page_dirty(), - * at which stage we bring the page's dirty flag and radix-tree dirty tag + * at which stage we bring the page's dirty flag and xarray dirty tag * back into sync. * - * This incoherency between the page's dirty flag and radix-tree tag is + * This incoherency between the page's dirty flag and xarray tag is * unfortunate, but it only exists while the page is locked. */ int clear_page_dirty_for_io(struct page *page) @@ -2721,7 +2709,7 @@ int test_clear_page_writeback(struct page *page) xa_lock_irqsave(&mapping->i_pages, flags); ret = TestClearPageWriteback(page); if (ret) { - radix_tree_tag_clear(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) { struct bdi_writeback *wb = inode_to_wb(inode); @@ -2761,11 +2749,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write) lock_page_memcg(page); if (mapping && mapping_use_writeback_tags(mapping)) { + XA_STATE(xas, &mapping->i_pages, page_index(page)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; - xa_lock_irqsave(&mapping->i_pages, flags); + xas_lock_irqsave(&xas, flags); + xas_load(&xas); ret = TestSetPageWriteback(page); if (!ret) { bool on_wblist; @@ -2773,8 +2763,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) on_wblist = mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); - radix_tree_tag_set(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_WRITEBACK); + xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); @@ -2787,12 +2776,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write) sb_mark_inode_writeback(mapping->host); } if (!PageDirty(page)) - radix_tree_tag_clear(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) - radix_tree_tag_clear(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_TOWRITE); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irqrestore(&xas, flags); } else { ret = TestSetPageWriteback(page); } @@ -2806,16 +2793,6 @@ int __test_set_page_writeback(struct page *page, bool keep_write) } EXPORT_SYMBOL(__test_set_page_writeback); -/* - * Return true if any of the pages in the mapping are marked with the - * passed tag. - */ -int mapping_tagged(struct address_space *mapping, int tag) -{ - return radix_tree_tagged(&mapping->i_pages, tag); -} -EXPORT_SYMBOL(mapping_tagged); - /** * wait_for_stable_page() - wait for writeback to finish, if necessary. * @page: The page to wait on. |