From 73e10ded33a1cfc0c72404aaedc493e9813b6239 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 10 Feb 2020 10:00:21 +0000 Subject: mm: Add set/end/wait functions for PG_private_2 Add three functions to manipulate PG_private_2: (*) set_page_private_2() - Set the flag and take an appropriate reference on the flagged page. (*) end_page_private_2() - Clear the flag, drop the reference and wake up any waiters, somewhat analogously with end_page_writeback(). (*) wait_on_page_private_2() - Wait for the flag to be cleared. Wrappers will need to be placed in the netfs lib header in the patch that adds that. [This implements a suggestion by Linus[1] to not mix the terminology of PG_private_2 and PG_fscache in the mm core function] Changes: v7: - Use compound_head() in all the functions to make them THP safe[6]. v5: - Add set and end functions, calling the end function end rather than unlock[3]. - Keep a ref on the page when PG_private_2 is set[4][5]. v4: - Remove extern from the declaration[2]. Suggested-by: Linus Torvalds Signed-off-by: David Howells Reviewed-by: Matthew Wilcox (Oracle) Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Alexander Viro cc: Christoph Hellwig cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/1330473.1612974547@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/CAHk-=wjgA-74ddehziVk=XAEMTKswPu1Yw4uaro1R3ibs27ztw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/20210216102659.GA27714@lst.de/ [2] Link: https://lore.kernel.org/r/161340387944.1303470.7944159520278177652.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539528910.286939.1252328699383291173.stgit@warthog.procyon.org.uk # v4 Link: https://lore.kernel.org/r/20210321105309.GG3420@casper.infradead.org [3] Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4] Link: https://lore.kernel.org/r/CAHk-=wjSGsRj7xwhSMQ6dAQiz53xA39pOG+XA_WeTgwBBu4uqg@mail.gmail.com/ [5] Link: https://lore.kernel.org/r/20210408145057.GN2531743@casper.infradead.org/ [6] Link: https://lore.kernel.org/r/161653788200.2770958.9517755716374927208.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789066013.6155.9816857201817288382.stgit@warthog.procyon.org.uk/ # v6 --- mm/filemap.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 43700480d897..afe22f09960e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1432,6 +1432,67 @@ void unlock_page(struct page *page) } EXPORT_SYMBOL(unlock_page); +/** + * end_page_private_2 - Clear PG_private_2 and release any waiters + * @page: The page + * + * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for + * this. The page ref held for PG_private_2 being set is released. + * + * This is, for example, used when a netfs page is being written to a local + * disk cache, thereby allowing writes to the cache for the same page to be + * serialised. + */ +void end_page_private_2(struct page *page) +{ + page = compound_head(page); + VM_BUG_ON_PAGE(!PagePrivate2(page), page); + clear_bit_unlock(PG_private_2, &page->flags); + wake_up_page_bit(page, PG_private_2); + put_page(page); +} +EXPORT_SYMBOL(end_page_private_2); + +/** + * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page + * @page: The page to wait on + * + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page. + */ +void wait_on_page_private_2(struct page *page) +{ + page = compound_head(page); + while (PagePrivate2(page)) + wait_on_page_bit(page, PG_private_2); +} +EXPORT_SYMBOL(wait_on_page_private_2); + +/** + * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page + * @page: The page to wait on + * + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a + * fatal signal is received by the calling task. + * + * Return: + * - 0 if successful. + * - -EINTR if a fatal signal was encountered. + */ +int wait_on_page_private_2_killable(struct page *page) +{ + int ret = 0; + + page = compound_head(page); + while (PagePrivate2(page)) { + ret = wait_on_page_bit_killable(page, PG_private_2); + if (ret < 0) + break; + } + + return ret; +} +EXPORT_SYMBOL(wait_on_page_private_2_killable); + /** * end_page_writeback - end writeback against a page * @page: the page -- cgit v1.2.3 From fcd9ae4f7f3b5fbd549285bab0478a339113620e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox (Oracle) Date: Wed, 7 Apr 2021 21:18:55 +0100 Subject: mm/filemap: Pass the file_ra_state in the ractl For readahead_expand(), we need to modify the file ra_state, so pass it down by adding it to the ractl. We have to do this because it's not always the same as f_ra in the struct file that is already being passed. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne Link: https://lore.kernel.org/r/20210407201857.3582797-2-willy@infradead.org/ Link: https://lore.kernel.org/r/161789067431.6155.8063840447229665720.stgit@warthog.procyon.org.uk/ # v6 --- fs/ext4/verity.c | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/verity.c | 2 +- include/linux/pagemap.h | 20 +++++++++++--------- mm/filemap.c | 4 ++-- mm/internal.h | 7 +++---- mm/readahead.c | 22 +++++++++++----------- 7 files changed, 30 insertions(+), 29 deletions(-) (limited to 'mm') diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 00e3cbde472e..07438f46b558 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -370,7 +370,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); struct page *page; index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d26ff2ae3f5e..c1e6f669a0c4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4051,7 +4051,7 @@ out: static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx); + DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); struct address_space *mapping = inode->i_mapping; struct page *page; pgoff_t redirty_idx = page_idx; diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 054ec852b5ea..a7beff28a3c5 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -228,7 +228,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); struct page *page; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bb4433c98d02..4220ded38f4b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -812,20 +812,23 @@ static inline int add_to_page_cache(struct page *page, * @file: The file, used primarily by network filesystems for authentication. * May be NULL if invoked internally by the filesystem. * @mapping: Readahead this filesystem object. + * @ra: File readahead state. May be NULL. */ struct readahead_control { struct file *file; struct address_space *mapping; + struct file_ra_state *ra; /* private: use the readahead_* accessors instead */ pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; }; -#define DEFINE_READAHEAD(rac, f, m, i) \ - struct readahead_control rac = { \ +#define DEFINE_READAHEAD(ractl, f, r, m, i) \ + struct readahead_control ractl = { \ .file = f, \ .mapping = m, \ + .ra = r, \ ._index = i, \ } @@ -833,10 +836,9 @@ struct readahead_control { void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); -void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *, +void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); +void page_cache_async_ra(struct readahead_control *, struct page *, unsigned long req_count); -void page_cache_async_ra(struct readahead_control *, struct file_ra_state *, - struct page *, unsigned long req_count); /** * page_cache_sync_readahead - generic file readahead @@ -856,8 +858,8 @@ void page_cache_sync_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t index, unsigned long req_count) { - DEFINE_READAHEAD(ractl, file, mapping, index); - page_cache_sync_ra(&ractl, ra, req_count); + DEFINE_READAHEAD(ractl, file, ra, mapping, index); + page_cache_sync_ra(&ractl, req_count); } /** @@ -879,8 +881,8 @@ void page_cache_async_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, struct page *page, pgoff_t index, unsigned long req_count) { - DEFINE_READAHEAD(ractl, file, mapping, index); - page_cache_async_ra(&ractl, ra, page, req_count); + DEFINE_READAHEAD(ractl, file, ra, mapping, index); + page_cache_async_ra(&ractl, page, req_count); } /** diff --git a/mm/filemap.c b/mm/filemap.c index afe22f09960e..46e0321ba87a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2832,7 +2832,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct file *file = vmf->vma->vm_file; struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; - DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff); + DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); struct file *fpin = NULL; unsigned int mmap_miss; @@ -2844,7 +2844,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) if (vmf->vma->vm_flags & VM_SEQ_READ) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); - page_cache_sync_ra(&ractl, ra, ra->ra_pages); + page_cache_sync_ra(&ractl, ra->ra_pages); return fpin; } diff --git a/mm/internal.h b/mm/internal.h index 1432feec62df..83a07b2a7b1f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -51,13 +51,12 @@ void unmap_page_range(struct mmu_gather *tlb, void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_size); -void force_page_cache_ra(struct readahead_control *, struct file_ra_state *, - unsigned long nr); +void force_page_cache_ra(struct readahead_control *, unsigned long nr); static inline void force_page_cache_readahead(struct address_space *mapping, struct file *file, pgoff_t index, unsigned long nr_to_read) { - DEFINE_READAHEAD(ractl, file, mapping, index); - force_page_cache_ra(&ractl, &file->f_ra, nr_to_read); + DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index); + force_page_cache_ra(&ractl, nr_to_read); } unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, diff --git a/mm/readahead.c b/mm/readahead.c index c5b0457415be..2088569a947e 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -272,9 +272,10 @@ void do_page_cache_ra(struct readahead_control *ractl, * memory at once. */ void force_page_cache_ra(struct readahead_control *ractl, - struct file_ra_state *ra, unsigned long nr_to_read) + unsigned long nr_to_read) { struct address_space *mapping = ractl->mapping; + struct file_ra_state *ra = ractl->ra; struct backing_dev_info *bdi = inode_to_bdi(mapping->host); unsigned long max_pages, index; @@ -433,10 +434,10 @@ static int try_context_readahead(struct address_space *mapping, * A minimal readahead algorithm for trivial sequential/random reads. */ static void ondemand_readahead(struct readahead_control *ractl, - struct file_ra_state *ra, bool hit_readahead_marker, - unsigned long req_size) + bool hit_readahead_marker, unsigned long req_size) { struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host); + struct file_ra_state *ra = ractl->ra; unsigned long max_pages = ra->ra_pages; unsigned long add_pages; unsigned long index = readahead_index(ractl); @@ -550,7 +551,7 @@ readit: } void page_cache_sync_ra(struct readahead_control *ractl, - struct file_ra_state *ra, unsigned long req_count) + unsigned long req_count) { bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); @@ -560,7 +561,7 @@ void page_cache_sync_ra(struct readahead_control *ractl, * read-ahead will do the right thing and limit the read to just the * requested range, which we'll set to 1 page for this case. */ - if (!ra->ra_pages || blk_cgroup_congested()) { + if (!ractl->ra->ra_pages || blk_cgroup_congested()) { if (!ractl->file) return; req_count = 1; @@ -569,21 +570,20 @@ void page_cache_sync_ra(struct readahead_control *ractl, /* be dumb */ if (do_forced_ra) { - force_page_cache_ra(ractl, ra, req_count); + force_page_cache_ra(ractl, req_count); return; } /* do read-ahead */ - ondemand_readahead(ractl, ra, false, req_count); + ondemand_readahead(ractl, false, req_count); } EXPORT_SYMBOL_GPL(page_cache_sync_ra); void page_cache_async_ra(struct readahead_control *ractl, - struct file_ra_state *ra, struct page *page, - unsigned long req_count) + struct page *page, unsigned long req_count) { /* no read-ahead */ - if (!ra->ra_pages) + if (!ractl->ra->ra_pages) return; /* @@ -604,7 +604,7 @@ void page_cache_async_ra(struct readahead_control *ractl, return; /* do read-ahead */ - ondemand_readahead(ractl, ra, true, req_count); + ondemand_readahead(ractl, true, req_count); } EXPORT_SYMBOL_GPL(page_cache_async_ra); -- cgit v1.2.3 From f615bd5c4725fde94387d3f0f4e752b4c01a4592 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox (Oracle) Date: Wed, 21 Apr 2021 18:09:23 +0100 Subject: mm/readahead: Handle ractl nr_pages being modified Filesystems are not currently permitted to modify the number of pages in the ractl. An upcoming patch to add readahead_expand() changes that rule, so remove the check and resync the loop counter after every call to the filesystem. Tested-by: Jeff Layton Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Link: https://lore.kernel.org/r/20210420200116.3715790-1-willy@infradead.org/ Link: https://lore.kernel.org/r/20210421170923.4005574-1-willy@infradead.org/ # v2 --- mm/readahead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/readahead.c b/mm/readahead.c index 2088569a947e..5b423ecc99f1 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -198,8 +198,6 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, for (i = 0; i < nr_to_read; i++) { struct page *page = xa_load(&mapping->i_pages, index + i); - BUG_ON(index + i != ractl->_index + ractl->_nr_pages); - if (page && !xa_is_value(page)) { /* * Page already present? Kick off the current batch @@ -210,6 +208,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, * not worth getting one just for that. */ read_pages(ractl, &page_pool, true); + i = ractl->_index + ractl->_nr_pages - index - 1; continue; } @@ -223,6 +222,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, gfp_mask) < 0) { put_page(page); read_pages(ractl, &page_pool, true); + i = ractl->_index + ractl->_nr_pages - index - 1; continue; } if (i == nr_to_read - lookahead_size) -- cgit v1.2.3 From 3ca236440126f75c91281c53f137794b8d5f884a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Sep 2020 14:03:27 +0100 Subject: mm: Implement readahead_control pageset expansion Provide a function, readahead_expand(), that expands the set of pages specified by a readahead_control object to encompass a revised area with a proposed size and length. The proposed area must include all of the old area and may be expanded yet more by this function so that the edges align on (transparent huge) page boundaries as allocated. The expansion will be cut short if a page already exists in either of the areas being expanded into. Note that any expansion made in such a case is not rolled back. This will be used by fscache so that reads can be expanded to cache granule boundaries, thereby allowing whole granules to be stored in the cache, but there are other potential users also. Changes: v6: - Fold in a patch from Matthew Wilcox to tell the ondemand readahead algorithm about the expansion so that the next readahead starts at the right place[2]. v4: - Moved the declaration of readahead_expand() to a better place[1]. Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Reviewed-by: Matthew Wilcox (Oracle) Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Alexander Viro cc: Christoph Hellwig cc: Mike Marshall cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210217161358.GM2858050@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/20210407201857.3582797-4-willy@infradead.org/ [2] Link: https://lore.kernel.org/r/159974633888.2094769.8326206446358128373.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588479816.3465195.553952688795241765.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118131787.1232039.4863969952441067985.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161028670.2537118.13831420617039766044.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340389201.1303470.14353807284546854878.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539530488.286939.18085961677838089157.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653789422.2770958.2108046612147345000.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789069829.6155.4295672417565512161.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/pagemap.h | 2 ++ mm/readahead.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'mm') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4220ded38f4b..63ca6430aef5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -839,6 +839,8 @@ void page_cache_ra_unbounded(struct readahead_control *, void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); void page_cache_async_ra(struct readahead_control *, struct page *, unsigned long req_count); +void readahead_expand(struct readahead_control *ractl, + loff_t new_start, size_t new_len); /** * page_cache_sync_readahead - generic file readahead diff --git a/mm/readahead.c b/mm/readahead.c index 5b423ecc99f1..d589f147f4c2 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -638,3 +638,78 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) { return ksys_readahead(fd, offset, count); } + +/** + * readahead_expand - Expand a readahead request + * @ractl: The request to be expanded + * @new_start: The revised start + * @new_len: The revised size of the request + * + * Attempt to expand a readahead request outwards from the current size to the + * specified size by inserting locked pages before and after the current window + * to increase the size to the new window. This may involve the insertion of + * THPs, in which case the window may get expanded even beyond what was + * requested. + * + * The algorithm will stop if it encounters a conflicting page already in the + * pagecache and leave a smaller expansion than requested. + * + * The caller must check for this by examining the revised @ractl object for a + * different expansion than was requested. + */ +void readahead_expand(struct readahead_control *ractl, + loff_t new_start, size_t new_len) +{ + struct address_space *mapping = ractl->mapping; + struct file_ra_state *ra = ractl->ra; + pgoff_t new_index, new_nr_pages; + gfp_t gfp_mask = readahead_gfp_mask(mapping); + + new_index = new_start / PAGE_SIZE; + + /* Expand the leading edge downwards */ + while (ractl->_index > new_index) { + unsigned long index = ractl->_index - 1; + struct page *page = xa_load(&mapping->i_pages, index); + + if (page && !xa_is_value(page)) + return; /* Page apparently present */ + + page = __page_cache_alloc(gfp_mask); + if (!page) + return; + if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) { + put_page(page); + return; + } + + ractl->_nr_pages++; + ractl->_index = page->index; + } + + new_len += new_start - readahead_pos(ractl); + new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE); + + /* Expand the trailing edge upwards */ + while (ractl->_nr_pages < new_nr_pages) { + unsigned long index = ractl->_index + ractl->_nr_pages; + struct page *page = xa_load(&mapping->i_pages, index); + + if (page && !xa_is_value(page)) + return; /* Page apparently present */ + + page = __page_cache_alloc(gfp_mask); + if (!page) + return; + if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) { + put_page(page); + return; + } + ractl->_nr_pages++; + if (ra) { + ra->size++; + ra->async_size++; + } + } +} +EXPORT_SYMBOL(readahead_expand); -- cgit v1.2.3