diff options
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 193 |
1 files changed, 73 insertions, 120 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index 446942677cd4..56bf122e0bb4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -322,24 +322,20 @@ void shmem_uncharge(struct inode *inode, long pages) } /* - * Replace item expected in radix tree by a new item, while holding tree lock. + * Replace item expected in xarray by a new item, while holding xa_lock. */ -static int shmem_radix_tree_replace(struct address_space *mapping, +static int shmem_replace_entry(struct address_space *mapping, pgoff_t index, void *expected, void *replacement) { - struct radix_tree_node *node; - void __rcu **pslot; + XA_STATE(xas, &mapping->i_pages, index); void *item; VM_BUG_ON(!expected); VM_BUG_ON(!replacement); - item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot); - if (!item) - return -ENOENT; + item = xas_load(&xas); if (item != expected) return -ENOENT; - __radix_tree_replace(&mapping->i_pages, node, pslot, - replacement, NULL); + xas_store(&xas, replacement); return 0; } @@ -353,12 +349,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping, static bool shmem_confirm_swap(struct address_space *mapping, pgoff_t index, swp_entry_t swap) { - void *item; - - rcu_read_lock(); - item = radix_tree_lookup(&mapping->i_pages, index); - rcu_read_unlock(); - return item == swp_to_radix_entry(swap); + return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); } /* @@ -586,9 +577,11 @@ static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo) */ static int shmem_add_to_page_cache(struct page *page, struct address_space *mapping, - pgoff_t index, void *expected) + pgoff_t index, void *expected, gfp_t gfp) { - int error, nr = hpage_nr_pages(page); + XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); + unsigned long i = 0; + unsigned long nr = 1UL << compound_order(page); VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(index != round_down(index, nr), page); @@ -600,47 +593,39 @@ static int shmem_add_to_page_cache(struct page *page, page->mapping = mapping; page->index = index; - xa_lock_irq(&mapping->i_pages); - if (PageTransHuge(page)) { - void __rcu **results; - pgoff_t idx; - int i; - - error = 0; - if (radix_tree_gang_lookup_slot(&mapping->i_pages, - &results, &idx, index, 1) && - idx < index + HPAGE_PMD_NR) { - error = -EEXIST; + do { + void *entry; + xas_lock_irq(&xas); + entry = xas_find_conflict(&xas); + if (entry != expected) + xas_set_err(&xas, -EEXIST); + xas_create_range(&xas); + if (xas_error(&xas)) + goto unlock; +next: + xas_store(&xas, page + i); + if (++i < nr) { + xas_next(&xas); + goto next; } - - if (!error) { - for (i = 0; i < HPAGE_PMD_NR; i++) { - error = radix_tree_insert(&mapping->i_pages, - index + i, page + i); - VM_BUG_ON(error); - } + if (PageTransHuge(page)) { count_vm_event(THP_FILE_ALLOC); + __inc_node_page_state(page, NR_SHMEM_THPS); } - } else if (!expected) { - error = radix_tree_insert(&mapping->i_pages, index, page); - } else { - error = shmem_radix_tree_replace(mapping, index, expected, - page); - } - - if (!error) { mapping->nrpages += nr; - if (PageTransHuge(page)) - __inc_node_page_state(page, NR_SHMEM_THPS); __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr); - xa_unlock_irq(&mapping->i_pages); - } else { +unlock: + xas_unlock_irq(&xas); + } while (xas_nomem(&xas, gfp)); + + if (xas_error(&xas)) { page->mapping = NULL; - xa_unlock_irq(&mapping->i_pages); page_ref_sub(page, nr); + return xas_error(&xas); } - return error; + + return 0; } /* @@ -654,7 +639,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap) VM_BUG_ON_PAGE(PageCompound(page), page); xa_lock_irq(&mapping->i_pages); - error = shmem_radix_tree_replace(mapping, page->index, page, radswap); + error = shmem_replace_entry(mapping, page->index, page, radswap); page->mapping = NULL; mapping->nrpages--; __dec_node_page_state(page, NR_FILE_PAGES); @@ -665,7 +650,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap) } /* - * Remove swap entry from radix tree, free the swap and its page cache. + * Remove swap entry from page cache, free the swap and its page cache. */ static int shmem_free_swap(struct address_space *mapping, pgoff_t index, void *radswap) @@ -673,7 +658,7 @@ static int shmem_free_swap(struct address_space *mapping, void *old; xa_lock_irq(&mapping->i_pages); - old = radix_tree_delete_item(&mapping->i_pages, index, radswap); + old = __xa_cmpxchg(&mapping->i_pages, index, radswap, NULL, 0); xa_unlock_irq(&mapping->i_pages); if (old != radswap) return -ENOENT; @@ -691,29 +676,19 @@ static int shmem_free_swap(struct address_space *mapping, unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end) { - struct radix_tree_iter iter; - void __rcu **slot; + XA_STATE(xas, &mapping->i_pages, start); struct page *page; unsigned long swapped = 0; rcu_read_lock(); - - radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { - if (iter.index >= end) - break; - - page = radix_tree_deref_slot(slot); - - if (radix_tree_deref_retry(page)) { - slot = radix_tree_iter_retry(&iter); + xas_for_each(&xas, page, end - 1) { + if (xas_retry(&xas, page)) continue; - } - - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) swapped++; if (need_resched()) { - slot = radix_tree_iter_resume(slot, &iter); + xas_pause(&xas); cond_resched_rcu(); } } @@ -788,7 +763,7 @@ void shmem_unlock_mapping(struct address_space *mapping) } /* - * Remove range of pages and swap entries from radix tree, and free them. + * Remove range of pages and swap entries from page cache, and free them. * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. */ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, @@ -824,7 +799,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (index >= end) break; - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { if (unfalloc) continue; nr_swaps_freed += !shmem_free_swap(mapping, @@ -921,7 +896,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (index >= end) break; - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { if (unfalloc) continue; if (shmem_free_swap(mapping, index, page)) { @@ -1110,34 +1085,27 @@ static void shmem_evict_inode(struct inode *inode) clear_inode(inode); } -static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) +static unsigned long find_swap_entry(struct xarray *xa, void *item) { - struct radix_tree_iter iter; - void __rcu **slot; - unsigned long found = -1; + XA_STATE(xas, xa, 0); unsigned int checked = 0; + void *entry; rcu_read_lock(); - radix_tree_for_each_slot(slot, root, &iter, 0) { - void *entry = radix_tree_deref_slot(slot); - - if (radix_tree_deref_retry(entry)) { - slot = radix_tree_iter_retry(&iter); + xas_for_each(&xas, entry, ULONG_MAX) { + if (xas_retry(&xas, entry)) continue; - } - if (entry == item) { - found = iter.index; + if (entry == item) break; - } checked++; - if ((checked % 4096) != 0) + if ((checked % XA_CHECK_SCHED) != 0) continue; - slot = radix_tree_iter_resume(slot, &iter); + xas_pause(&xas); cond_resched_rcu(); } - rcu_read_unlock(); - return found; + + return entry ? xas.xa_index : -1; } /* @@ -1175,10 +1143,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, * We needed to drop mutex to make that restrictive page * allocation, but the inode might have been freed while we * dropped it: although a racing shmem_evict_inode() cannot - * complete without emptying the radix_tree, our page lock + * complete without emptying the page cache, our page lock * on this swapcache page is not enough to prevent that - * free_swap_and_cache() of our swap entry will only - * trylock_page(), removing swap from radix_tree whatever. + * trylock_page(), removing swap from page cache whatever. * * We must not proceed to shmem_add_to_page_cache() if the * inode has been freed, but of course we cannot rely on @@ -1200,7 +1168,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, */ if (!error) error = shmem_add_to_page_cache(*pagep, mapping, index, - radswap); + radswap, gfp); if (error != -ENOMEM) { /* * Truncation and eviction use free_swap_and_cache(), which @@ -1244,7 +1212,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) &memcg, false); if (error) goto out; - /* No radix_tree_preload: swap entry keeps a place for page in tree */ + /* No memory allocation: swap entry occupies the slot for the page */ error = -EAGAIN; mutex_lock(&shmem_swaplist_mutex); @@ -1453,23 +1421,17 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; - struct inode *inode = &info->vfs_inode; - struct address_space *mapping = inode->i_mapping; - pgoff_t idx, hindex; - void __rcu **results; + struct address_space *mapping = info->vfs_inode.i_mapping; + pgoff_t hindex; struct page *page; if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) return NULL; hindex = round_down(index, HPAGE_PMD_NR); - rcu_read_lock(); - if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx, - hindex, 1) && idx < hindex + HPAGE_PMD_NR) { - rcu_read_unlock(); + if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, + XA_PRESENT)) return NULL; - } - rcu_read_unlock(); shmem_pseudo_vma_init(&pvma, info, hindex); page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, @@ -1578,8 +1540,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, * a nice clean interface for us to replace oldpage by newpage there. */ xa_lock_irq(&swap_mapping->i_pages); - error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, - newpage); + error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); if (!error) { __inc_node_page_state(newpage, NR_FILE_PAGES); __dec_node_page_state(oldpage, NR_FILE_PAGES); @@ -1643,7 +1604,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, repeat: swap.val = 0; page = find_lock_entry(mapping, index); - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { swap = radix_to_swp_entry(page); page = NULL; } @@ -1718,7 +1679,7 @@ repeat: false); if (!error) { error = shmem_add_to_page_cache(page, mapping, index, - swp_to_radix_entry(swap)); + swp_to_radix_entry(swap), gfp); /* * We already confirmed swap under page lock, and make * no memory allocation here, so usually no possibility @@ -1824,13 +1785,8 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, PageTransHuge(page)); if (error) goto unacct; - error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK, - compound_order(page)); - if (!error) { - error = shmem_add_to_page_cache(page, mapping, hindex, - NULL); - radix_tree_preload_end(); - } + error = shmem_add_to_page_cache(page, mapping, hindex, + NULL, gfp & GFP_RECLAIM_MASK); if (error) { mem_cgroup_cancel_charge(page, memcg, PageTransHuge(page)); @@ -1931,7 +1887,7 @@ unlock: spin_unlock_irq(&info->lock); goto repeat; } - if (error == -EEXIST) /* from above or from radix_tree_insert */ + if (error == -EEXIST) goto repeat; return error; } @@ -2299,11 +2255,8 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (ret) goto out_release; - ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); - if (!ret) { - ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL); - radix_tree_preload_end(); - } + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, + gfp & GFP_RECLAIM_MASK); if (ret) goto out_release_uncharge; @@ -2548,7 +2501,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } /* - * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. + * llseek SEEK_DATA or SEEK_HOLE through the page cache. */ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, pgoff_t index, pgoff_t end, int whence) @@ -2578,7 +2531,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, index = indices[i]; } page = pvec.pages[i]; - if (page && !radix_tree_exceptional_entry(page)) { + if (page && !xa_is_value(page)) { if (!PageUptodate(page)) page = NULL; } |