diff options
Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r-- | fs/erofs/zdata.c | 254 |
1 files changed, 169 insertions, 85 deletions
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 3851e1a64f73..78e4b598ecca 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -11,6 +11,93 @@ #include <trace/events/erofs.h> /* + * since pclustersize is variable for big pcluster feature, introduce slab + * pools implementation for different pcluster sizes. + */ +struct z_erofs_pcluster_slab { + struct kmem_cache *slab; + unsigned int maxpages; + char name[48]; +}; + +#define _PCLP(n) { .maxpages = n } + +static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = { + _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128), + _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES) +}; + +static void z_erofs_destroy_pcluster_pool(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + if (!pcluster_pool[i].slab) + continue; + kmem_cache_destroy(pcluster_pool[i].slab); + pcluster_pool[i].slab = NULL; + } +} + +static int z_erofs_create_pcluster_pool(void) +{ + struct z_erofs_pcluster_slab *pcs; + struct z_erofs_pcluster *a; + unsigned int size; + + for (pcs = pcluster_pool; + pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) { + size = struct_size(a, compressed_pages, pcs->maxpages); + + sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages); + pcs->slab = kmem_cache_create(pcs->name, size, 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (pcs->slab) + continue; + + z_erofs_destroy_pcluster_pool(); + return -ENOMEM; + } + return 0; +} + +static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; + struct z_erofs_pcluster *pcl; + + if (nrpages > pcs->maxpages) + continue; + + pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + if (!pcl) + return ERR_PTR(-ENOMEM); + pcl->pclusterpages = nrpages; + return pcl; + } + return ERR_PTR(-EINVAL); +} + +static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; + + if (pcl->pclusterpages > pcs->maxpages) + continue; + + kmem_cache_free(pcs->slab, pcl); + return; + } + DBG_BUGON(1); +} + +/* * a compressed_pages[] placeholder in order to avoid * being filled with file pages for in-place decompression. */ @@ -37,12 +124,11 @@ typedef tagptr1_t compressed_page_t; tagptr_fold(compressed_page_t, page, 1) static struct workqueue_struct *z_erofs_workqueue __read_mostly; -static struct kmem_cache *pcluster_cachep __read_mostly; void z_erofs_exit_zip_subsystem(void) { destroy_workqueue(z_erofs_workqueue); - kmem_cache_destroy(pcluster_cachep); + z_erofs_destroy_pcluster_pool(); } static inline int z_erofs_init_workqueue(void) @@ -59,32 +145,16 @@ static inline int z_erofs_init_workqueue(void) return z_erofs_workqueue ? 0 : -ENOMEM; } -static void z_erofs_pcluster_init_once(void *ptr) -{ - struct z_erofs_pcluster *pcl = ptr; - struct z_erofs_collection *cl = z_erofs_primarycollection(pcl); - unsigned int i; - - mutex_init(&cl->lock); - cl->nr_pages = 0; - cl->vcnt = 0; - for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i) - pcl->compressed_pages[i] = NULL; -} - int __init z_erofs_init_zip_subsystem(void) { - pcluster_cachep = kmem_cache_create("erofs_compress", - Z_EROFS_WORKGROUP_SIZE, 0, - SLAB_RECLAIM_ACCOUNT, - z_erofs_pcluster_init_once); - if (pcluster_cachep) { - if (!z_erofs_init_workqueue()) - return 0; - - kmem_cache_destroy(pcluster_cachep); - } - return -ENOMEM; + int err = z_erofs_create_pcluster_pool(); + + if (err) + return err; + err = z_erofs_init_workqueue(); + if (err) + z_erofs_destroy_pcluster_pool(); + return err; } enum z_erofs_collectmode { @@ -104,6 +174,12 @@ enum z_erofs_collectmode { * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| */ COLLECT_PRIMARY_HOOKED, + /* + * a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it + * could be dispatched into bypass queue later due to uptodated managed + * pages. All related online pages cannot be reused for inplace I/O (or + * pagevec) since it can be directly decoded without I/O submission. + */ COLLECT_PRIMARY_FOLLOWED_NOINPLACE, /* * The current collection has been linked with the owned chain, and @@ -128,7 +204,8 @@ struct z_erofs_collector { struct z_erofs_pcluster *pcl, *tailpcl; struct z_erofs_collection *cl; - struct page **compressedpages; + /* a pointer used to pick up inplace I/O pages */ + struct page **icpage_ptr; z_erofs_next_pcluster_t owned_head; enum z_erofs_collectmode mode; @@ -162,18 +239,19 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, enum z_erofs_cache_alloctype type, struct list_head *pagepool) { - const struct z_erofs_pcluster *pcl = clt->pcl; - const unsigned int clusterpages = BIT(pcl->clusterbits); - struct page **pages = clt->compressedpages; - pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages); + struct z_erofs_pcluster *pcl = clt->pcl; bool standalone = true; gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; + struct page **pages; + pgoff_t index; if (clt->mode < COLLECT_PRIMARY_FOLLOWED) return; - for (; pages < pcl->compressed_pages + clusterpages; ++pages) { + pages = pcl->compressed_pages; + index = pcl->obj.index; + for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) { struct page *page; compressed_page_t t; struct page *newpage = NULL; @@ -186,21 +264,25 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, if (page) { t = tag_compressed_page_justfound(page); - } else if (type == DELAYEDALLOC) { - t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED); - } else if (type == TRYALLOC) { - newpage = erofs_allocpage(pagepool, gfp); - if (!newpage) - goto dontalloc; - - set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); - t = tag_compressed_page_justfound(newpage); - } else { /* DONTALLOC */ -dontalloc: - if (standalone) - clt->compressedpages = pages; + } else { + /* I/O is needed, no possible to decompress directly */ standalone = false; - continue; + switch (type) { + case DELAYEDALLOC: + t = tagptr_init(compressed_page_t, + PAGE_UNALLOCATED); + break; + case TRYALLOC: + newpage = erofs_allocpage(pagepool, gfp); + if (!newpage) + continue; + set_page_private(newpage, + Z_EROFS_PREALLOCATED_PAGE); + t = tag_compressed_page_justfound(newpage); + break; + default: /* DONTALLOC */ + continue; + } } if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t))) @@ -214,7 +296,11 @@ dontalloc: } } - if (standalone) /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */ + /* + * don't do inplace I/O if all compressed pages are available in + * managed cache since it can be moved to the bypass queue instead. + */ + if (standalone) clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } @@ -225,14 +311,13 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); struct address_space *const mapping = MNGD_MAPPING(sbi); - const unsigned int clusterpages = BIT(pcl->clusterbits); int i; /* * refcount of workgroup is now freezed as 1, * therefore no need to worry about available decompression users. */ - for (i = 0; i < clusterpages; ++i) { + for (i = 0; i < pcl->pclusterpages; ++i) { struct page *page = pcl->compressed_pages[i]; if (!page) @@ -257,13 +342,12 @@ int erofs_try_to_free_cached_page(struct address_space *mapping, struct page *page) { struct z_erofs_pcluster *const pcl = (void *)page_private(page); - const unsigned int clusterpages = BIT(pcl->clusterbits); int ret = 0; /* 0 - busy */ if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) { unsigned int i; - for (i = 0; i < clusterpages; ++i) { + for (i = 0; i < pcl->pclusterpages; ++i) { if (pcl->compressed_pages[i] == page) { WRITE_ONCE(pcl->compressed_pages[i], NULL); ret = 1; @@ -279,16 +363,14 @@ int erofs_try_to_free_cached_page(struct address_space *mapping, } /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, - struct page *page) +static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, + struct page *page) { struct z_erofs_pcluster *const pcl = clt->pcl; - const unsigned int clusterpages = BIT(pcl->clusterbits); - while (clt->compressedpages < pcl->compressed_pages + clusterpages) { - if (!cmpxchg(clt->compressedpages++, NULL, page)) + while (clt->icpage_ptr > pcl->compressed_pages) + if (!cmpxchg(--clt->icpage_ptr, NULL, page)) return true; - } return false; } @@ -399,10 +481,10 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, struct erofs_workgroup *grp; int err; - /* no available workgroup, let's allocate one */ - pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS); - if (!pcl) - return -ENOMEM; + /* no available pcluster, let's allocate one */ + pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT); + if (IS_ERR(pcl)) + return PTR_ERR(pcl); atomic_set(&pcl->obj.refcount, 1); pcl->obj.index = map->m_pa >> PAGE_SHIFT; @@ -416,25 +498,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, else pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; - pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0]; - pcl->clusterbits -= PAGE_SHIFT; - /* new pclusters should be claimed as type 1, primary and followed */ pcl->next = clt->owned_head; clt->mode = COLLECT_PRIMARY_FOLLOWED; cl = z_erofs_primarycollection(pcl); - - /* must be cleaned before freeing to slab */ - DBG_BUGON(cl->nr_pages); - DBG_BUGON(cl->vcnt); - cl->pageofs = map->m_la & ~PAGE_MASK; /* * lock all primary followed works before visible to others * and mutex_trylock *never* fails for a new pcluster. */ + mutex_init(&cl->lock); DBG_BUGON(!mutex_trylock(&cl->lock)); grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj); @@ -458,7 +533,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, err_out: mutex_unlock(&cl->lock); - kmem_cache_free(pcluster_cachep, pcl); + z_erofs_free_pcluster(pcl); return err; } @@ -502,9 +577,8 @@ out: z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, clt->cl->pagevec, clt->cl->vcnt); - clt->compressedpages = clt->pcl->compressed_pages; - if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */ - clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES; + /* since file-backed online pages are traversed in reverse order */ + clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages; return 0; } @@ -517,9 +591,8 @@ static void z_erofs_rcu_callback(struct rcu_head *head) struct z_erofs_collection *const cl = container_of(head, struct z_erofs_collection, rcu); - kmem_cache_free(pcluster_cachep, - container_of(cl, struct z_erofs_pcluster, - primary_collection)); + z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster, + primary_collection)); } void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) @@ -706,9 +779,12 @@ err_out: goto out; } +static void z_erofs_decompressqueue_work(struct work_struct *work); static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, bool sync, int bios) { + struct erofs_sb_info *const sbi = EROFS_SB(io->sb); + /* wake up the caller thread for sync decompression */ if (sync) { unsigned long flags; @@ -720,8 +796,15 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, return; } - if (!atomic_add_return(bios, &io->pending_bios)) + if (atomic_add_return(bios, &io->pending_bios)) + return; + /* Use workqueue and sync decompression for atomic contexts only */ + if (in_atomic() || irqs_disabled()) { queue_work(z_erofs_workqueue, &io->u.work); + sbi->ctx.readahead_sync_decompress = true; + return; + } + z_erofs_decompressqueue_work(&io->u.work); } static bool z_erofs_page_is_invalidated(struct page *page) @@ -761,9 +844,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, struct list_head *pagepool) { struct erofs_sb_info *const sbi = EROFS_SB(sb); - const unsigned int clusterpages = BIT(pcl->clusterbits); struct z_erofs_pagevec_ctor ctor; - unsigned int i, outputsize, llen, nr_pages; + unsigned int i, inputsize, outputsize, llen, nr_pages; struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES]; struct page **pages, **compressed_pages, *page; @@ -843,7 +925,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, overlapped = false; compressed_pages = pcl->compressed_pages; - for (i = 0; i < clusterpages; ++i) { + for (i = 0; i < pcl->pclusterpages; ++i) { unsigned int pagenr; page = compressed_pages[i]; @@ -896,12 +978,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, partial = true; } + inputsize = pcl->pclusterpages * PAGE_SIZE; err = z_erofs_decompress(&(struct z_erofs_decompress_req) { .sb = sb, .in = compressed_pages, .out = pages, .pageofs_out = cl->pageofs, - .inputsize = PAGE_SIZE, + .inputsize = inputsize, .outputsize = outputsize, .alg = pcl->algorithmformat, .inplace_io = overlapped, @@ -909,8 +992,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, }, pagepool); out: - /* must handle all compressed pages before endding pages */ - for (i = 0; i < clusterpages; ++i) { + /* must handle all compressed pages before ending pages */ + for (i = 0; i < pcl->pclusterpages; ++i) { page = compressed_pages[i]; if (erofs_page_is_managed(sbi, page)) @@ -1213,7 +1296,7 @@ static void z_erofs_submit_queue(struct super_block *sb, pcl = container_of(owned_head, struct z_erofs_pcluster, next); cur = pcl->obj.index; - end = cur + BIT(pcl->clusterbits); + end = cur + pcl->pclusterpages; /* close the main owned chain at first */ owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, @@ -1333,7 +1416,8 @@ static void z_erofs_readahead(struct readahead_control *rac) struct erofs_sb_info *const sbi = EROFS_I_SB(inode); unsigned int nr_pages = readahead_count(rac); - bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages); + bool sync = (sbi->ctx.readahead_sync_decompress && + nr_pages <= sbi->ctx.max_sync_decompress_pages); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); struct page *page, *head = NULL; LIST_HEAD(pagepool); |