Diffstat (limited to 'fs')
 fs/f2fs/compress.c | 229
 fs/f2fs/data.c     |  82
 fs/f2fs/debug.c    |   2
 fs/f2fs/f2fs.h     | 102
 fs/f2fs/file.c     |  79
 fs/f2fs/gc.c       |  11
 fs/f2fs/gc.h       |  21
 fs/f2fs/inode.c    |   3
 fs/f2fs/node.c     |  14
 fs/f2fs/segment.c  |  79
 fs/f2fs/segment.h  |  11
 fs/f2fs/super.c    |  90
 fs/f2fs/sysfs.c    |  56
 13 files changed, 526 insertions(+), 253 deletions(-)
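One theme of the series below is the new "memory=low" mount mode. In normal mode the decompression buffers are allocated up front in f2fs_alloc_dic(); in low-memory mode the allocation is deferred until f2fs_decompress_cluster() actually runs. Both paths share one gate, allow_memalloc_for_decomp(), whose pre_alloc ^ f2fs_low_mem_mode(sbi) test is easy to misread. The following is a minimal standalone sketch (plain userspace C with illustrative names, not kernel code) that tabulates the rule:

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch of the allocation-timing rule from allow_memalloc_for_decomp():
 * a call site may allocate iff (pre_alloc XOR low_mem_mode) is true.
 */
static bool allow_memalloc(bool pre_alloc, bool low_mem_mode)
{
	return pre_alloc ^ low_mem_mode;
}

int main(void)
{
	/* normal mode: only the early (pre_alloc) call site allocates */
	printf("normal/early: %d\n", allow_memalloc(true, false));	/* 1 */
	printf("normal/late:  %d\n", allow_memalloc(false, false));	/* 0 */
	/* memory=low: only the late call site, at decompression time */
	printf("low/early:    %d\n", allow_memalloc(true, true));	/* 0 */
	printf("low/late:     %d\n", allow_memalloc(false, true));	/* 1 */
	return 0;
}

Exactly one of the two call sites fires in either mode, which keeps f2fs_prepare_decomp_mem() and f2fs_release_decomp_mem() symmetric.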
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 009e6c519e98..70e97075e535 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -729,14 +729,19 @@ out: return ret; } -void f2fs_decompress_cluster(struct decompress_io_ctx *dic) +static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc); +static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc); + +void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; + bool bypass_callback = false; int ret; - int i; trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, dic->cluster_size, fi->i_compress_algorithm); @@ -746,41 +751,10 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) goto out_end_io; } - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); - if (!dic->tpages) { - ret = -ENOMEM; - goto out_end_io; - } - - for (i = 0; i < dic->cluster_size; i++) { - if (dic->rpages[i]) { - dic->tpages[i] = dic->rpages[i]; - continue; - } - - dic->tpages[i] = f2fs_compress_alloc_page(); - if (!dic->tpages[i]) { - ret = -ENOMEM; - goto out_end_io; - } - } - - if (cops->init_decompress_ctx) { - ret = cops->init_decompress_ctx(dic); - if (ret) - goto out_end_io; - } - - dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); - if (!dic->rbuf) { - ret = -ENOMEM; - goto out_destroy_decompress_ctx; - } - - dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); - if (!dic->cbuf) { - ret = -ENOMEM; - goto out_vunmap_rbuf; + ret = f2fs_prepare_decomp_mem(dic, false); + if (ret) { + bypass_callback = true; + goto out_release; } dic->clen = le32_to_cpu(dic->cbuf->clen); @@ -788,7 +762,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { ret = -EFSCORRUPTED; - goto out_vunmap_cbuf; + goto out_release; } ret = cops->decompress_pages(dic); @@ -809,17 +783,13 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) } } -out_vunmap_cbuf: - vm_unmap_ram(dic->cbuf, dic->nr_cpages); -out_vunmap_rbuf: - vm_unmap_ram(dic->rbuf, dic->cluster_size); -out_destroy_decompress_ctx: - if (cops->destroy_decompress_ctx) - cops->destroy_decompress_ctx(dic); +out_release: + f2fs_release_decomp_mem(dic, bypass_callback, false); + out_end_io: trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, dic->clen, ret); - f2fs_decompress_end_io(dic, ret); + f2fs_decompress_end_io(dic, ret, in_task); } /* @@ -829,7 +799,7 @@ out_end_io: * (or in the case of a failure, cleans up without actually decompressing). 
*/ void f2fs_end_read_compressed_page(struct page *page, bool failed, - block_t blkaddr) + block_t blkaddr, bool in_task) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); @@ -839,12 +809,12 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, if (failed) WRITE_ONCE(dic->failed, true); - else if (blkaddr) + else if (blkaddr && in_task) f2fs_cache_compressed_page(sbi, page, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) - f2fs_decompress_cluster(dic); + f2fs_decompress_cluster(dic, in_task); } static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -871,19 +841,26 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) return is_page_in_cluster(cc, index); } -bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, - int index, int nr_pages) +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, + int index, int nr_pages, bool uptodate) { - unsigned long pgidx; - int i; + unsigned long pgidx = pages[index]->index; + int i = uptodate ? 0 : 1; - if (nr_pages - index < cc->cluster_size) + /* + * when uptodate set to true, try to check all pages in cluster is + * uptodate or not. + */ + if (uptodate && (pgidx % cc->cluster_size)) return false; - pgidx = pvec->pages[index]->index; + if (nr_pages - index < cc->cluster_size) + return false; - for (i = 1; i < cc->cluster_size; i++) { - if (pvec->pages[index + i]->index != pgidx + i) + for (; i < cc->cluster_size; i++) { + if (pages[index + i]->index != pgidx + i) + return false; + if (uptodate && !PageUptodate(pages[index + i])) return false; } @@ -1552,16 +1529,85 @@ destroy_out: return err; } -static void f2fs_free_dic(struct decompress_io_ctx *dic); +static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, + bool pre_alloc) +{ + return pre_alloc ^ f2fs_low_mem_mode(sbi); +} + +static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc) +{ + const struct f2fs_compress_ops *cops = + f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + int i; + + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return 0; + + dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + if (!dic->tpages) + return -ENOMEM; + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) { + dic->tpages[i] = dic->rpages[i]; + continue; + } + + dic->tpages[i] = f2fs_compress_alloc_page(); + if (!dic->tpages[i]) + return -ENOMEM; + } + + dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); + if (!dic->rbuf) + return -ENOMEM; + + dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); + if (!dic->cbuf) + return -ENOMEM; + + if (cops->init_decompress_ctx) { + int ret = cops->init_decompress_ctx(dic); + + if (ret) + return ret; + } + + return 0; +} + +static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc) +{ + const struct f2fs_compress_ops *cops = + f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return; + + if (!bypass_destroy_callback && cops->destroy_decompress_ctx) + cops->destroy_decompress_ctx(dic); + + if (dic->cbuf) + vm_unmap_ram(dic->cbuf, dic->nr_cpages); + + if (dic->rbuf) + vm_unmap_ram(dic->rbuf, dic->cluster_size); +} + +static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) { struct 
decompress_io_ctx *dic; pgoff_t start_idx = start_idx_of_cluster(cc); - int i; + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + int i, ret; - dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO, - false, F2FS_I_SB(cc->inode)); + dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO, false, sbi); if (!dic) return ERR_PTR(-ENOMEM); @@ -1587,32 +1633,43 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->nr_rpages = cc->cluster_size; dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); - if (!dic->cpages) + if (!dic->cpages) { + ret = -ENOMEM; goto out_free; + } for (i = 0; i < dic->nr_cpages; i++) { struct page *page; page = f2fs_compress_alloc_page(); - if (!page) + if (!page) { + ret = -ENOMEM; goto out_free; + } f2fs_set_compressed_page(page, cc->inode, start_idx + i + 1, dic); dic->cpages[i] = page; } + ret = f2fs_prepare_decomp_mem(dic, true); + if (ret) + goto out_free; + return dic; out_free: - f2fs_free_dic(dic); - return ERR_PTR(-ENOMEM); + f2fs_free_dic(dic, true); + return ERR_PTR(ret); } -static void f2fs_free_dic(struct decompress_io_ctx *dic) +static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback) { int i; + f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); + if (dic->tpages) { for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) @@ -1637,17 +1694,33 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic) kmem_cache_free(dic_entry_slab, dic); } -static void f2fs_put_dic(struct decompress_io_ctx *dic) +static void f2fs_late_free_dic(struct work_struct *work) { - if (refcount_dec_and_test(&dic->refcnt)) - f2fs_free_dic(dic); + struct decompress_io_ctx *dic = + container_of(work, struct decompress_io_ctx, free_work); + + f2fs_free_dic(dic, false); +} + +static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) +{ + if (refcount_dec_and_test(&dic->refcnt)) { + if (in_task) { + f2fs_free_dic(dic, false); + } else { + INIT_WORK(&dic->free_work, f2fs_late_free_dic); + queue_work(F2FS_I_SB(dic->inode)->post_read_wq, + &dic->free_work); + } + } } /* * Update and unlock the cluster's pagecache pages, and release the reference to * the decompress_io_ctx that was being held for I/O completion. */ -static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task) { int i; @@ -1668,7 +1741,7 @@ static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) unlock_page(rpage); } - f2fs_put_dic(dic); + f2fs_put_dic(dic, in_task); } static void f2fs_verify_cluster(struct work_struct *work) @@ -1685,14 +1758,15 @@ static void f2fs_verify_cluster(struct work_struct *work) SetPageError(rpage); } - __f2fs_decompress_end_io(dic, false); + __f2fs_decompress_end_io(dic, false, true); } /* * This is called when a compressed cluster has been decompressed * (or failed to be read and/or decompressed). 
*/ -void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task) { if (!failed && dic->need_verity) { /* @@ -1704,7 +1778,7 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) INIT_WORK(&dic->verity_work, f2fs_verify_cluster); fsverity_enqueue_verify_work(&dic->verity_work); } else { - __f2fs_decompress_end_io(dic, failed); + __f2fs_decompress_end_io(dic, failed, in_task); } } @@ -1713,12 +1787,12 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) * * This is called when the page is no longer needed and can be freed. */ -void f2fs_put_page_dic(struct page *page) +void f2fs_put_page_dic(struct page *page, bool in_task) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); - f2fs_put_dic(dic); + f2fs_put_dic(dic, in_task); } /* @@ -1903,6 +1977,9 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; char slab_name[32]; + if (!f2fs_sb_has_compression(sbi)) + return 0; + sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev)); sbi->page_array_slab_size = sizeof(struct page *) << diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ed503d50f95f..aa3ccddfa037 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -119,7 +119,7 @@ struct bio_post_read_ctx { block_t fs_blkaddr; }; -static void f2fs_finish_read_bio(struct bio *bio) +static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; @@ -133,8 +133,9 @@ static void f2fs_finish_read_bio(struct bio *bio) if (f2fs_is_compressed_page(page)) { if (bio->bi_status) - f2fs_end_read_compressed_page(page, true, 0); - f2fs_put_page_dic(page); + f2fs_end_read_compressed_page(page, true, 0, + in_task); + f2fs_put_page_dic(page, in_task); continue; } @@ -191,7 +192,7 @@ static void f2fs_verify_bio(struct work_struct *work) fsverity_verify_bio(bio); } - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, true); } /* @@ -203,7 +204,7 @@ static void f2fs_verify_bio(struct work_struct *work) * can involve reading verity metadata pages from the file, and these verity * metadata pages may be encrypted and/or compressed. */ -static void f2fs_verify_and_finish_bio(struct bio *bio) +static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) { struct bio_post_read_ctx *ctx = bio->bi_private; @@ -211,7 +212,7 @@ static void f2fs_verify_and_finish_bio(struct bio *bio) INIT_WORK(&ctx->work, f2fs_verify_bio); fsverity_enqueue_verify_work(&ctx->work); } else { - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, in_task); } } @@ -224,7 +225,8 @@ static void f2fs_verify_and_finish_bio(struct bio *bio) * that the bio includes at least one compressed page. The actual decompression * is done on a per-cluster basis, not a per-bio basis. */ -static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) +static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, + bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; @@ -237,7 +239,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) /* PG_error was set if decryption failed. 
*/ if (f2fs_is_compressed_page(page)) f2fs_end_read_compressed_page(page, PageError(page), - blkaddr); + blkaddr, in_task); else all_compressed = false; @@ -262,15 +264,16 @@ static void f2fs_post_read_work(struct work_struct *work) fscrypt_decrypt_bio(ctx->bio); if (ctx->enabled_steps & STEP_DECOMPRESS) - f2fs_handle_step_decompress(ctx); + f2fs_handle_step_decompress(ctx, true); - f2fs_verify_and_finish_bio(ctx->bio); + f2fs_verify_and_finish_bio(ctx->bio, true); } static void f2fs_read_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); struct bio_post_read_ctx *ctx; + bool intask = in_task(); iostat_update_and_unbind_ctx(bio, 0); ctx = bio->bi_private; @@ -281,16 +284,29 @@ static void f2fs_read_end_io(struct bio *bio) } if (bio->bi_status) { - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, intask); return; } - if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) { - INIT_WORK(&ctx->work, f2fs_post_read_work); - queue_work(ctx->sbi->post_read_wq, &ctx->work); - } else { - f2fs_verify_and_finish_bio(bio); + if (ctx) { + unsigned int enabled_steps = ctx->enabled_steps & + (STEP_DECRYPT | STEP_DECOMPRESS); + + /* + * If we have only decompression step between decompression and + * decrypt, we don't need post processing for this. + */ + if (enabled_steps == STEP_DECOMPRESS && + !f2fs_low_mem_mode(sbi)) { + f2fs_handle_step_decompress(ctx, intask); + } else if (enabled_steps) { + INIT_WORK(&ctx->work, f2fs_post_read_work); + queue_work(ctx->sbi->post_read_wq, &ctx->work); + return; + } } + + f2fs_verify_and_finish_bio(bio, intask); } static void f2fs_write_end_io(struct bio *bio) @@ -1682,8 +1698,6 @@ sync_out: */ f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk); if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; @@ -2223,7 +2237,7 @@ skip_reading_dnode: if (f2fs_load_compressed_page(sbi, page, blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) - f2fs_decompress_cluster(dic); + f2fs_decompress_cluster(dic, true); continue; } @@ -2241,7 +2255,7 @@ submit_and_realloc: page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - f2fs_decompress_end_io(dic, ret); + f2fs_decompress_end_io(dic, ret, true); f2fs_put_dnode(&dn); *bio_ret = NULL; return ret; @@ -2731,6 +2745,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .submitted = false, .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, + .post_read = f2fs_post_read_required(inode), .io_type = io_type, .io_wbc = wbc, .bio = bio, @@ -2902,7 +2917,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, { int ret = 0; int done = 0, retry = 0; - struct pagevec pvec; + struct page *pages[F2FS_ONSTACK_PAGES]; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; sector_t last_block; @@ -2933,8 +2948,6 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int submitted = 0; int i; - pagevec_init(&pvec); - if (get_dirty_pages(mapping->host) <= SM_I(F2FS_M_SB(mapping))->min_hot_blocks) set_inode_flag(mapping->host, FI_HOT_DATA); @@ -2960,13 +2973,13 @@ retry: tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && !retry && (index <= end)) { - nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, - tag); + nr_pages = find_get_pages_range_tag(mapping, &index, end, + tag, F2FS_ONSTACK_PAGES, pages); if (nr_pages == 0) break; for (i = 0; i < nr_pages; i++) { - struct page 
*page = pvec.pages[i]; + struct page *page = pages[i]; bool need_readd; readd: need_readd = false; @@ -2997,6 +3010,10 @@ readd: if (!f2fs_cluster_is_empty(&cc)) goto lock_page; + if (f2fs_all_cluster_page_ready(&cc, + pages, i, nr_pages, true)) + goto lock_page; + ret2 = f2fs_prepare_compress_overwrite( inode, &pagep, page->index, &fsdata); @@ -3007,8 +3024,8 @@ readd: } else if (ret2 && (!f2fs_compress_write_end(inode, fsdata, page->index, 1) || - !f2fs_all_cluster_page_loaded(&cc, - &pvec, i, nr_pages))) { + !f2fs_all_cluster_page_ready(&cc, + pages, i, nr_pages, false))) { retry = 1; break; } @@ -3098,7 +3115,7 @@ next: if (need_readd) goto readd; } - pagevec_release(&pvec); + release_pages(pages, nr_pages); cond_resched(); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -3408,12 +3425,11 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, struct inode *cow_inode = F2FS_I(inode)->cow_inode; pgoff_t index = page->index; int err = 0; - block_t ori_blk_addr; + block_t ori_blk_addr = NULL_ADDR; /* If pos is beyond the end of file, reserve a new block in COW inode */ if ((pos & PAGE_MASK) >= i_size_read(inode)) - return __reserve_data_block(cow_inode, index, blk_addr, - node_changed); + goto reserve_block; /* Look for the block in COW inode first */ err = __find_data_block(cow_inode, index, blk_addr); @@ -3427,10 +3443,12 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, if (err) return err; +reserve_block: /* Finally, we should reserve a new block in COW inode for the update */ err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); if (err) return err; + inc_atomic_write_cnt(inode); if (ori_blk_addr != NULL_ADDR) *blk_addr = ori_blk_addr; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index c92625ef16d0..c01471573977 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,7 +39,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = BLKS_PER_SEC(sbi); + blks_per_sec = CAP_BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { vblocks = get_valid_blocks(sbi, segno, true); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 85921a1f2db8..3c7cdb70fe2e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -159,6 +159,7 @@ struct f2fs_mount_info { int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ + int memory_mode; /* memory mode */ int discard_unit; /* * discard command's offset/size should * be aligned to this unit: block, @@ -229,7 +230,6 @@ enum { #define CP_PAUSE 0x00000040 #define CP_RESIZE 0x00000080 -#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ @@ -598,6 +598,8 @@ enum { #define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS #define RECOVERY_MIN_RA_BLOCKS 1 +#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -757,6 +759,7 @@ enum { FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ FI_COMPRESS_RELEASED, /* compressed blocks were released */ FI_ALIGNED_WRITE, /* enable aligned write */ + FI_COW_FILE, /* indicate COW file */ FI_MAX, /* max flag, never be used */ }; @@ -812,6 +815,8 @@ struct f2fs_inode_info { unsigned char i_compress_level; /* compress level 
(lz4hc,zstd) */
 	unsigned short i_compress_flag;		/* compress flag */
 	unsigned int i_cluster_size;		/* cluster size */
+
+	unsigned int atomic_write_cnt;
 };
 
 static inline void get_extent_info(struct extent_info *ext,
@@ -1198,6 +1203,7 @@ struct f2fs_io_info {
 	bool retry;		/* need to reallocate block address */
 	int compr_blocks;	/* # of compressed block addresses */
 	bool encrypted;		/* indicate file is encrypted */
+	bool post_read;		/* require post read */
 	enum iostat_type io_type;	/* io type */
 	struct writeback_control *io_wbc;	/* writeback control */
 	struct bio **bio;		/* bio for ipu */
@@ -1234,7 +1240,6 @@ struct f2fs_dev_info {
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int nr_blkz;		/* Total number of zones */
 	unsigned long *blkz_seq;	/* Bitmap indicating sequential zones */
-	block_t *zone_capacity_blocks;	/* Array of zone capacity in blks */
 #endif
 };
 
@@ -1360,6 +1365,13 @@ enum {
 	DISCARD_UNIT_SECTION,	/* basic discard unit is section */
 };
 
+enum {
+	MEMORY_MODE_NORMAL,	/* memory mode for normal devices */
+	MEMORY_MODE_LOW,	/* memory mode for low memory devices */
+};
+
+
+
 static inline int f2fs_test_bit(unsigned int nr, char *addr);
 static inline void f2fs_set_bit(unsigned int nr, char *addr);
 static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1580,6 +1592,7 @@ struct decompress_io_ctx {
 	void *private;		/* payload buffer for specified decompression algorithm */
 	void *private2;		/* extra payload buffer */
 	struct work_struct verity_work;	/* work to verify the decompressed pages */
+	struct work_struct free_work;	/* work for late free of this structure itself */
 };
 
 #define NULL_CLUSTER			((unsigned int)(~0))
@@ -1664,6 +1677,7 @@ struct f2fs_sb_info {
 	unsigned int meta_ino_num;		/* meta inode number */
 	unsigned int log_blocks_per_seg;	/* log2 blocks per segment */
 	unsigned int blocks_per_seg;		/* blocks per segment */
+	unsigned int unusable_blocks_per_sec;	/* unusable blocks per section */
 	unsigned int segs_per_sec;		/* segments per section */
 	unsigned int secs_per_zone;		/* sections per zone */
 	unsigned int total_sections;		/* total section count */
@@ -1804,6 +1818,12 @@ struct f2fs_sb_info {
 	int max_fragment_chunk;			/* max chunk size for block fragmentation mode */
 	int max_fragment_hole;			/* max hole size for block fragmentation mode */
 
+	/* For atomic write statistics */
+	atomic64_t current_atomic_write;
+	s64 peak_atomic_write;
+	u64 committed_atomic_block;
+	u64 revoked_atomic_block;
+
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	struct kmem_cache *page_array_slab;	/* page array entry */
 	unsigned int page_array_slab_size;	/* default page array slab size */
@@ -2418,6 +2438,28 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
 		dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
 }
 
+static inline void inc_atomic_write_cnt(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	u64 current_write;
+
+	fi->atomic_write_cnt++;
+	atomic64_inc(&sbi->current_atomic_write);
+	current_write = atomic64_read(&sbi->current_atomic_write);
+	if (current_write > sbi->peak_atomic_write)
+		sbi->peak_atomic_write = current_write;
+}
+
+static inline void release_atomic_write_cnt(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+
+	atomic64_sub(fi->atomic_write_cnt, &sbi->current_atomic_write);
+	fi->atomic_write_cnt = 0;
+}
+
 static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
 {
 	return atomic_read(&sbi->nr_pages[count_type]);
 }
@@ -2696,16 +2738,6 @@ static
inline struct page *f2fs_pagecache_get_page( return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); } -static inline void f2fs_copy_page(struct page *src, struct page *dst) -{ - char *src_kaddr = kmap(src); - char *dst_kaddr = kmap(dst); - - memcpy(dst_kaddr, src_kaddr, PAGE_SIZE); - kunmap(dst); - kunmap(src); -} - static inline void f2fs_put_page(struct page *page, int unlock) { if (!page) @@ -3208,6 +3240,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode) return is_inode_flag_set(inode, FI_ATOMIC_FILE); } +static inline bool f2fs_is_cow_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_COW_FILE); +} + static inline bool f2fs_is_first_block_written(struct inode *inode) { return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); @@ -4154,13 +4191,13 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); -void f2fs_decompress_cluster(struct decompress_io_ctx *dic); +void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); void f2fs_end_read_compressed_page(struct page *page, bool failed, - block_t blkaddr); + block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); -bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, - int index, int nr_pages); +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, + int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); int f2fs_write_multi_pages(struct compress_ctx *cc, @@ -4175,8 +4212,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); -void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); -void f2fs_put_page_dic(struct page *page); +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task); +void f2fs_put_page_dic(struct page *page, bool in_task); unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); @@ -4222,13 +4260,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page) } static inline int f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } -static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { } +static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, + bool in_task) { } static inline void f2fs_end_read_compressed_page(struct page *page, - bool failed, block_t blkaddr) + bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); } -static inline void f2fs_put_page_dic(struct page *page) +static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } @@ -4254,8 +4293,9 @@ static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode, unsigned int c_len) { } #endif -static inline void set_compress_context(struct inode *inode) +static inline int set_compress_context(struct inode *inode) { +#ifdef CONFIG_F2FS_FS_COMPRESSION struct f2fs_sb_info 
*sbi = F2FS_I_SB(inode); F2FS_I(inode)->i_compress_algorithm = @@ -4278,6 +4318,10 @@ static inline void set_compress_context(struct inode *inode) stat_inc_compr_inode(inode); inc_compr_inode_stat(inode); f2fs_mark_inode_dirty_sync(inode, true); + return 0; +#else + return -EOPNOTSUPP; +#endif } static inline bool f2fs_disable_compressed_file(struct inode *inode) @@ -4394,10 +4438,15 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; +} + static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || - f2fs_is_atomic_file(inode)) + f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode)) return false; return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); } @@ -4459,12 +4508,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, /* disallow direct IO if any of devices has unaligned blksize */ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) return true; - /* - * for blkzoned device, fallback direct IO to buffered IO, so - * all IOs can be serialized by log-structured write. - */ - if (f2fs_sb_has_blkzoned(sbi)) - return true; + if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { if (block_unaligned_IO(inode, iocb, iter)) return true; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d66e37d80a2d..ce4905a073b3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1272,7 +1272,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } - f2fs_copy_page(psrc, pdst); + memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); @@ -1675,7 +1675,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, return 0; if (f2fs_is_pinned_file(inode)) { - block_t sec_blks = BLKS_PER_SEC(sbi); + block_t sec_blks = CAP_BLKS_PER_SEC(sbi); block_t sec_len = roundup(map.m_len, sec_blks); map.m_len = sec_blks; @@ -1816,8 +1816,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) atomic_read(&inode->i_writecount) != 1) return 0; - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); return 0; } @@ -1831,8 +1830,7 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) * until all the writers close its file. Since this should be done * before dropping file lock, it needs to do in ->flush. 
*/ - if (f2fs_is_atomic_file(inode) && - F2FS_I(inode)->atomic_write_task == current) + if (F2FS_I(inode)->atomic_write_task == current) f2fs_abort_atomic_write(inode, true); return 0; } @@ -1867,22 +1865,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (masked_flags & F2FS_COMPR_FL) { if (!f2fs_disable_compressed_file(inode)) return -EINVAL; - } - if (iflags & F2FS_NOCOMP_FL) - return -EINVAL; - if (iflags & F2FS_COMPR_FL) { + } else { if (!f2fs_may_compress(inode)) return -EINVAL; - if (S_ISREG(inode->i_mode) && inode->i_size) + if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) return -EINVAL; - - set_compress_context(inode); + if (set_compress_context(inode)) + return -EOPNOTSUPP; } } - if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { - if (masked_flags & F2FS_COMPR_FL) - return -EINVAL; - } fi->i_flags = iflags | (fi->i_flags & ~mask); f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && @@ -2062,13 +2053,14 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); set_inode_flag(inode, FI_ATOMIC_FILE); - set_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); + set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; stat_update_max_atomic_write(inode); + fi->atomic_write_cnt = 0; out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -2109,6 +2101,30 @@ unlock_out: return ret; } +static int f2fs_ioc_abort_atomic_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); + int ret; + + if (!inode_owner_or_capable(mnt_userns, inode)) + return -EACCES; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + inode_lock(inode); + + f2fs_abort_atomic_write(inode, true); + + inode_unlock(inode); + + mnt_drop_write_file(filp); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return ret; +} + static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2426,7 +2442,7 @@ do_more: ret = -EAGAIN; goto out; } - range->start += BLKS_PER_SEC(sbi); + range->start += CAP_BLKS_PER_SEC(sbi); if (range->start <= end) goto do_more; out: @@ -2551,7 +2567,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; } - sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); + sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can @@ -3897,10 +3913,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) for (i = 0; i < page_len; i++, redirty_idx++) { page = find_lock_page(mapping, redirty_idx); - if (!page) { - ret = -ENOMEM; - break; - } + + /* It will never fail, when page has pinned above */ + f2fs_bug_on(F2FS_I_SB(inode), !page); + set_page_dirty(page); f2fs_put_page(page, 1); f2fs_put_page(page, 0); @@ -3939,6 +3955,11 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; @@ -4006,6 +4027,11 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 
if (ret) goto out; @@ -4054,9 +4080,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_start_atomic_write(filp); case F2FS_IOC_COMMIT_ATOMIC_WRITE: return f2fs_ioc_commit_atomic_write(filp); + case F2FS_IOC_ABORT_ATOMIC_WRITE: + return f2fs_ioc_abort_atomic_write(filp); case F2FS_IOC_START_VOLATILE_WRITE: case F2FS_IOC_RELEASE_VOLATILE_WRITE: - case F2FS_IOC_ABORT_VOLATILE_WRITE: return -EOPNOTSUPP; case F2FS_IOC_SHUTDOWN: return f2fs_ioc_shutdown(filp, arg); @@ -4725,7 +4752,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_COMMIT_ATOMIC_WRITE: case F2FS_IOC_START_VOLATILE_WRITE: case F2FS_IOC_RELEASE_VOLATILE_WRITE: - case F2FS_IOC_ABORT_VOLATILE_WRITE: + case F2FS_IOC_ABORT_ATOMIC_WRITE: case F2FS_IOC_SHUTDOWN: case FITRIM: case FS_IOC_SET_ENCRYPTION_POLICY: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d5fb426e0747..6da21d405ce1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -150,8 +150,11 @@ do_gc: gc_control.nr_free_secs = foreground ? 1 : 0; /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, &gc_control)) - wait_ms = gc_th->no_gc_sleep_time; + if (f2fs_gc(sbi, &gc_control)) { + /* don't bother wait_ms by foreground gc */ + if (!foreground) + wait_ms = gc_th->no_gc_sleep_time; + } if (foreground) wake_up_all(&gc_th->fggc_wq); @@ -487,7 +490,7 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi, unsigned long long age, u, accu; unsigned long long max_mtime = sit_i->dirty_max_mtime; unsigned long long min_mtime = sit_i->dirty_min_mtime; - unsigned int sec_blocks = BLKS_PER_SEC(sbi); + unsigned int sec_blocks = CAP_BLKS_PER_SEC(sbi); unsigned int vblocks; unsigned int dirty_threshold = max(am->max_candidate_count, am->candidate_ratio * @@ -1487,7 +1490,7 @@ next_step: */ if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || (!force_migrate && get_valid_blocks(sbi, segno, true) == - BLKS_PER_SEC(sbi))) + CAP_BLKS_PER_SEC(sbi))) return submitted; if (check_valid_map(sbi, segno, off) == 0) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 3fe145e8e594..19b956c2d697 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -120,15 +120,13 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) return free_blks - ovp_blks; } -static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) +static inline block_t limit_invalid_user_blocks(block_t user_block_count) { - return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; + return (long)(user_block_count * LIMIT_INVALID_BLOCK) / 100; } -static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) +static inline block_t limit_free_user_blocks(block_t reclaimable_user_blocks) { - block_t reclaimable_user_blocks = sbi->user_block_count - - written_block_count(sbi); return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } @@ -163,15 +161,16 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { - block_t invalid_user_blocks = sbi->user_block_count - - written_block_count(sbi); + block_t user_block_count = sbi->user_block_count; + block_t invalid_user_blocks = user_block_count - + written_block_count(sbi); /* * Background GC is triggered with the following conditions. * 1. There are a number of invalid blocks. * 2. There is not enough free space. 
*/ - if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && - free_user_blocks(sbi) < limit_free_user_blocks(sbi)) - return true; - return false; + return (invalid_user_blocks > + limit_invalid_user_blocks(user_block_count) && + free_user_blocks(sbi) < + limit_free_user_blocks(invalid_user_blocks)); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index fc55f5bd1fcc..6d11c365d7b4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -744,8 +744,7 @@ void f2fs_evict_inode(struct inode *inode) nid_t xnid = F2FS_I(inode)->i_xattr_nid; int err = 0; - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 740c72d088f3..e06a0c478b39 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1292,7 +1292,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) dec_valid_node_count(sbi, dn->inode, !ofs); goto fail; } - f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); + if (unlikely(new_ni.blk_addr != NULL_ADDR)) { + err = -EFSCORRUPTED; + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto fail; + } #endif new_ni.nid = dn->nid; new_ni.ino = dn->inode->i_ino; @@ -1945,7 +1949,6 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; bool submitted = false; - bool may_dirty = true; /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[NODE]) && @@ -1998,11 +2001,8 @@ continue_unlock: } /* flush dirty inode */ - if (IS_INODE(page) && may_dirty) { - may_dirty = false; - if (flush_dirty_inode(page)) - goto lock_node; - } + if (IS_INODE(page) && flush_dirty_inode(page)) + goto lock_node; write_node: f2fs_wait_on_page_writeback(page, NODE, true, true); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c7afc588cf26..0de21f82d7bc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -190,18 +190,20 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - if (f2fs_is_atomic_file(inode)) { - if (clean) - truncate_inode_pages_final(inode->i_mapping); - clear_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); - iput(fi->cow_inode); - fi->cow_inode = NULL; - clear_inode_flag(inode, FI_ATOMIC_FILE); + if (!f2fs_is_atomic_file(inode)) + return; - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - sbi->atomic_files--; - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); - } + if (clean) + truncate_inode_pages_final(inode->i_mapping); + clear_inode_flag(fi->cow_inode, FI_COW_FILE); + iput(fi->cow_inode); + fi->cow_inode = NULL; + release_atomic_write_cnt(inode); + clear_inode_flag(inode, FI_ATOMIC_FILE); + + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + sbi->atomic_files--; + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@ -335,6 +337,11 @@ next: } out: + if (ret) + sbi->revoked_atomic_block += fi->atomic_write_cnt; + else + sbi->committed_atomic_block += fi->atomic_write_cnt; + __complete_revoke_list(inode, &revoke_list, ret ? 
true : false); return ret; @@ -728,7 +735,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, get_valid_blocks(sbi, segno, true); f2fs_bug_on(sbi, unlikely(!valid_blocks || - valid_blocks == BLKS_PER_SEC(sbi))); + valid_blocks == CAP_BLKS_PER_SEC(sbi))); if (!IS_CURSEC(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); @@ -764,7 +771,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); if (!valid_blocks || - valid_blocks == BLKS_PER_SEC(sbi)) { + valid_blocks == CAP_BLKS_PER_SEC(sbi)) { clear_bit(secno, dirty_i->dirty_secmap); return; } @@ -3166,7 +3173,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || - f2fs_is_atomic_file(inode)) + f2fs_is_cow_file(inode)) return CURSEG_HOT_DATA; return f2fs_rw_hint_to_seg_type(inode->i_write_hint); } else { @@ -3433,7 +3440,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - invalidate_mapping_pages(META_MAPPING(sbi), + if (fio->post_read) + invalidate_mapping_pages(META_MAPPING(sbi), fio->new_blkaddr, fio->new_blkaddr); stat_inc_inplace_blocks(fio->sbi); @@ -3616,10 +3624,16 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; + if (!f2fs_post_read_required(inode)) + return; + for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); + + invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) @@ -4362,6 +4376,12 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) return err; seg_info_from_raw_sit(se, &sit); + if (se->type >= NR_PERSISTENT_LOG) { + f2fs_err(sbi, "Invalid segment type: %u, segno: %u", + se->type, start); + return -EFSCORRUPTED; + } + sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (f2fs_block_unit_discard(sbi)) { @@ -4410,6 +4430,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) break; seg_info_from_raw_sit(se, &sit); + if (se->type >= NR_PERSISTENT_LOG) { + f2fs_err(sbi, "Invalid segment type: %u, segno: %u", + se->type, start); + err = -EFSCORRUPTED; + break; + } + sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (f2fs_block_unit_discard(sbi)) { @@ -4483,7 +4510,6 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno = 0, offset = 0, secno; block_t valid_blocks, usable_blks_in_seg; - block_t blks_per_sec = BLKS_PER_SEC(sbi); while (1) { /* find dirty segment based on free segmap */ @@ -4512,7 +4538,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); - if (!valid_blocks || valid_blocks == blks_per_sec) + if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; if (IS_CURSEC(sbi, secno)) continue; @@ -4895,7 +4921,7 @@ static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno, static inline unsigned int f2fs_usable_zone_segs_in_sec( struct f2fs_sb_info *sbi, unsigned int segno) { - unsigned int dev_idx, zone_idx, unusable_segs_in_sec; + unsigned int dev_idx, zone_idx; dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno)); zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), 
dev_idx); @@ -4904,18 +4930,12 @@ static inline unsigned int f2fs_usable_zone_segs_in_sec( if (is_conv_zone(sbi, zone_idx, dev_idx)) return sbi->segs_per_sec; - /* - * If the zone_capacity_blocks array is NULL, then zone capacity - * is equal to the zone size for all zones - */ - if (!FDEV(dev_idx).zone_capacity_blocks) + if (!sbi->unusable_blocks_per_sec) return sbi->segs_per_sec; /* Get the segment count beyond zone capacity block */ - unusable_segs_in_sec = (sbi->blocks_per_blkz - - FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >> - sbi->log_blocks_per_seg; - return sbi->segs_per_sec - unusable_segs_in_sec; + return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >> + sbi->log_blocks_per_seg); } /* @@ -4944,12 +4964,11 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( if (is_conv_zone(sbi, zone_idx, dev_idx)) return sbi->blocks_per_seg; - if (!FDEV(dev_idx).zone_capacity_blocks) + if (!sbi->unusable_blocks_per_sec) return sbi->blocks_per_seg; sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); - sec_cap_blkaddr = sec_start_blkaddr + - FDEV(dev_idx).zone_capacity_blocks[zone_idx]; + sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi); /* * If segment starts before zone capacity and spans beyond diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 3f277dfcb131..d1d63766f2c7 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -101,6 +101,9 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) +#define CAP_BLKS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \ + (sbi)->unusable_blocks_per_sec) #define GET_SEC_FROM_SEG(sbi, segno) \ (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec) #define GET_SEG_FROM_SEC(sbi, secno) \ @@ -609,10 +612,10 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, get_pages(sbi, F2FS_DIRTY_DENTS) + get_pages(sbi, F2FS_DIRTY_IMETA); unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); - unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi); - unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi); - unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi); - unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi); + unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); + unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int free, need_lower, need_upper; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bce02306f7a0..2451623c05a7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/fs_context.h> #include <linux/sched/mm.h> #include <linux/statfs.h> #include <linux/buffer_head.h> @@ -159,6 +160,7 @@ enum { Opt_gc_merge, Opt_nogc_merge, Opt_discard_unit, + Opt_memory_mode, Opt_err, }; @@ -235,6 +237,7 @@ static match_table_t f2fs_tokens = { {Opt_gc_merge, "gc_merge"}, {Opt_nogc_merge, "nogc_merge"}, {Opt_discard_unit, "discard_unit=%s"}, + {Opt_memory_mode, "memory=%s"}, {Opt_err, NULL}, }; @@ -492,9 +495,19 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, bool is_remount) { struct f2fs_sb_info *sbi = F2FS_SB(sb); -#ifdef CONFIG_FS_ENCRYPTION + struct 
fs_parameter param = { + .type = fs_value_is_string, + .string = arg->from ? arg->from : "", + }; + struct fscrypt_dummy_policy *policy = + &F2FS_OPTION(sbi).dummy_enc_policy; int err; + if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { + f2fs_warn(sbi, "test_dummy_encryption option not supported"); + return -EINVAL; + } + if (!f2fs_sb_has_encrypt(sbi)) { f2fs_err(sbi, "Encrypt feature is off"); return -EINVAL; @@ -506,12 +519,12 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, * needed to allow it to be set or changed during remount. We do allow * it to be specified during remount, but only if there is no change. */ - if (is_remount && !F2FS_OPTION(sbi).dummy_enc_policy.policy) { + if (is_remount && !fscrypt_is_dummy_policy_set(policy)) { f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); return -EINVAL; } - err = fscrypt_set_test_dummy_encryption( - sb, arg->from, &F2FS_OPTION(sbi).dummy_enc_policy); + + err = fscrypt_parse_test_dummy_encryption(¶m, policy); if (err) { if (err == -EEXIST) f2fs_warn(sbi, @@ -524,12 +537,14 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, opt, err); return -EINVAL; } + err = fscrypt_add_test_dummy_key(sb, policy); + if (err) { + f2fs_warn(sbi, "Error adding test dummy encryption key [%d]", + err); + return err; + } f2fs_warn(sbi, "Test dummy encryption mode enabled"); return 0; -#else - f2fs_warn(sbi, "test_dummy_encryption option not supported"); - return -EINVAL; -#endif } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -1222,6 +1237,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; + case Opt_memory_mode: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "normal")) { + F2FS_OPTION(sbi).memory_mode = + MEMORY_MODE_NORMAL; + } else if (!strcmp(name, "low")) { + F2FS_OPTION(sbi).memory_mode = + MEMORY_MODE_LOW; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1380,8 +1411,7 @@ static int f2fs_drop_inode(struct inode *inode) atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); /* should remain fi->extent_tree for writepage */ f2fs_destroy_extent_node(inode); @@ -1491,7 +1521,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); - kfree(FDEV(i).zone_capacity_blocks); #endif } kvfree(sbi->devs); @@ -1993,6 +2022,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) seq_printf(seq, ",discard_unit=%s", "section"); + if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_NORMAL) + seq_printf(seq, ",memory=%s", "normal"); + else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW) + seq_printf(seq, ",memory=%s", "low"); + return 0; } @@ -2014,6 +2048,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; + F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; sbi->sb->s_flags &= ~SB_INLINECRYPT; @@ -3579,6 +3614,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; spin_lock_init(&sbi->gc_urgent_high_lock); + 
atomic64_set(&sbi->current_atomic_write, 0); sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; @@ -3636,24 +3672,29 @@ err_valid_block: #ifdef CONFIG_BLK_DEV_ZONED struct f2fs_report_zones_args { + struct f2fs_sb_info *sbi; struct f2fs_dev_info *dev; - bool zone_cap_mismatch; }; static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct f2fs_report_zones_args *rz_args = data; + block_t unusable_blocks = (zone->len - zone->capacity) >> + F2FS_LOG_SECTORS_PER_BLOCK; if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return 0; set_bit(idx, rz_args->dev->blkz_seq); - rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >> - F2FS_LOG_SECTORS_PER_BLOCK; - if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch) - rz_args->zone_cap_mismatch = true; - + if (!rz_args->sbi->unusable_blocks_per_sec) { + rz_args->sbi->unusable_blocks_per_sec = unusable_blocks; + return 0; + } + if (rz_args->sbi->unusable_blocks_per_sec != unusable_blocks) { + f2fs_err(rz_args->sbi, "F2FS supports single zone capacity\n"); + return -EINVAL; + } return 0; } @@ -3694,26 +3735,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (!FDEV(devi).blkz_seq) return -ENOMEM; - /* Get block zones type and zone-capacity */ - FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi, - FDEV(devi).nr_blkz * sizeof(block_t), - GFP_KERNEL); - if (!FDEV(devi).zone_capacity_blocks) - return -ENOMEM; - + rep_zone_arg.sbi = sbi; rep_zone_arg.dev = &FDEV(devi); - rep_zone_arg.zone_cap_mismatch = false; ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb, &rep_zone_arg); if (ret < 0) return ret; - - if (!rep_zone_arg.zone_cap_mismatch) { - kfree(FDEV(devi).zone_capacity_blocks); - FDEV(devi).zone_capacity_blocks = NULL; - } - return 0; } #endif diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4c50aedd5144..eba5fb1629d7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -339,6 +339,21 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, sbi->gc_reclaimed_segs[sbi->gc_segment_mode]); } + if (!strcmp(a->attr.name, "current_atomic_write")) { + s64 current_write = atomic64_read(&sbi->current_atomic_write); + + return sysfs_emit(buf, "%lld\n", current_write); + } + + if (!strcmp(a->attr.name, "peak_atomic_write")) + return sysfs_emit(buf, "%lld\n", sbi->peak_atomic_write); + + if (!strcmp(a->attr.name, "committed_atomic_block")) + return sysfs_emit(buf, "%llu\n", sbi->committed_atomic_block); + + if (!strcmp(a->attr.name, "revoked_atomic_block")) + return sysfs_emit(buf, "%llu\n", sbi->revoked_atomic_block); + ui = (unsigned int *)(ptr + a->offset); return sprintf(buf, "%u\n", *ui); @@ -608,6 +623,27 @@ out: return count; } + if (!strcmp(a->attr.name, "peak_atomic_write")) { + if (t != 0) + return -EINVAL; + sbi->peak_atomic_write = 0; + return count; + } + + if (!strcmp(a->attr.name, "committed_atomic_block")) { + if (t != 0) + return -EINVAL; + sbi->committed_atomic_block = 0; + return count; + } + + if (!strcmp(a->attr.name, "revoked_atomic_block")) { + if (t != 0) + return -EINVAL; + sbi->revoked_atomic_block = 0; + return count; + } + *ui = (unsigned int)t; return count; @@ -713,6 +749,11 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .offset = _offset \ } +#define F2FS_RO_ATTR(struct_type, struct_name, name, elname) \ + F2FS_ATTR_OFFSET(struct_type, name, 0444, \ + f2fs_sbi_show, NULL, \ + offsetof(struct struct_name, elname)) + #define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ F2FS_ATTR_OFFSET(struct_type, name, 0644, \ 
f2fs_sbi_show, f2fs_sbi_store, \ @@ -811,6 +852,8 @@ F2FS_FEATURE_RO_ATTR(encrypted_casefold); #endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED F2FS_FEATURE_RO_ATTR(block_zoned); +F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, unusable_blocks_per_sec, + unusable_blocks_per_sec); #endif F2FS_FEATURE_RO_ATTR(atomic_write); F2FS_FEATURE_RO_ATTR(extra_attr); @@ -848,6 +891,12 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole); +/* For atomic write */ +F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, current_atomic_write, current_atomic_write); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block); + #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent_sleep_time), @@ -919,6 +968,9 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), #endif +#ifdef CONFIG_BLK_DEV_ZONED + ATTR_LIST(unusable_blocks_per_sec), +#endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), ATTR_LIST(compr_saved_block), @@ -934,6 +986,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_reclaimed_segments), ATTR_LIST(max_fragment_chunk), ATTR_LIST(max_fragment_hole), + ATTR_LIST(current_atomic_write), + ATTR_LIST(peak_atomic_write), + ATTR_LIST(committed_atomic_block), + ATTR_LIST(revoked_atomic_block), NULL, }; ATTRIBUTE_GROUPS(f2fs); |
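The zoned-device cleanup above drops the per-zone zone_capacity_blocks[] array in favor of a single sbi->unusable_blocks_per_sec value; f2fs_report_zone_cb() now rejects devices whose zones report different capacities. Section-granular math (GC victim selection, dirty segmap maintenance, has_not_enough_free_secs()) then moves from BLKS_PER_SEC() to the capacity-aware CAP_BLKS_PER_SEC(). A small userspace sketch of the macro, using hypothetical device geometry (the struct and numbers are illustrative, not from the patch):

#include <stdio.h>

struct sbi {
	unsigned int segs_per_sec;
	unsigned int blocks_per_seg;
	unsigned int unusable_blocks_per_sec;
};

/* mirrors the segment.h macros: raw section size vs. usable capacity */
#define BLKS_PER_SEC(s)     ((s)->segs_per_sec * (s)->blocks_per_seg)
#define CAP_BLKS_PER_SEC(s) (BLKS_PER_SEC(s) - (s)->unusable_blocks_per_sec)

int main(void)
{
	/* hypothetical zoned device: 48 MiB zone with 36 MiB zone capacity */
	struct sbi s = {
		.segs_per_sec = 24,		/* 24 segments per section */
		.blocks_per_seg = 512,		/* 512 x 4 KiB blocks = 2 MiB */
		.unusable_blocks_per_sec = 3072, /* 12 MiB beyond zone capacity */
	};

	printf("raw section blocks: %u\n", BLKS_PER_SEC(&s));		/* 12288 */
	printf("usable blocks:      %u\n", CAP_BLKS_PER_SEC(&s));	/* 9216 */
	return 0;
}

With zero unusable blocks the two macros agree, so conventional zones and non-zoned devices are unaffected by the switch.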