aboutsummaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
authorJan Kara2024-07-01 12:50:48 +0200
committerAnna Schumaker2024-07-08 13:47:27 -0400
commit2f1f31042ef07719a0d5cb4784b8a32d20c13110 (patch)
tree7a7fe9889f6136d8a61c991b0f9c1ee4afa0841c /fs/nfs
parentf8a3955083f5cc5d62257c6e1103f89f566d3a87 (diff)
nfs: Block on write congestion
Commit 6df25e58532b ("nfs: remove reliance on bdi congestion") introduced NFS-private solution for limiting number of writes outstanding against a particular server. Unlike previous bdi congestion this algorithm actually works and limits number of outstanding writeback pages to nfs_congestion_kb which scales with amount of client's memory and is capped at 256 MB. As a result some workloads such as random buffered writes over NFS got slower (from ~170 MB/s to ~126 MB/s). The fio command to reproduce is: fio --direct=0 --ioengine=sync --thread --invalidate=1 --group_reporting=1 --runtime=300 --fallocate=posix --ramp_time=10 --new_group --rw=randwrite --size=64256m --numjobs=4 --bs=4k --fsync_on_close=1 --end_fsync=1 This happens because the client sends ~256 MB worth of dirty pages to the server and any further background writeback request is ignored until the number of writeback pages gets below the threshold of 192 MB. By the time this happens and clients decides to trigger another round of writeback, the server often has no pages to write and the disk is idle. To fix this problem and make the client react faster to eased congestion of the server by blocking waiting for congestion to resolve instead of aborting writeback. This improves the random 4k buffered write throughput to 184 MB/s. Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/write.c15
2 files changed, 12 insertions, 4 deletions
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3b252dceebf5..8286edd6062d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -994,6 +994,7 @@ struct nfs_server *nfs_alloc_server(void)
server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ init_waitqueue_head(&server->write_congestion_wait);
atomic_long_set(&server->writeback, 0);
ida_init(&server->openowner_id);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4321cdc581bb..81845ab2e00a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -425,8 +425,10 @@ static void nfs_folio_end_writeback(struct folio *folio)
folio_end_writeback(folio);
if (atomic_long_dec_return(&nfss->writeback) <
- NFS_CONGESTION_OFF_THRESH)
+ NFS_CONGESTION_OFF_THRESH) {
nfss->write_congested = 0;
+ wake_up_all(&nfss->write_congestion_wait);
+ }
}
static void nfs_page_end_writeback(struct nfs_page *req)
@@ -700,12 +702,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = NULL;
unsigned int mntflags = NFS_SERVER(inode)->flags;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int priority = 0;
int err;
- if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
- return 0;
+ /* Wait with writeback until write congestion eases */
+ if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) {
+ err = wait_event_killable(nfss->write_congestion_wait,
+ nfss->write_congested == 0);
+ if (err)
+ return err;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);