From 0e3c3b901c00364198d31482fa2552ccf2d5c899 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Jun 2022 18:42:59 -0400 Subject: No need of likely/unlikely on calls of check_copy_size() it's inline and unlikely() inside of it (including the implicit one in WARN_ON_ONCE()) suffice to convince the compiler that getting false from check_copy_size() is unlikely. Spotted-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- arch/powerpc/include/asm/uaccess.h | 2 +- arch/s390/include/asm/uaccess.h | 4 ++-- include/linux/uaccess.h | 4 ++-- include/linux/uio.h | 15 ++++++--------- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9b82b38ff867..105f200b1e31 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -348,7 +348,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned long size) static inline unsigned long __must_check copy_mc_to_user(void __user *to, const void *from, unsigned long n) { - if (likely(check_copy_size(from, n, true))) { + if (check_copy_size(from, n, true)) { if (access_ok(to, n)) { allow_write_to_user(to, n); n = copy_mc_generic((void *)to, from, n); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f4511e21d646..c2c9995466e0 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -39,7 +39,7 @@ _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned static __always_inline unsigned long __must_check copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) { - if (likely(check_copy_size(to, n, false))) + if (check_copy_size(to, n, false)) n = _copy_from_user_key(to, from, n, key); return n; } @@ -50,7 +50,7 @@ _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned l static __always_inline unsigned long __must_check copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) { - if (likely(check_copy_size(from, n, true))) + if (check_copy_size(from, n, true)) n = _copy_to_user_key(to, from, n, key); return n; } diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5a328cf02b75..47e5d374c7eb 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -148,7 +148,7 @@ _copy_to_user(void __user *, const void *, unsigned long); static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (likely(check_copy_size(to, n, false))) + if (check_copy_size(to, n, false)) n = _copy_from_user(to, from, n); return n; } @@ -156,7 +156,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (likely(check_copy_size(from, n, true))) + if (check_copy_size(from, n, true)) n = _copy_to_user(to, from, n); return n; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 739285fe5a2f..76d305f3d4c2 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -156,19 +156,17 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, true))) - return 0; - else + if (check_copy_size(addr, bytes, true)) return _copy_to_iter(addr, bytes, 
i); + return 0; } static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else + if (check_copy_size(addr, bytes, false)) return _copy_from_iter(addr, bytes, i); + return 0; } static __always_inline __must_check @@ -184,10 +182,9 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else + if (check_copy_size(addr, bytes, false)) return _copy_from_iter_nocache(addr, bytes, i); + return 0; } static __always_inline __must_check -- cgit v1.2.3 From 36518b6b4da7e8d4387bc19ad21e772f1060e9d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2022 16:04:03 -0400 Subject: teach iomap_dio_rw() to suppress dsync New flag, equivalent to removal of IOCB_DSYNC from iocb flags. This mimics what btrfs is doing (and that's what btrfs will switch to). However, I'm not at all sure that we want to suppress REQ_FUA for those - all btrfs hack really cares about is suppression of generic_write_sync(). For now let's keep the existing behaviour, but I really want to hear more detailed arguments pro or contra. [folded brain fix from willy] Suggested-by: Christoph Hellwig Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- fs/iomap/direct-io.c | 20 +++++++++++--------- include/linux/iomap.h | 6 ++++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 370c3241618a..c10c69e2de24 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -548,17 +548,19 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } /* for data sync or sync, we need sync completion processing */ - if (iocb->ki_flags & IOCB_DSYNC) + if (iocb->ki_flags & IOCB_DSYNC && + !(dio_flags & IOMAP_DIO_NOSYNC)) { dio->flags |= IOMAP_DIO_NEED_SYNC; - /* - * For datasync only writes, we optimistically try using FUA for - * this IO. Any non-FUA write that occurs will clear this flag, - * hence we know before completion whether a cache flush is - * necessary. - */ - if ((iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) == IOCB_DSYNC) - dio->flags |= IOMAP_DIO_WRITE_FUA; + /* + * For datasync only writes, we optimistically try + * using FUA for this IO. Any non-FUA write that + * occurs will clear this flag, hence we know before + * completion whether a cache flush is necessary. + */ + if (!(iocb->ki_flags & IOCB_SYNC)) + dio->flags |= IOMAP_DIO_WRITE_FUA; + } } if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0..c8622d8f064e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -353,6 +353,12 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_PARTIAL (1 << 2) +/* + * The caller will sync the write if needed; do not sync it within + * iomap_dio_rw. Overrides IOMAP_DIO_FORCE_WAIT. + */ +#define IOMAP_DIO_NOSYNC (1 << 3) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); -- cgit v1.2.3 From eacdf4eaca632438c8453294727b94d7c5745c62 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2022 16:10:01 -0400 Subject: btrfs: use IOMAP_DIO_NOSYNC ... 
instead of messing with iocb flags Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- fs/btrfs/file.c | 17 ----------------- fs/btrfs/inode.c | 3 ++- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fd827b99c1b..98f81e304eb1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1848,7 +1848,6 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { - const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC); struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -1901,15 +1900,6 @@ relock: goto buffered; } - /* - * We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw() - * calls generic_write_sync() (through iomap_dio_complete()), because - * that results in calling fsync (btrfs_sync_file()) which will try to - * lock the inode in exclusive/write mode. - */ - if (is_sync_write) - iocb->ki_flags &= ~IOCB_DSYNC; - /* * The iov_iter can be mapped to the same file range we are writing to. * If that's the case, then we will deadlock in the iomap code, because @@ -1964,13 +1954,6 @@ again: btrfs_inode_unlock(inode, ilock_flags); - /* - * Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do - * the fsync (call generic_write_sync()). - */ - if (is_sync_write) - iocb->ki_flags |= IOCB_DSYNC; - /* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */ if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from)) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 81737eff92f3..fbf0aee7d66a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8152,7 +8152,8 @@ ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_befo struct btrfs_dio_data data; return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - IOMAP_DIO_PARTIAL, &data, done_before); + IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC, + &data, done_before); } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, -- cgit v1.2.3 From e87f2c26c8085dac59978dee1beeb05cef31a9dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 09:28:12 -0400 Subject: struct file: use anonymous union member for rcuhead and llist Once upon a time we couldn't afford anon unions; these days the minimal gcc version has been raised enough to take care of that.
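For illustration, a minimal standalone C sketch of what the anonymous union buys (the demo struct and its field names are invented for the example, not the kernel's struct file): the union loses its member name, so its fields are addressed directly, with no f_u.-style intermediary.

    #include <stdio.h>

    struct demo {
        union {                 /* anonymous: no member name */
            long rcu_slot;      /* overlapping storage, used in */
            long llist_slot;    /* non-overlapping lifetimes    */
        };
        int refcount;
    };

    int main(void)
    {
        struct demo d = { .rcu_slot = 42, .refcount = 1 };

        /* accessed as d.rcu_slot, not d.f_u.rcu_slot */
        printf("%ld %d\n", d.rcu_slot, d.refcount);
        return 0;
    }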
Reviewed-by: Jan Kara Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- fs/file_table.c | 16 ++++++++-------- include/linux/fs.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 5424e3a8df5f..b989e33aacda 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -45,7 +45,7 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp; static void file_free_rcu(struct rcu_head *head) { - struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + struct file *f = container_of(head, struct file, f_rcuhead); put_cred(f->f_cred); kmem_cache_free(filp_cachep, f); @@ -56,7 +56,7 @@ static inline void file_free(struct file *f) security_file_free(f); if (!(f->f_mode & FMODE_NOACCOUNT)) percpu_counter_dec(&nr_files); - call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); + call_rcu(&f->f_rcuhead, file_free_rcu); } /* @@ -142,7 +142,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred) f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { - file_free_rcu(&f->f_u.fu_rcuhead); + file_free_rcu(&f->f_rcuhead); return ERR_PTR(error); } @@ -341,13 +341,13 @@ static void delayed_fput(struct work_struct *unused) struct llist_node *node = llist_del_all(&delayed_fput_list); struct file *f, *t; - llist_for_each_entry_safe(f, t, node, f_u.fu_llist) + llist_for_each_entry_safe(f, t, node, f_llist) __fput(f); } static void ____fput(struct callback_head *work) { - __fput(container_of(work, struct file, f_u.fu_rcuhead)); + __fput(container_of(work, struct file, f_rcuhead)); } /* @@ -374,8 +374,8 @@ void fput(struct file *file) struct task_struct *task = current; if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { - init_task_work(&file->f_u.fu_rcuhead, ____fput); - if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME)) + init_task_work(&file->f_rcuhead, ____fput); + if (!task_work_add(task, &file->f_rcuhead, TWA_RESUME)) return; /* * After this task has run exit_task_work(), @@ -384,7 +384,7 @@ void fput(struct file *file) */ } - if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) + if (llist_add(&file->f_llist, &delayed_fput_list)) schedule_delayed_work(&delayed_fput_work, 1); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..6a2a4906041f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -924,9 +924,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) struct file { union { - struct llist_node fu_llist; - struct rcu_head fu_rcuhead; - } f_u; + struct llist_node f_llist; + struct rcu_head f_rcuhead; + }; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; -- cgit v1.2.3 From 91b94c5d6ae55d1161633047ffeea644b110b35f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 09:39:27 -0400 Subject: iocb: delay evaluation of IS_SYNC(...) until we want to check IOCB_DSYNC New helper to be used instead of direct checks for IOCB_DSYNC: iocb_is_dsync(iocb). 
Checks converted, which allows to avoid the IS_SYNC(iocb->ki_filp->f_mapping->host) part (4 cache lines) from iocb_flags() - it's checked in iocb_is_dsync() instead Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- block/fops.c | 2 +- fs/btrfs/file.c | 2 +- fs/direct-io.c | 2 +- fs/fuse/file.c | 2 +- fs/iomap/direct-io.c | 3 +-- fs/zonefs/super.c | 2 +- include/linux/fs.h | 10 ++++++++-- 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/block/fops.c b/block/fops.c index d6b3276a6c68..6e86931ab847 100644 --- a/block/fops.c +++ b/block/fops.c @@ -37,7 +37,7 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; /* avoid the need for a I/O completion work item */ - if (iocb->ki_flags & IOCB_DSYNC) + if (iocb_is_dsync(iocb)) op |= REQ_FUA; return op; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 98f81e304eb1..54358a5c9d56 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2021,7 +2021,7 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from, struct file *file = iocb->ki_filp; struct btrfs_inode *inode = BTRFS_I(file_inode(file)); ssize_t num_written, num_sync; - const bool sync = iocb->ki_flags & IOCB_DSYNC; + const bool sync = iocb_is_dsync(iocb); /* * If the fs flips readonly due to some impossible error, although we diff --git a/fs/direct-io.c b/fs/direct-io.c index 840752006f60..39647eb56904 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1210,7 +1210,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, */ if (dio->is_async && iov_iter_rw(iter) == WRITE) { retval = 0; - if (iocb->ki_flags & IOCB_DSYNC) + if (iocb_is_dsync(iocb)) retval = dio_set_defer_completion(dio); else if (!dio->inode->i_sb->s_dio_done_wq) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 05caa2b9272e..00fa861aeead 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1042,7 +1042,7 @@ static unsigned int fuse_write_flags(struct kiocb *iocb) { unsigned int flags = iocb->ki_filp->f_flags; - if (iocb->ki_flags & IOCB_DSYNC) + if (iocb_is_dsync(iocb)) flags |= O_DSYNC; if (iocb->ki_flags & IOCB_SYNC) flags |= O_SYNC; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index c10c69e2de24..31c7f1035b20 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -548,8 +548,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } /* for data sync or sync, we need sync completion processing */ - if (iocb->ki_flags & IOCB_DSYNC && - !(dio_flags & IOMAP_DIO_NOSYNC)) { + if (iocb_is_dsync(iocb) && !(dio_flags & IOMAP_DIO_NOSYNC)) { dio->flags |= IOMAP_DIO_NEED_SYNC; /* diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index bcb21aea990a..04a98b4cd7ee 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -746,7 +746,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); bio->bi_iter.bi_sector = zi->i_zsector; bio->bi_ioprio = iocb->ki_ioprio; - if (iocb->ki_flags & IOCB_DSYNC) + if (iocb_is_dsync(iocb)) bio->bi_opf |= REQ_FUA; ret = bio_iov_iter_get_pages(bio, from); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a2a4906041f..380a1292f4f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2720,6 +2720,12 @@ extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); +static inline bool iocb_is_dsync(const struct kiocb *iocb) +{ + return (iocb->ki_flags & 
IOCB_DSYNC) || + IS_SYNC(iocb->ki_filp->f_mapping->host); +} + /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount @@ -2727,7 +2733,7 @@ extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, */ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) { - if (iocb->ki_flags & IOCB_DSYNC) { + if (iocb_is_dsync(iocb)) { int ret = vfs_fsync_range(iocb->ki_filp, iocb->ki_pos - count, iocb->ki_pos - 1, (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); @@ -3262,7 +3268,7 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; - if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; -- cgit v1.2.3 From 164f4064ca81eefcea29f7f5dcf394f92be1d0c0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 11:38:11 -0400 Subject: keep iocb_flags() result cached in struct file * calculate at the time we set FMODE_OPENED (do_dentry_open() for normal opens, alloc_file() for pipe()/socket()/etc.) * update when handling F_SETFL * keep in a new field - file->f_iocb_flags; since that thing is needed only before the refcount reaches zero, we can put it into the same anon union where ->f_rcuhead and ->f_llist live - those are used only after refcount reaches zero. Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- drivers/nvme/target/io-cmd-file.c | 2 +- fs/aio.c | 2 +- fs/fcntl.c | 1 + fs/file_table.c | 1 + fs/io_uring.c | 2 +- fs/open.c | 1 + include/linux/fs.h | 5 ++--- 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index f3d58abf11e0..64b47e2a4633 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -112,7 +112,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, iocb->ki_pos = pos; iocb->ki_filp = req->ns->file; - iocb->ki_flags = ki_flags | iocb_flags(req->ns->file); + iocb->ki_flags = ki_flags | iocb->ki_filp->f_iocb_flags; return call_iter(iocb, &iter); } diff --git a/fs/aio.c b/fs/aio.c index 3c249b938632..2bdd444d408b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1475,7 +1475,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = iocb_flags(req->ki_filp); + req->ki_flags = req->ki_filp->f_iocb_flags; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/fs/fcntl.c b/fs/fcntl.c index 34a3faa4886d..146c9ab0cd4b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -78,6 +78,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) } spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); + filp->f_iocb_flags = iocb_flags(filp); spin_unlock(&filp->f_lock); out: diff --git a/fs/file_table.c b/fs/file_table.c index b989e33aacda..905792b0521c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -241,6 +241,7 @@ static struct file *alloc_file(const struct path *path, int flags, if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) file->f_mode |= FMODE_CAN_WRITE; + file->f_iocb_flags = iocb_flags(file); file->f_mode |= FMODE_OPENED; file->f_op = fop; if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == 
FMODE_READ) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3aab4182fd89..53424b1f019f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4330,7 +4330,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) if (!io_req_ffs_set(req)) req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; - kiocb->ki_flags = iocb_flags(file); + kiocb->ki_flags = file->f_iocb_flags; ret = kiocb_set_rw_flags(kiocb, req->rw.flags); if (unlikely(ret)) return ret; diff --git a/fs/open.c b/fs/open.c index 1d57fbde2feb..d80441a0bf17 100644 --- a/fs/open.c +++ b/fs/open.c @@ -862,6 +862,7 @@ static int do_dentry_open(struct file *f, f->f_mode |= FMODE_CAN_ODIRECT; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + f->f_iocb_flags = iocb_flags(f); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); diff --git a/include/linux/fs.h b/include/linux/fs.h index 380a1292f4f9..c82b9d442f56 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -926,6 +926,7 @@ struct file { union { struct llist_node f_llist; struct rcu_head f_rcuhead; + unsigned int f_iocb_flags; }; struct path f_path; struct inode *f_inode; /* cached value */ @@ -2199,13 +2200,11 @@ static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns, !gid_valid(i_gid_into_mnt(mnt_userns, inode)); } -static inline int iocb_flags(struct file *file); - static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, - .ki_flags = iocb_flags(filp), + .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } -- cgit v1.2.3 From 59bb69c67cf1475a04cd5629d9c4f6dbbcba5e4a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 May 2022 19:07:11 -0400 Subject: copy_page_{to,from}_iter(): switch iovec variants to generic we can do copyin/copyout under kmap_local_page(); it shouldn't overflow the kmap stack - the maximal footprint increases only by one here.
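As a rough userspace analogue of the surviving generic path (copy_to_segments() below is an invented stand-in for _copy_to_iter(), and a plain buffer stands in for the kmap_local_page() mapping), the shape is: map the page once, run one generic segment-walking copy, unmap, instead of the two big open-coded per-iovec loops being deleted here.

    #include <stdio.h>
    #include <string.h>
    #include <sys/uio.h>

    /* invented stand-in for _copy_to_iter(): walk segments, fill each */
    static size_t copy_to_segments(const char *src, size_t n,
                                   const struct iovec *iov, int nseg)
    {
        size_t done = 0;

        for (int k = 0; k < nseg && done < n; k++) {
            size_t c = n - done;

            if (c > iov[k].iov_len)
                c = iov[k].iov_len;
            memcpy(iov[k].iov_base, src + done, c);
            done += c;
        }
        return done;
    }

    int main(void)
    {
        char page[8] = "ABCDEFGH";      /* stands in for the mapped page */
        char a[3], b[5];
        struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

        printf("copied %zu bytes\n",
               copy_to_segments(page, sizeof(page), iov, 2));
        return 0;
    }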
Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- lib/iov_iter.c | 191 ++------------------------------------------------------- 1 file changed, 4 insertions(+), 187 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 6dd5330f7a99..4c658a25e29c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -168,174 +168,6 @@ static int copyin(void *to, const void __user *from, size_t n) return n; } -static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - size_t skip, copy, left, wanted; - const struct iovec *iov; - char __user *buf; - void *kaddr, *from; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - might_fault(); - wanted = bytes; - iov = i->iov; - skip = i->iov_offset; - buf = iov->iov_base + skip; - copy = min(bytes, iov->iov_len - skip); - - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) { - kaddr = kmap_atomic(page); - from = kaddr + offset; - - /* first chunk, usually the only one */ - left = copyout(buf, from, copy); - copy -= left; - skip += copy; - from += copy; - bytes -= copy; - - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyout(buf, from, copy); - copy -= left; - skip = copy; - from += copy; - bytes -= copy; - } - if (likely(!bytes)) { - kunmap_atomic(kaddr); - goto done; - } - offset = from - kaddr; - buf += copy; - kunmap_atomic(kaddr); - copy = min(bytes, iov->iov_len - skip); - } - /* Too bad - revert to non-atomic kmap */ - - kaddr = kmap(page); - from = kaddr + offset; - left = copyout(buf, from, copy); - copy -= left; - skip += copy; - from += copy; - bytes -= copy; - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyout(buf, from, copy); - copy -= left; - skip = copy; - from += copy; - bytes -= copy; - } - kunmap(page); - -done: - if (skip == iov->iov_len) { - iov++; - skip = 0; - } - i->count -= wanted - bytes; - i->nr_segs -= iov - i->iov; - i->iov = iov; - i->iov_offset = skip; - return wanted - bytes; -} - -static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - size_t skip, copy, left, wanted; - const struct iovec *iov; - char __user *buf; - void *kaddr, *to; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - might_fault(); - wanted = bytes; - iov = i->iov; - skip = i->iov_offset; - buf = iov->iov_base + skip; - copy = min(bytes, iov->iov_len - skip); - - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) { - kaddr = kmap_atomic(page); - to = kaddr + offset; - - /* first chunk, usually the only one */ - left = copyin(to, buf, copy); - copy -= left; - skip += copy; - to += copy; - bytes -= copy; - - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyin(to, buf, copy); - copy -= left; - skip = copy; - to += copy; - bytes -= copy; - } - if (likely(!bytes)) { - kunmap_atomic(kaddr); - goto done; - } - offset = to - kaddr; - buf += copy; - kunmap_atomic(kaddr); - copy = min(bytes, iov->iov_len - skip); - } - /* Too bad - revert to non-atomic kmap */ - - kaddr = kmap(page); - to = kaddr + offset; - left = copyin(to, buf, copy); - copy -= left; - skip += copy; - to += copy; - bytes -= copy; - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, 
iov->iov_len); - left = copyin(to, buf, copy); - copy -= left; - skip = copy; - to += copy; - bytes -= copy; - } - kunmap(page); - -done: - if (skip == iov->iov_len) { - iov++; - skip = 0; - } - i->count -= wanted - bytes; - i->nr_segs -= iov - i->iov; - i->iov = iov; - i->iov_offset = skip; - return wanted - bytes; -} - #ifdef PIPE_PARANOIA static bool sanity(const struct iov_iter *i) { @@ -848,24 +680,14 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (likely(iter_is_iovec(i))) - return copy_page_to_iter_iovec(page, offset, bytes, i); - if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { + if (unlikely(iov_iter_is_pipe(i))) { + return copy_page_to_iter_pipe(page, offset, bytes, i); + } else { void *kaddr = kmap_local_page(page); size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); kunmap_local(kaddr); return wanted; } - if (iov_iter_is_pipe(i)) - return copy_page_to_iter_pipe(page, offset, bytes, i); - if (unlikely(iov_iter_is_discard(i))) { - if (unlikely(i->count < bytes)) - bytes = i->count; - i->count -= bytes; - return bytes; - } - WARN_ON(1); - return 0; } size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, @@ -896,17 +718,12 @@ EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (unlikely(!page_copy_sane(page, offset, bytes))) - return 0; - if (likely(iter_is_iovec(i))) - return copy_page_from_iter_iovec(page, offset, bytes, i); - if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { + if (page_copy_sane(page, offset, bytes)) { void *kaddr = kmap_local_page(page); size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); kunmap_local(kaddr); return wanted; } - WARN_ON(1); return 0; } EXPORT_SYMBOL(copy_page_from_iter); -- cgit v1.2.3 From 18fa9af7263164ec9a8d7b28a848324825f14672 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Jun 2022 23:44:33 -0400 Subject: iov_iter_bvec_advance(): don't bother with bvec_iter do what we do for iovec/kvec; that ends up generating better code, AFAICS. 
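The new loop is easy to exercise in isolation; here is a self-contained userspace sketch of the same advance logic, with struct iovec standing in for bio_vec and a toy iterator type (all names invented for the example):

    #include <stdio.h>
    #include <sys/uio.h>

    struct toy_iter {
        const struct iovec *seg;
        unsigned int nr_segs;
        size_t off;     /* offset into seg[0] */
        size_t count;   /* bytes left in the iterator */
    };

    static void toy_advance(struct toy_iter *i, size_t size)
    {
        const struct iovec *v, *end;

        if (!i->count)
            return;
        i->count -= size;
        size += i->off;
        for (v = i->seg, end = v + i->nr_segs; v < end; v++) {
            if (size < v->iov_len)
                break;
            size -= v->iov_len;
        }
        i->off = size;
        i->nr_segs -= v - i->seg;
        i->seg = v;
    }

    int main(void)
    {
        char a[4], b[4];
        struct iovec iov[2] = { { a, 4 }, { b, 4 } };
        struct toy_iter i = { iov, 2, 0, 8 };

        toy_advance(&i, 5);     /* all of a[], one byte into b[] */
        printf("segs=%u off=%zu count=%zu\n", i.nr_segs, i.off, i.count);
        return 0;
    }

This prints segs=1 off=1 count=3: one pass over the segment array updates all four fields, with nothing recomputed through an intermediate bvec_iter.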
Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- lib/iov_iter.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4c658a25e29c..c51314639615 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -846,17 +846,22 @@ static void pipe_advance(struct iov_iter *i, size_t size) static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) { - struct bvec_iter bi; + const struct bio_vec *bvec, *end; - bi.bi_size = i->count; - bi.bi_bvec_done = i->iov_offset; - bi.bi_idx = 0; - bvec_iter_advance(i->bvec, &bi, size); + if (!i->count) + return; + i->count -= size; + + size += i->iov_offset; - i->bvec += bi.bi_idx; - i->nr_segs -= bi.bi_idx; - i->count = bi.bi_size; - i->iov_offset = bi.bi_bvec_done; + for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) { + if (likely(size < bvec->bv_len)) + break; + size -= bvec->bv_len; + } + i->iov_offset = size; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; } static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) -- cgit v1.2.3 From 7392ed1734c319150b5ddec3f192a6405728e8d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jun 2022 16:44:21 -0400 Subject: iov_iter_get_pages{,_alloc}(): cap the maxsize with MAX_RW_COUNT All callers can and should handle iov_iter_get_pages() returning fewer pages than requested. All in-kernel ones do. And it makes the arithmetical overflow analysis much simpler... Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c51314639615..225b968ed8c5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1348,6 +1348,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, maxsize = i->count; if (!maxsize) return 0; + if (maxsize > MAX_RW_COUNT) + maxsize = MAX_RW_COUNT; if (likely(iter_is_iovec(i))) { unsigned int gup_flags = 0; @@ -1474,6 +1476,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, maxsize = i->count; if (!maxsize) return 0; + if (maxsize > MAX_RW_COUNT) + maxsize = MAX_RW_COUNT; if (likely(iter_is_iovec(i))) { unsigned int gup_flags = 0; -- cgit v1.2.3 From 599a0bdd72f0a7ed5f55faef0ecdcd36cb1bc287 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 20:53:17 -0400 Subject: iov_iter: lift dealing with maxpages out of first_{iovec,bvec}_segment() caller can do that just as easily Signed-off-by: Al Viro --- lib/iov_iter.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 225b968ed8c5..1b5e96ddddf3 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1295,7 +1295,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, /* must be done on non-empty ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size, size_t *start, - size_t maxsize, unsigned maxpages) + size_t maxsize) { size_t skip; long k; @@ -1309,8 +1309,6 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, if (len > maxsize) len = maxsize; len += (*start = addr % PAGE_SIZE); - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; *size = len; return addr & PAGE_MASK; } @@ -1320,7 +1318,7 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, /* must be done on non-empty ITER_BVEC one */ static struct page *first_bvec_segment(const struct iov_iter *i, size_t *size, size_t *start, - size_t maxsize, unsigned maxpages) + size_t maxsize) { 
struct page *page; size_t skip = i->iov_offset, len; @@ -1331,8 +1329,6 @@ static struct page *first_bvec_segment(const struct iov_iter *i, skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; len += (*start = skip % PAGE_SIZE); - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; *size = len; return page; } @@ -1360,7 +1356,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize, maxpages); + addr = first_iovec_segment(i, &len, start, maxsize); + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; n = DIV_ROUND_UP(len, PAGE_SIZE); res = get_user_pages_fast(addr, n, gup_flags, pages); if (unlikely(res <= 0)) @@ -1370,7 +1368,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (iov_iter_is_bvec(i)) { struct page *page; - page = first_bvec_segment(i, &len, start, maxsize, maxpages); + page = first_bvec_segment(i, &len, start, maxsize); + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; n = DIV_ROUND_UP(len, PAGE_SIZE); while (n--) get_page(*pages++ = page++); @@ -1488,7 +1488,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize, ~0U); + addr = first_iovec_segment(i, &len, start, maxsize); n = DIV_ROUND_UP(len, PAGE_SIZE); p = get_pages_array(n); if (!p) @@ -1505,7 +1505,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (iov_iter_is_bvec(i)) { struct page *page; - page = first_bvec_segment(i, &len, start, maxsize, ~0U); + page = first_bvec_segment(i, &len, start, maxsize); n = DIV_ROUND_UP(len, PAGE_SIZE); *pages = p = get_pages_array(n); if (!p) -- cgit v1.2.3 From dda8e5d17c170415a3c10f68365f3a2800a6e68f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Jun 2022 15:55:19 -0400 Subject: iov_iter: first_{iovec,bvec}_segment() - simplify a bit We return length + offset in page via *size. Don't bother - the caller can do that arithmetics just as well; just report the length to it. Signed-off-by: Al Viro --- lib/iov_iter.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1b5e96ddddf3..45dccecae946 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1308,7 +1308,7 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, continue; if (len > maxsize) len = maxsize; - len += (*start = addr % PAGE_SIZE); + *start = addr % PAGE_SIZE; *size = len; return addr & PAGE_MASK; } @@ -1328,7 +1328,7 @@ static struct page *first_bvec_segment(const struct iov_iter *i, len = maxsize; skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; - len += (*start = skip % PAGE_SIZE); + *start = skip % PAGE_SIZE; *size = len; return page; } @@ -1357,24 +1357,24 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &len, start, maxsize); - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; - n = DIV_ROUND_UP(len, PAGE_SIZE); + n = DIV_ROUND_UP(len + *start, PAGE_SIZE); + if (n > maxpages) + n = maxpages; res = get_user_pages_fast(addr, n, gup_flags, pages); if (unlikely(res <= 0)) return res; - return (res == n ? 
len : res * PAGE_SIZE) - *start; + return min_t(size_t, len, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { struct page *page; page = first_bvec_segment(i, &len, start, maxsize); - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; - n = DIV_ROUND_UP(len, PAGE_SIZE); - while (n--) + n = DIV_ROUND_UP(len + *start, PAGE_SIZE); + if (n > maxpages) + n = maxpages; + for (int k = 0; k < n; k++) get_page(*pages++ = page++); - return len - *start; + return min_t(size_t, len, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, maxpages, start); @@ -1489,7 +1489,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &len, start, maxsize); - n = DIV_ROUND_UP(len, PAGE_SIZE); + n = DIV_ROUND_UP(len + *start, PAGE_SIZE); p = get_pages_array(n); if (!p) return -ENOMEM; @@ -1500,19 +1500,19 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return res; } *pages = p; - return (res == n ? len : res * PAGE_SIZE) - *start; + return min_t(size_t, len, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { struct page *page; page = first_bvec_segment(i, &len, start, maxsize); - n = DIV_ROUND_UP(len, PAGE_SIZE); + n = DIV_ROUND_UP(len + *start, PAGE_SIZE); *pages = p = get_pages_array(n); if (!p) return -ENOMEM; - while (n--) + for (int k = 0; k < n; k++) get_page(*p++ = page++); - return len - *start; + return min_t(size_t, len, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) return pipe_get_pages_alloc(i, pages, maxsize, start); -- cgit v1.2.3 From 59dbd7d0904a887ede1538b55bb8095ff2ce5078 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Jun 2022 16:10:37 -0400 Subject: iov_iter: massage calling conventions for first_{iovec,bvec}_segment() Pass maxsize by reference, return length via the same. 
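A toy model of the new convention (first_segment() below is an invented stand-in, not the kernel helper): the caller's cap goes in through *size and the clamped length comes back out through the same variable, so the separate len/maxsize pair disappears.

    #include <stdio.h>
    #include <sys/uio.h>

    /* clamp *size to what the first segment can supply */
    static void *first_segment(const struct iovec *iov, size_t skip,
                               size_t *size)
    {
        size_t len = iov->iov_len - skip;

        if (*size > len)
            *size = len;
        return (char *)iov->iov_base + skip;
    }

    int main(void)
    {
        char buf[16];
        struct iovec v = { buf, sizeof(buf) };
        size_t maxsize = 100;   /* caller's cap on the transfer */
        void *p = first_segment(&v, 4, &maxsize);

        printf("%p, clamped to %zu\n", p, maxsize);     /* 12 */
        return 0;
    }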
Signed-off-by: Al Viro --- lib/iov_iter.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 45dccecae946..d93c6a1ffe26 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1294,8 +1294,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, /* must be done on non-empty ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, - size_t *size, size_t *start, - size_t maxsize) + size_t *size, size_t *start) { size_t skip; long k; @@ -1306,10 +1305,9 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, if (unlikely(!len)) continue; - if (len > maxsize) - len = maxsize; + if (*size > len) + *size = len; *start = addr % PAGE_SIZE; - *size = len; return addr & PAGE_MASK; } BUG(); // if it had been empty, we wouldn't get called @@ -1317,19 +1315,17 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, /* must be done on non-empty ITER_BVEC one */ static struct page *first_bvec_segment(const struct iov_iter *i, - size_t *size, size_t *start, - size_t maxsize) + size_t *size, size_t *start) { struct page *page; size_t skip = i->iov_offset, len; len = i->bvec->bv_len - skip; - if (len > maxsize) - len = maxsize; + if (*size > len) + *size = len; skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; *start = skip % PAGE_SIZE; - *size = len; return page; } @@ -1337,7 +1333,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { - size_t len; int n, res; if (maxsize > i->count) @@ -1356,25 +1351,25 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize); - n = DIV_ROUND_UP(len + *start, PAGE_SIZE); + addr = first_iovec_segment(i, &maxsize, start); + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; res = get_user_pages_fast(addr, n, gup_flags, pages); if (unlikely(res <= 0)) return res; - return min_t(size_t, len, res * PAGE_SIZE - *start); + return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { struct page *page; - page = first_bvec_segment(i, &len, start, maxsize); - n = DIV_ROUND_UP(len + *start, PAGE_SIZE); + page = first_bvec_segment(i, &maxsize, start); + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; for (int k = 0; k < n; k++) get_page(*pages++ = page++); - return min_t(size_t, len, n * PAGE_SIZE - *start); + return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, maxpages, start); @@ -1469,7 +1464,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - size_t len; int n, res; if (maxsize > i->count) @@ -1488,8 +1482,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &len, start, maxsize); - n = DIV_ROUND_UP(len + *start, PAGE_SIZE); + addr = first_iovec_segment(i, &maxsize, start); + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); p = get_pages_array(n); if (!p) return -ENOMEM; @@ -1500,19 +1494,19 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return res; } *pages = p; - return min_t(size_t, len, res * PAGE_SIZE - *start); + return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { struct page *page; - page = first_bvec_segment(i, &len, start, 
maxsize); - n = DIV_ROUND_UP(len + *start, PAGE_SIZE); + page = first_bvec_segment(i, &maxsize, start); + n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); *pages = p = get_pages_array(n); if (!p) return -ENOMEM; for (int k = 0; k < n; k++) get_page(*p++ = page++); - return min_t(size_t, len, n * PAGE_SIZE - *start); + return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) return pipe_get_pages_alloc(i, pages, maxsize, start); -- cgit v1.2.3 From dd45ab9dd28c82fc495d98cd9788666fd8d76b99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 16:07:49 -0400 Subject: first_iovec_segment(): just return address ... and calculate the offset in the caller Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index d93c6a1ffe26..a4a44065cd37 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1293,22 +1293,19 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, } /* must be done on non-empty ITER_IOVEC one */ -static unsigned long first_iovec_segment(const struct iov_iter *i, - size_t *size, size_t *start) +static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k; for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { - unsigned long addr = (unsigned long)i->iov[k].iov_base + skip; size_t len = i->iov[k].iov_len - skip; if (unlikely(!len)) continue; if (*size > len) *size = len; - *start = addr % PAGE_SIZE; - return addr & PAGE_MASK; + return (unsigned long)i->iov[k].iov_base + skip; } BUG(); // if it had been empty, we wouldn't get called } @@ -1351,7 +1348,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &maxsize, start); + addr = first_iovec_segment(i, &maxsize); + *start = addr % PAGE_SIZE; + addr &= PAGE_MASK; n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; @@ -1482,7 +1481,9 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT; - addr = first_iovec_segment(i, &maxsize, start); + addr = first_iovec_segment(i, &maxsize); + *start = addr % PAGE_SIZE; + addr &= PAGE_MASK; n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); p = get_pages_array(n); if (!p) -- cgit v1.2.3
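To make the caller-side arithmetic that this last patch moves out of first_iovec_segment() concrete, a small standalone sketch (PAGE_SIZE hardwired to 4096 and the address/length values arbitrary, for the demo only):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long addr = 0x12345;   /* address returned by the helper */
        size_t maxsize = 10000;         /* clamped segment length */
        size_t start, n;

        start = addr % PAGE_SIZE;       /* in-page offset: 0x345 */
        addr &= PAGE_MASK;              /* page-aligned base: 0x12000 */
        n = (maxsize + start + PAGE_SIZE - 1) / PAGE_SIZE; /* DIV_ROUND_UP */

        printf("base=%#lx start=%zu pages=%zu\n", addr, start, n);
        return 0;
    }

With these numbers the range spans 3 pages, matching DIV_ROUND_UP(maxsize + *start, PAGE_SIZE) in the patched callers.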