diff options
Diffstat (limited to 'fs')
163 files changed, 1878 insertions, 698 deletions
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index b573c3b9a328..8cf941c3b511 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -450,7 +450,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf); - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) pr_warn("Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; va_end(args); diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 675148950fed..2b2112475ec2 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -19,7 +19,7 @@ affs_count_free_blocks(struct super_block *sb) pr_debug("%s()\n", __func__); - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; mutex_lock(&AFFS_SB(sb)->s_bmlock); diff --git a/fs/affs/super.c b/fs/affs/super.c index 7bf47a41cb4f..884bedab7266 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -80,7 +80,7 @@ void affs_mark_sb_dirty(struct super_block *sb) struct affs_sb_info *sbi = AFFS_SB(sb); unsigned long delay; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; spin_lock(&sbi->work_lock); @@ -464,7 +464,7 @@ got_root: * not recommended. */ if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS - || chksum == MUFS_DCOFS) && !(sb->s_flags & MS_RDONLY)) { + || chksum == MUFS_DCOFS) && !sb_rdonly(sb)) { pr_notice("Dircache FS - mounting %s read only\n", sb->s_id); sb->s_flags |= MS_RDONLY; } @@ -596,7 +596,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) memcpy(sbi->s_volume, volume, 32); spin_unlock(&sbi->symlink_lock); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) return 0; if (*flags & MS_RDONLY) @@ -1606,12 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; } - if ((req->common.ki_flags & IOCB_NOWAIT) && - !(req->common.ki_flags & IOCB_DIRECT)) { - ret = -EOPNOTSUPP; - goto out_put_req; - } - ret = put_user(KIOCB_KEY, &user_iocb->aio_key); if (unlikely(ret)) { pr_debug("EFAULT: aio_key\n"); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 24a58bf9ca72..4ac49d038bf3 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -56,19 +56,14 @@ static int autofs4_write(struct autofs_sb_info *sbi, struct file *file, const void *addr, int bytes) { unsigned long sigpipe, flags; - mm_segment_t fs; const char *data = (const char *)addr; ssize_t wr = 0; sigpipe = sigismember(¤t->pending.signal, SIGPIPE); - /* Save pointer to user space and point back to kernel space */ - fs = get_fs(); - set_fs(KERNEL_DS); - mutex_lock(&sbi->pipe_mutex); while (bytes) { - wr = __vfs_write(file, data, bytes, &file->f_pos); + wr = __kernel_write(file, data, bytes, &file->f_pos); if (wr <= 0) break; data += wr; @@ -76,8 +71,6 @@ static int autofs4_write(struct autofs_sb_info *sbi, } mutex_unlock(&sbi->pipe_mutex); - set_fs(fs); - /* Keep the currently executing process from receiving a * SIGPIPE unless it was already supposed to get one */ diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 4a4a5a366158..a92355cc453b 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -838,7 +838,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) befs_debug(sb, "---> %s", __func__); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { befs_warning(sb, "No write support. Marking filesystem read-only"); sb->s_flags |= MS_RDONLY; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 9be82c4e14a4..ce1824f47ba6 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -341,11 +341,12 @@ static int load_aout_library(struct file *file) unsigned long error; int retval; struct exec ex; + loff_t pos = 0; inode = file_inode(file); retval = -ENOEXEC; - error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); + error = kernel_read(file, &ex, sizeof(ex), &pos); if (error != sizeof(ex)) goto out; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index ec45d24875b1..73b01e474fdc 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -409,6 +409,7 @@ static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex, { struct elf_phdr *elf_phdata = NULL; int retval, size, err = -1; + loff_t pos = elf_ex->e_phoff; /* * If the size of this structure has changed, then punt, since @@ -432,8 +433,7 @@ static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex, goto out; /* Read in the program headers */ - retval = kernel_read(elf_file, elf_ex->e_phoff, - (char *)elf_phdata, size); + retval = kernel_read(elf_file, elf_phdata, size, &pos); if (retval != size) { err = (retval < 0) ? retval : -EIO; goto out; @@ -698,6 +698,7 @@ static int load_elf_binary(struct linux_binprm *bprm) struct elfhdr interp_elf_ex; } *loc; struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE; + loff_t pos; loc = kmalloc(sizeof(*loc), GFP_KERNEL); if (!loc) { @@ -750,9 +751,9 @@ static int load_elf_binary(struct linux_binprm *bprm) if (!elf_interpreter) goto out_free_ph; - retval = kernel_read(bprm->file, elf_ppnt->p_offset, - elf_interpreter, - elf_ppnt->p_filesz); + pos = elf_ppnt->p_offset; + retval = kernel_read(bprm->file, elf_interpreter, + elf_ppnt->p_filesz, &pos); if (retval != elf_ppnt->p_filesz) { if (retval >= 0) retval = -EIO; @@ -776,9 +777,9 @@ static int load_elf_binary(struct linux_binprm *bprm) would_dump(bprm, interpreter); /* Get the exec headers */ - retval = kernel_read(interpreter, 0, - (void *)&loc->interp_elf_ex, - sizeof(loc->interp_elf_ex)); + pos = 0; + retval = kernel_read(interpreter, &loc->interp_elf_ex, + sizeof(loc->interp_elf_ex), &pos); if (retval != sizeof(loc->interp_elf_ex)) { if (retval >= 0) retval = -EIO; @@ -1175,9 +1176,10 @@ static int load_elf_library(struct file *file) unsigned long elf_bss, bss, len; int retval, error, i, j; struct elfhdr elf_ex; + loff_t pos = 0; error = -ENOEXEC; - retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex)); + retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos); if (retval != sizeof(elf_ex)) goto out; @@ -1201,7 +1203,8 @@ static int load_elf_library(struct file *file) eppnt = elf_phdata; error = -ENOEXEC; - retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j); + pos = elf_ex.e_phoff; + retval = kernel_read(file, eppnt, j, &pos); if (retval != j) goto out_free_ph; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 5aa9199dfb13..e70c039ac190 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -145,6 +145,7 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct elf32_phdr *phdr; unsigned long size; int retval, loop; + loff_t pos = params->hdr.e_phoff; if (params->hdr.e_phentsize != sizeof(struct elf_phdr)) return -ENOMEM; @@ -156,8 +157,7 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, if (!params->phdrs) return -ENOMEM; - retval = kernel_read(file, params->hdr.e_phoff, - (char *) params->phdrs, size); + retval = kernel_read(file, params->phdrs, size, &pos); if (unlikely(retval != size)) return retval < 0 ? retval : -ENOEXEC; @@ -199,6 +199,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) char *interpreter_name = NULL; int executable_stack; int retval, i; + loff_t pos; kdebug("____ LOAD %d ____", current->pid); @@ -246,10 +247,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) if (!interpreter_name) goto error; - retval = kernel_read(bprm->file, - phdr->p_offset, - interpreter_name, - phdr->p_filesz); + pos = phdr->p_offset; + retval = kernel_read(bprm->file, interpreter_name, + phdr->p_filesz, &pos); if (unlikely(retval != phdr->p_filesz)) { if (retval >= 0) retval = -ENOEXEC; @@ -277,8 +277,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) */ would_dump(bprm, interpreter); - retval = kernel_read(interpreter, 0, bprm->buf, - BINPRM_BUF_SIZE); + pos = 0; + retval = kernel_read(interpreter, bprm->buf, + BINPRM_BUF_SIZE, &pos); if (unlikely(retval != BINPRM_BUF_SIZE)) { if (retval >= 0) retval = -ENOEXEC; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index ce6537c50ec1..475d083f8088 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -176,19 +176,14 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start #define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ #define RESERVED 0xC0 /* bit 6,7: reserved */ -static int decompress_exec( - struct linux_binprm *bprm, - unsigned long offset, - char *dst, - long len, - int fd) +static int decompress_exec(struct linux_binprm *bprm, loff_t fpos, char *dst, + long len, int fd) { unsigned char *buf; z_stream strm; - loff_t fpos; int ret, retval; - pr_debug("decompress_exec(offset=%lx,buf=%p,len=%lx)\n", offset, dst, len); + pr_debug("decompress_exec(offset=%llx,buf=%p,len=%lx)\n", fpos, dst, len); memset(&strm, 0, sizeof(strm)); strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL); @@ -202,13 +197,11 @@ static int decompress_exec( } /* Read in first chunk of data and parse gzip header. */ - fpos = offset; - ret = kernel_read(bprm->file, offset, buf, LBUFSIZE); + ret = kernel_read(bprm->file, buf, LBUFSIZE, &fpos); strm.next_in = buf; strm.avail_in = ret; strm.total_in = 0; - fpos += ret; retval = -ENOEXEC; @@ -274,7 +267,7 @@ static int decompress_exec( } while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) { - ret = kernel_read(bprm->file, fpos, buf, LBUFSIZE); + ret = kernel_read(bprm->file, buf, LBUFSIZE, &fpos); if (ret <= 0) break; len -= ret; @@ -282,7 +275,6 @@ static int decompress_exec( strm.next_in = buf; strm.avail_in = ret; strm.total_in = 0; - fpos += ret; } if (ret < 0) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index f4718098ac31..ce7181ea60fa 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -218,12 +218,15 @@ static int load_misc_binary(struct linux_binprm *bprm) bprm->file = interp_file; if (fmt->flags & MISC_FMT_CREDENTIALS) { + loff_t pos = 0; + /* * No need to call prepare_binprm(), it's already been * done. bprm->buf is stale, update from interp_file. */ memset(bprm->buf, 0, BINPRM_BUF_SIZE); - retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); + retval = kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, + &pos); } else retval = prepare_binprm(bprm); diff --git a/fs/block_dev.c b/fs/block_dev.c index bb715b2fcfb8..93d088ffc05c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1740,6 +1740,8 @@ static int blkdev_open(struct inode * inode, struct file * filp) */ filp->f_flags |= O_LARGEFILE; + filp->f_mode |= FMODE_NOWAIT; + if (filp->f_flags & O_NDELAY) filp->f_mode |= FMODE_NDELAY; if (filp->f_flags & O_EXCL) @@ -1892,6 +1894,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_pos >= size) return -ENOSPC; + if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) + return -EOPNOTSUPP; + iov_iter_truncate(from, size - iocb->ki_pos); blk_start_plug(&plug); diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 80e9c18ea64f..a26c63b4ad68 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -6,6 +6,8 @@ config BTRFS_FS select ZLIB_DEFLATE select LZO_COMPRESS select LZO_DECOMPRESS + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS select RAID6_PQ select XOR_BLOCKS select SRCU diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 128ce17a80b0..962a95aefb81 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o free-space-cache.o zlib.o lzo.o \ + export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ uuid-tree.o props.o hash.o free-space-tree.o diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 883ecc58fd0d..280384bf34f1 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -107,7 +107,8 @@ static void end_compressed_bio_read(struct bio *bio) struct inode *inode; struct page *page; unsigned long index; - int ret; + unsigned int mirror = btrfs_io_bio(bio)->mirror_num; + int ret = 0; if (bio->bi_status) cb->errors = 1; @@ -118,6 +119,21 @@ static void end_compressed_bio_read(struct bio *bio) if (!refcount_dec_and_test(&cb->pending_bios)) goto out; + /* + * Record the correct mirror_num in cb->orig_bio so that + * read-repair can work properly. + */ + ASSERT(btrfs_io_bio(cb->orig_bio)); + btrfs_io_bio(cb->orig_bio)->mirror_num = mirror; + cb->mirror_num = mirror; + + /* + * Some IO in this cb have failed, just skip checksum as there + * is no way it could be correct. + */ + if (cb->errors == 1) + goto csum_failed; + inode = cb->inode; ret = check_compressed_csum(BTRFS_I(inode), cb, (u64)bio->bi_iter.bi_sector << 9); @@ -704,6 +720,7 @@ static struct { static const struct btrfs_compress_op * const btrfs_compress_op[] = { &btrfs_zlib_compress, &btrfs_lzo_compress, + &btrfs_zstd_compress, }; void __init btrfs_init_compress(void) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 3b1b0ac15fdc..d2781ff8f994 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -99,7 +99,8 @@ enum btrfs_compression_type { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, BTRFS_COMPRESS_LZO = 2, - BTRFS_COMPRESS_TYPES = 2, + BTRFS_COMPRESS_ZSTD = 3, + BTRFS_COMPRESS_TYPES = 3, }; struct btrfs_compress_op { @@ -127,6 +128,7 @@ struct btrfs_compress_op { extern const struct btrfs_compress_op btrfs_zlib_compress; extern const struct btrfs_compress_op btrfs_lzo_compress; +extern const struct btrfs_compress_op btrfs_zstd_compress; int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2add002662f4..899ddaeeacec 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -270,6 +270,7 @@ struct btrfs_super_block { BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ BTRFS_FEATURE_INCOMPAT_RAID56 | \ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ @@ -708,7 +709,6 @@ struct btrfs_delayed_root; #define BTRFS_FS_OPEN 5 #define BTRFS_FS_QUOTA_ENABLED 6 #define BTRFS_FS_QUOTA_ENABLING 7 -#define BTRFS_FS_QUOTA_DISABLING 8 #define BTRFS_FS_UPDATE_UUID_TREE_GEN 9 #define BTRFS_FS_CREATING_FREE_SPACE_TREE 10 #define BTRFS_FS_BTREE_ERR 11 diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7a93a3e1a847..7c655f9a7a50 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -704,7 +704,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) u64 result; int ret; - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) return -EROFS; mutex_lock(&dev_replace->lock_finishing_cancel_unmount); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 46329524dd5f..dfdab849037b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2478,7 +2478,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, return ret; } - if (fs_info->sb->s_flags & MS_RDONLY) { + if (sb_rdonly(fs_info->sb)) { ret = btrfs_commit_super(fs_info); if (ret) return ret; @@ -2828,6 +2828,8 @@ int open_ctree(struct super_block *sb, features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; if (fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) btrfs_info(fs_info, "has skinny extents"); @@ -2874,7 +2876,7 @@ int open_ctree(struct super_block *sb, features = btrfs_super_compat_ro_flags(disk_super) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; - if (!(sb->s_flags & MS_RDONLY) && features) { + if (!sb_rdonly(sb) && features) { btrfs_err(fs_info, "cannot mount read-write because of unsupported optional features (%llx)", features); @@ -3036,7 +3038,7 @@ retry_root_backup: goto fail_sysfs; } - if (!(sb->s_flags & MS_RDONLY) && !btrfs_check_rw_degradable(fs_info)) { + if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) { btrfs_warn(fs_info, "writeable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@ -3095,7 +3097,7 @@ retry_root_backup: if (ret) goto fail_qgroup; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; @@ -3121,7 +3123,7 @@ retry_root_backup: goto fail_qgroup; } - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; if (btrfs_test_opt(fs_info, CLEAR_CACHE) && @@ -3641,7 +3643,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) u64 flags; do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); - backup_super_roots(fs_info); + + /* + * max_mirrors == 0 indicates we're from commit_transaction, + * not from fsync where the tree roots in fs_info have not + * been consistent on disk. + */ + if (max_mirrors == 0) + backup_super_roots(fs_info); sb = fs_info->super_for_commit; dev_item = &sb->dev_item; @@ -3876,7 +3885,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) cancel_work_sync(&fs_info->async_reclaim_work); - if (!(fs_info->sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(fs_info->sb)) { /* * If the cleaner thread is stopped and there are * block groups queued for removal, the deletion will be diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0f077c5db58e..12ab19a4b93e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2060,7 +2060,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); int ret = 0; - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) return -EROFS; for (i = 0; i < num_pages; i++) { @@ -2110,7 +2110,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info, failrec->start); goto out; } - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) goto out; spin_lock(&io_tree->lock); @@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned int write_flags = 0; unsigned long nr_written = 0; - if (wbc->sync_mode == WB_SYNC_ALL) - write_flags = REQ_SYNC; + write_flags = wbc_to_write_flags(wbc); trace___extent_writepage(page, inode, wbc); @@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, unsigned long i, num_pages; unsigned long bio_flags = 0; unsigned long start, end; - unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; + unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -4063,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd) if (epd->bio) { int ret; - bio_set_op_attrs(epd->bio, REQ_OP_WRITE, - epd->sync_io ? REQ_SYNC : 0); - ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ epd->bio = NULL; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 74fd7756cff3..aafcc785f840 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1886,6 +1886,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, loff_t oldsize; int clean_page = 0; + if (!(iocb->ki_flags & IOCB_DIRECT) && + (iocb->ki_flags & IOCB_NOWAIT)) + return -EOPNOTSUPP; + if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) return -EAGAIN; @@ -3112,7 +3116,7 @@ out: static int btrfs_file_open(struct inode *inode, struct file *filp) { - filp->f_mode |= FMODE_AIO_NOWAIT; + filp->f_mode |= FMODE_NOWAIT; return generic_file_open(inode, filp); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17ad018da0a2..d94e3f68b9b1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode, const u64 offset, const u64 bytes) { + unsigned long index = offset >> PAGE_SHIFT; + unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + index++; + if (!page) + continue; + ClearPagePrivate2(page); + put_page(page); + } return __endio_write_update_ordered(inode, offset + PAGE_SIZE, bytes - PAGE_SIZE, false); } @@ -5821,7 +5833,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (!IS_ERR(inode) && root != sub_root) { down_read(&fs_info->cleanup_work_sem); - if (!(inode->i_sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(inode->i_sb)) ret = btrfs_orphan_cleanup(sub_root); up_read(&fs_info->cleanup_work_sem); if (ret) { @@ -8357,11 +8369,8 @@ static void btrfs_endio_direct_read(struct bio *bio) struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); blk_status_t err = bio->bi_status; - if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) { + if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) err = btrfs_subio_endio_read(inode, io_bio, err); - if (!err) - bio->bi_status = 0; - } unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, dip->logical_offset + dip->bytes - 1); @@ -8369,7 +8378,7 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); - dio_bio->bi_status = bio->bi_status; + dio_bio->bi_status = err; dio_end_io(dio_bio); if (io_bio->end_io) @@ -8387,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode, btrfs_work_func_t func; u64 ordered_offset = offset; u64 ordered_bytes = bytes; + u64 last_offset; int ret; if (btrfs_is_free_space_inode(BTRFS_I(inode))) { @@ -8398,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode, } again: + last_offset = ordered_offset; ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, ordered_bytes, @@ -8409,6 +8420,12 @@ again: btrfs_queue_work(wq, &ordered->work); out_test: /* + * If btrfs_dec_test_ordered_pending does not find any ordered extent + * in the range, we can exit. + */ + if (ordered_offset == last_offset) + return; + /* * our bio might span multiple ordered extents. If we haven't * completed the accounting for the whole dio, go back and try again */ diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ae8fbf9d3de2..6c7a49faf4e0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -296,8 +296,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (fs_info->compress_type == BTRFS_COMPRESS_LZO) comp = "lzo"; - else + else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) comp = "zlib"; + else + comp = "zstd"; ret = btrfs_set_prop(inode, "btrfs.compression", comp, strlen(comp), 0); if (ret) @@ -1435,6 +1437,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (range->compress_type == BTRFS_COMPRESS_LZO) { btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); + } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); } ret = defrag_count; @@ -2769,9 +2773,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, } mutex_unlock(&fs_devices->device_list_mutex); - fi_args->nodesize = fs_info->super_copy->nodesize; - fi_args->sectorsize = fs_info->super_copy->sectorsize; - fi_args->clone_alignment = fs_info->super_copy->sectorsize; + fi_args->nodesize = fs_info->nodesize; + fi_args->sectorsize = fs_info->sectorsize; + fi_args->clone_alignment = fs_info->sectorsize; if (copy_to_user(arg, fi_args, sizeof(*fi_args))) ret = -EFAULT; @@ -3028,7 +3032,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, out: if (ret) btrfs_cmp_data_free(cmp); - return 0; + return ret; } static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) @@ -4057,6 +4061,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) ret = PTR_ERR(new_root); goto out; } + if (!is_fstree(new_root->objectid)) { + ret = -ENOENT; + goto out; + } path = btrfs_alloc_path(); if (!path) { @@ -4422,7 +4430,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info, switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: - if (fs_info->sb->s_flags & MS_RDONLY) { + if (sb_rdonly(fs_info->sb)) { ret = -EROFS; goto out; } diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 09c0266f248d..f6a05f836629 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -390,6 +390,8 @@ static int prop_compression_validate(const char *value, size_t len) return 0; else if (!strncmp("zlib", value, len)) return 0; + else if (!strncmp("zstd", value, len)) + return 0; return -EINVAL; } @@ -412,6 +414,8 @@ static int prop_compression_apply(struct inode *inode, type = BTRFS_COMPRESS_LZO; else if (!strncmp("zlib", value, 4)) type = BTRFS_COMPRESS_ZLIB; + else if (!strncmp("zstd", value, len)) + type = BTRFS_COMPRESS_ZSTD; else return -EINVAL; @@ -429,6 +433,8 @@ static const char *prop_compression_extract(struct inode *inode) return "zlib"; case BTRFS_COMPRESS_LZO: return "lzo"; + case BTRFS_COMPRESS_ZSTD: + return "zstd"; } return NULL; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5c8b61c86e61..e172d4843eae 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, } ret = 0; out: - set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); btrfs_free_path(path); return ret; } @@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, if (!fs_info->quota_root) goto out; clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; @@ -1307,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, } } ret = del_qgroup_item(trans, quota_root, qgroupid); + if (ret && ret != -ENOENT) + goto out; while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, @@ -2086,8 +2086,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); spin_lock(&fs_info->qgroup_lock); while (!list_empty(&fs_info->dirty_qgroups)) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3a49a3c2fca4..9841faef08ea 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list) while (!list_empty(list)) { reloc_root = list_entry(list->next, struct btrfs_root, root_list); + __del_reloc_root(reloc_root); free_extent_buffer(reloc_root->node); free_extent_buffer(reloc_root->commit_root); reloc_root->node = NULL; reloc_root->commit_root = NULL; - __del_reloc_root(reloc_root); } } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 9fb9896610e0..95bcc3cce78f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -228,7 +228,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) int ret; bool can_recover = true; - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) can_recover = false; path = btrfs_alloc_path(); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8f1d3d6e7087..8fd195cfe81b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -539,33 +539,23 @@ static struct btrfs_path *alloc_path_for_send(void) static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) { int ret; - mm_segment_t old_fs; u32 pos = 0; - old_fs = get_fs(); - set_fs(KERNEL_DS); - while (pos < len) { - ret = vfs_write(filp, (__force const char __user *)buf + pos, - len - pos, off); + ret = kernel_write(filp, buf + pos, len - pos, off); /* TODO handle that correctly */ /*if (ret == -ERESTARTSYS) { continue; }*/ if (ret < 0) - goto out; + return ret; if (ret == 0) { - ret = -EIO; - goto out; + return -EIO; } pos += ret; } - ret = 0; - -out: - set_fs(old_fs); - return ret; + return 0; } static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) @@ -2640,7 +2630,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino) } else { btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", (int)(mode & S_IFMT)); - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; goto out; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0b7a1d8cd08b..35a128acfbd1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -103,7 +103,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) { struct super_block *sb = fs_info->sb; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { @@ -139,7 +139,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function * Special case: if the error is EROFS, and we're already * under MS_RDONLY, then it is safe here. */ - if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) + if (errno == -EROFS && sb_rdonly(sb)) return; #ifdef CONFIG_PRINTK @@ -514,6 +514,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_set_fs_incompat(info, COMPRESS_LZO); no_compress = 0; + } else if (strcmp(args[0].from, "zstd") == 0) { + compress_type = "zstd"; + info->compress_type = BTRFS_COMPRESS_ZSTD; + btrfs_set_opt(info->mount_opt, COMPRESS); + btrfs_clear_opt(info->mount_opt, NODATACOW); + btrfs_clear_opt(info->mount_opt, NODATASUM); + btrfs_set_fs_incompat(info, COMPRESS_ZSTD); + no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; btrfs_clear_opt(info->mount_opt, COMPRESS); @@ -1230,8 +1238,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) if (btrfs_test_opt(info, COMPRESS)) { if (info->compress_type == BTRFS_COMPRESS_ZLIB) compress_type = "zlib"; - else + else if (info->compress_type == BTRFS_COMPRESS_LZO) compress_type = "lzo"; + else + compress_type = "zstd"; if (btrfs_test_opt(info, FORCE_COMPRESS)) seq_printf(seq, ",compress-force=%s", compress_type); else @@ -1691,8 +1701,7 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, * close or the filesystem is read only. */ if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && - (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || - (fs_info->sb->s_flags & MS_RDONLY))) { + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) { btrfs_cleanup_defrag_inodes(fs_info); } @@ -1739,7 +1748,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) goto out; if (*flags & MS_RDONLY) { @@ -1840,7 +1849,7 @@ out: restore: /* We've hit an error - don't reset MS_RDONLY */ - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) old_flags |= MS_RDONLY; sb->s_flags = old_flags; fs_info->mount_opt = old_opts; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c2d5f3580b4c..883881b16c86 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -120,7 +120,7 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj, if (!fs_info) return -EPERM; - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) return -EROFS; ret = kstrtoul(skip_spaces(buf), 0, &val); @@ -200,6 +200,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF); BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); +BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD); BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); @@ -212,6 +213,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { BTRFS_FEAT_ATTR_PTR(default_subvol), BTRFS_FEAT_ATTR_PTR(mixed_groups), BTRFS_FEAT_ATTR_PTR(compress_lzo), + BTRFS_FEAT_ATTR_PTR(compress_zstd), BTRFS_FEAT_ATTR_PTR(big_metadata), BTRFS_FEAT_ATTR_PTR(extended_iref), BTRFS_FEAT_ATTR_PTR(raid56), @@ -388,7 +390,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj, if (!fs_info) return -EPERM; - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) return -EROFS; /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ad7f4bab640b..c800d067fcbf 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct extent_map *em, *n; struct list_head extents; struct extent_map_tree *tree = &inode->extent_tree; + u64 logged_start, logged_end; u64 test_gen; int ret = 0; int num = 0; @@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, down_write(&inode->dio_sem); write_lock(&tree->lock); test_gen = root->fs_info->last_trans_committed; + logged_start = start; + logged_end = end; list_for_each_entry_safe(em, n, &tree->modified_extents, list) { list_del_init(&em->list); - /* * Just an arbitrary number, this can be really CPU intensive * once we start getting a lot of extents, and really once we @@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, if (em->generation <= test_gen) continue; + + if (em->start < logged_start) + logged_start = em->start; + if ((em->start + em->len - 1) > logged_end) + logged_end = em->start + em->len - 1; + /* Need a ref to keep it from getting evicted from cache */ refcount_inc(&em->refs); set_bit(EXTENT_FLAG_LOGGING, &em->flags); @@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } list_sort(NULL, &extents, extent_cmp); - btrfs_get_logged_extents(inode, logged_list, start, end); + btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); /* * Some ordered extents started by fsync might have completed * before we could collect them into the list logged_list, which diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c188256a367c..b39737568c22 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2324,7 +2324,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path int seeding_dev = 0; int ret = 0; - if ((sb->s_flags & MS_RDONLY) && !fs_info->fs_devices->seeding) + if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) return -EROFS; bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, @@ -4053,7 +4053,7 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) { - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) return -EROFS; mutex_lock(&fs_info->balance_mutex); @@ -6166,7 +6166,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, map_length = length; btrfs_bio_counter_inc_blocked(fs_info); - ret = __btrfs_map_block(fs_info, bio_op(bio), logical, + ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c new file mode 100644 index 000000000000..607ce47b483a --- /dev/null +++ b/fs/btrfs/zstd.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bio.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/refcount.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/zstd.h> +#include "compression.h" + +#define ZSTD_BTRFS_MAX_WINDOWLOG 17 +#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) +#define ZSTD_BTRFS_DEFAULT_LEVEL 3 + +static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) +{ + ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL, + src_len, 0); + + if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) + params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; + WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); + return params; +} + +struct workspace { + void *mem; + size_t size; + char *buf; + struct list_head list; +}; + +static void zstd_free_workspace(struct list_head *ws) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + + kvfree(workspace->mem); + kfree(workspace->buf); + kfree(workspace); +} + +static struct list_head *zstd_alloc_workspace(void) +{ + ZSTD_parameters params = + zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); + struct workspace *workspace; + + workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); + if (!workspace) + return ERR_PTR(-ENOMEM); + + workspace->size = max_t(size_t, + ZSTD_CStreamWorkspaceBound(params.cParams), + ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); + workspace->mem = kvmalloc(workspace->size, GFP_KERNEL); + workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!workspace->mem || !workspace->buf) + goto fail; + + INIT_LIST_HEAD(&workspace->list); + + return &workspace->list; +fail: + zstd_free_workspace(&workspace->list); + return ERR_PTR(-ENOMEM); +} + +static int zstd_compress_pages(struct list_head *ws, + struct address_space *mapping, + u64 start, + struct page **pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + ZSTD_CStream *stream; + int ret = 0; + int nr_pages = 0; + struct page *in_page = NULL; /* The current page to read */ + struct page *out_page = NULL; /* The current page to write to */ + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + unsigned long tot_in = 0; + unsigned long tot_out = 0; + unsigned long len = *total_out; + const unsigned long nr_dest_pages = *out_pages; + unsigned long max_out = nr_dest_pages * PAGE_SIZE; + ZSTD_parameters params = zstd_get_btrfs_parameters(len); + + *out_pages = 0; + *total_out = 0; + *total_in = 0; + + /* Initialize the stream */ + stream = ZSTD_initCStream(params, len, workspace->mem, + workspace->size); + if (!stream) { + pr_warn("BTRFS: ZSTD_initCStream failed\n"); + ret = -EIO; + goto out; + } + + /* map in the first page of input data */ + in_page = find_get_page(mapping, start >> PAGE_SHIFT); + in_buf.src = kmap(in_page); + in_buf.pos = 0; + in_buf.size = min_t(size_t, len, PAGE_SIZE); + + + /* Allocate and map in the output buffer */ + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + pages[nr_pages++] = out_page; + out_buf.dst = kmap(out_page); + out_buf.pos = 0; + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); + + while (1) { + size_t ret2; + + ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { + pr_debug("BTRFS: ZSTD_compressStream returned %d\n", + ZSTD_getErrorCode(ret2)); + ret = -EIO; + goto out; + } + + /* Check to see if we are making it bigger */ + if (tot_in + in_buf.pos > 8192 && + tot_in + in_buf.pos < + tot_out + out_buf.pos) { + ret = -E2BIG; + goto out; + } + + /* We've reached the end of our output range */ + if (out_buf.pos >= max_out) { + tot_out += out_buf.pos; + ret = -E2BIG; + goto out; + } + + /* Check if we need more output space */ + if (out_buf.pos == out_buf.size) { + tot_out += PAGE_SIZE; + max_out -= PAGE_SIZE; + kunmap(out_page); + if (nr_pages == nr_dest_pages) { + out_page = NULL; + ret = -E2BIG; + goto out; + } + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + pages[nr_pages++] = out_page; + out_buf.dst = kmap(out_page); + out_buf.pos = 0; + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); + } + + /* We've reached the end of the input */ + if (in_buf.pos >= len) { + tot_in += in_buf.pos; + break; + } + + /* Check if we need more input */ + if (in_buf.pos == in_buf.size) { + tot_in += PAGE_SIZE; + kunmap(in_page); + put_page(in_page); + + start += PAGE_SIZE; + len -= PAGE_SIZE; + in_page = find_get_page(mapping, start >> PAGE_SHIFT); + in_buf.src = kmap(in_page); + in_buf.pos = 0; + in_buf.size = min_t(size_t, len, PAGE_SIZE); + } + } + while (1) { + size_t ret2; + + ret2 = ZSTD_endStream(stream, &out_buf); + if (ZSTD_isError(ret2)) { + pr_debug("BTRFS: ZSTD_endStream returned %d\n", + ZSTD_getErrorCode(ret2)); + ret = -EIO; + goto out; + } + if (ret2 == 0) { + tot_out += out_buf.pos; + break; + } + if (out_buf.pos >= max_out) { + tot_out += out_buf.pos; + ret = -E2BIG; + goto out; + } + + tot_out += PAGE_SIZE; + max_out -= PAGE_SIZE; + kunmap(out_page); + if (nr_pages == nr_dest_pages) { + out_page = NULL; + ret = -E2BIG; + goto out; + } + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + pages[nr_pages++] = out_page; + out_buf.dst = kmap(out_page); + out_buf.pos = 0; + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); + } + + if (tot_out >= tot_in) { + ret = -E2BIG; + goto out; + } + + ret = 0; + *total_in = tot_in; + *total_out = tot_out; +out: + *out_pages = nr_pages; + /* Cleanup */ + if (in_page) { + kunmap(in_page); + put_page(in_page); + } + if (out_page) + kunmap(out_page); + return ret; +} + +static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + struct page **pages_in = cb->compressed_pages; + u64 disk_start = cb->start; + struct bio *orig_bio = cb->orig_bio; + size_t srclen = cb->compressed_len; + ZSTD_DStream *stream; + int ret = 0; + unsigned long page_in_index = 0; + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); + unsigned long buf_start; + unsigned long total_out = 0; + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + + stream = ZSTD_initDStream( + ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); + if (!stream) { + pr_debug("BTRFS: ZSTD_initDStream failed\n"); + ret = -EIO; + goto done; + } + + in_buf.src = kmap(pages_in[page_in_index]); + in_buf.pos = 0; + in_buf.size = min_t(size_t, srclen, PAGE_SIZE); + + out_buf.dst = workspace->buf; + out_buf.pos = 0; + out_buf.size = PAGE_SIZE; + + while (1) { + size_t ret2; + + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", + ZSTD_getErrorCode(ret2)); + ret = -EIO; + goto done; + } + buf_start = total_out; + total_out += out_buf.pos; + out_buf.pos = 0; + + ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, + total_out, disk_start, orig_bio); + if (ret == 0) + break; + + if (in_buf.pos >= srclen) + break; + + /* Check if we've hit the end of a frame */ + if (ret2 == 0) + break; + + if (in_buf.pos == in_buf.size) { + kunmap(pages_in[page_in_index++]); + if (page_in_index >= total_pages_in) { + in_buf.src = NULL; + ret = -EIO; + goto done; + } + srclen -= PAGE_SIZE; + in_buf.src = kmap(pages_in[page_in_index]); + in_buf.pos = 0; + in_buf.size = min_t(size_t, srclen, PAGE_SIZE); + } + } + ret = 0; + zero_fill_bio(orig_bio); +done: + if (in_buf.src) + kunmap(pages_in[page_in_index]); + return ret; +} + +static int zstd_decompress(struct list_head *ws, unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + ZSTD_DStream *stream; + int ret = 0; + size_t ret2; + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + unsigned long total_out = 0; + unsigned long pg_offset = 0; + char *kaddr; + + stream = ZSTD_initDStream( + ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); + if (!stream) { + pr_warn("BTRFS: ZSTD_initDStream failed\n"); + ret = -EIO; + goto finish; + } + + destlen = min_t(size_t, destlen, PAGE_SIZE); + + in_buf.src = data_in; + in_buf.pos = 0; + in_buf.size = srclen; + + out_buf.dst = workspace->buf; + out_buf.pos = 0; + out_buf.size = PAGE_SIZE; + + ret2 = 1; + while (pg_offset < destlen && in_buf.pos < in_buf.size) { + unsigned long buf_start; + unsigned long buf_offset; + unsigned long bytes; + + /* Check if the frame is over and we still need more input */ + if (ret2 == 0) { + pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); + ret = -EIO; + goto finish; + } + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", + ZSTD_getErrorCode(ret2)); + ret = -EIO; + goto finish; + } + + buf_start = total_out; + total_out += out_buf.pos; + out_buf.pos = 0; + + if (total_out <= start_byte) + continue; + + if (total_out > start_byte && buf_start < start_byte) + buf_offset = start_byte - buf_start; + else + buf_offset = 0; + + bytes = min_t(unsigned long, destlen - pg_offset, + out_buf.size - buf_offset); + + kaddr = kmap_atomic(dest_page); + memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes); + kunmap_atomic(kaddr); + + pg_offset += bytes; + } + ret = 0; +finish: + if (pg_offset < destlen) { + kaddr = kmap_atomic(dest_page); + memset(kaddr + pg_offset, 0, destlen - pg_offset); + kunmap_atomic(kaddr); + } + return ret; +} + +const struct btrfs_compress_op btrfs_zstd_compress = { + .alloc_workspace = zstd_alloc_workspace, + .free_workspace = zstd_free_workspace, + .compress_pages = zstd_compress_pages, + .decompress_bio = zstd_decompress_bio, + .decompress = zstd_decompress, +}; diff --git a/fs/buffer.c b/fs/buffer.c index 170df856bdb9..bd4d0923cdce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1978,8 +1978,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, case IOMAP_MAPPED: if (offset >= i_size_read(inode)) set_buffer_new(bh); - bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) + - ((offset - iomap->offset) >> inode->i_blkbits); + bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> + inode->i_blkbits; set_buffer_mapped(bh); break; } diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 3ff867f87d73..d9f001078e08 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -133,7 +133,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) goto error_unsupported; ret = -EROFS; - if (root->d_sb->s_flags & MS_RDONLY) + if (sb_rdonly(root->d_sb)) goto error_unsupported; /* determine the security of the on-disk cache as this governs diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9dd6b836ac9e..84edfc60d87a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -7,7 +7,6 @@ #include <linux/sched.h> #include <linux/debugfs.h> #include <linux/seq_file.h> -#include <linux/utsname.h> #include <linux/ratelimit.h> #include "super.h" @@ -884,8 +883,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 void *p; const char* metadata[][2] = { - {"hostname", utsname()->nodename}, - {"kernel_version", utsname()->release}, + {"hostname", mdsc->nodename}, + {"kernel_version", init_utsname()->release}, {"entity_id", opt->name ? : ""}, {"root", fsopt->server_path ? : "/"}, {NULL, NULL} @@ -3539,6 +3538,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) init_rwsem(&mdsc->pool_perm_rwsem); mdsc->pool_perm_tree = RB_ROOT; + strncpy(mdsc->nodename, utsname()->nodename, + sizeof(mdsc->nodename) - 1); return 0; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index db57ae98ed34..636d6b2ec49c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -8,6 +8,7 @@ #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/refcount.h> +#include <linux/utsname.h> #include <linux/ceph/types.h> #include <linux/ceph/messenger.h> @@ -368,6 +369,8 @@ struct ceph_mds_client { struct rw_semaphore pool_perm_rwsem; struct rb_root pool_perm_tree; + + char nodename[__NEW_UTS_LEN + 1]; }; extern const char *ceph_mds_op_name(int op); diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 9727e1dcacd5..cbb9534b89b4 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || (ses->serverNOS == NULL)) { - seq_printf(m, "\n%d) entry for %s not fully " - "displayed\n\t", i, ses->serverName); + seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t", + i, ses->serverName, ses->ses_count, + ses->capabilities, ses->status); + if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) + seq_printf(m, "Guest\t"); + else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) + seq_printf(m, "Anonymous\t"); } else { seq_printf(m, "\n%d) Name: %s Domain: %s Uses: %d OS:" diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 180b3356ff86..8c8b75d33f31 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nocase"); if (tcon->retry) seq_puts(s, ",hard"); + else + seq_puts(s, ",soft"); if (tcon->use_persistent) seq_puts(s, ",persistenthandles"); else if (tcon->use_resilient) @@ -1447,7 +1449,7 @@ exit_cifs(void) exit_cifs_idmap(); #endif #ifdef CONFIG_CIFS_UPCALL - unregister_key_type(&cifs_spnego_key_type); + exit_cifs_spnego(); #endif cifs_destroy_request_bufs(); cifs_destroy_mids(); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 30bf89b1fd9a..5a10e566f0e6 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.09" +#define CIFS_VERSION "2.10" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 808486c29f0d..de5b2e1fcce5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -188,6 +188,8 @@ enum smb_version { #ifdef CONFIG_CIFS_SMB311 Smb_311, #endif /* SMB311 */ + Smb_3any, + Smb_default, Smb_version_err }; @@ -1701,6 +1703,10 @@ extern struct smb_version_values smb20_values; #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; +#define SMBDEFAULT_VERSION_STRING "default" +extern struct smb_version_values smbdefault_values; +#define SMB3ANY_VERSION_STRING "3" +extern struct smb_version_values smb3any_values; #define SMB30_VERSION_STRING "3.0" extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5aa2d278ca84..0bfc2280436d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -301,6 +301,8 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_311, SMB311_VERSION_STRING }, { Smb_311, ALT_SMB311_VERSION_STRING }, #endif /* SMB311 */ + { Smb_3any, SMB3ANY_VERSION_STRING }, + { Smb_default, SMBDEFAULT_VERSION_STRING }, { Smb_version_err, NULL } }; @@ -1148,6 +1150,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->vals = &smb311_values; break; #endif /* SMB311 */ + case Smb_3any: + vol->ops = &smb30_operations; /* currently identical with 3.0 */ + vol->vals = &smb3any_values; + break; + case Smb_default: + vol->ops = &smb30_operations; /* currently identical with 3.0 */ + vol->vals = &smbdefault_values; + break; default: cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); return 1; @@ -1274,9 +1284,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->actimeo = CIFS_DEF_ACTIMEO; - /* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */ - vol->ops = &smb30_operations; /* both secure and accepted widely */ - vol->vals = &smb30_values; + /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */ + vol->ops = &smb30_operations; + vol->vals = &smbdefault_values; vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT; @@ -1988,11 +1998,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (got_version == false) pr_warn("No dialect specified on mount. Default has changed to " - "a more secure dialect, SMB3 (vers=3.0), from CIFS " + "a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS " "(SMB1). To use the less secure SMB1 dialect to access " - "old servers which do not support SMB3 specify vers=1.0" - " on mount. For somewhat newer servers such as Windows " - "7 try vers=2.1.\n"); + "old servers which do not support SMB3 (or SMB2.1) specify vers=1.0" + " on mount.\n"); kfree(mountdata_copy); return 0; @@ -2133,6 +2142,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) if (vol->nosharesock) return 0; + /* BB update this for smb3any and default case */ if ((server->vals != vol->vals) || (server->ops != vol->ops)) return 0; @@ -4144,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n", server->sec_mode, server->capabilities, server->timeAdj); + if (ses->auth_key.response) { + cifs_dbg(VFS, "Free previous auth_key.response = %p\n", + ses->auth_key.response); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } + if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, nls_info); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0786f19d288f..92fdf9c35de2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; @@ -1102,8 +1109,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) struct cifs_tcon *tcon; unsigned int num, max_num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; - int types[] = {LOCKING_ANDX_LARGE_FILES, - LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; + static const int types[] = { + LOCKING_ANDX_LARGE_FILES, + LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES + }; int i; xid = get_xid(); @@ -1434,8 +1443,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, unsigned int xid) { int rc = 0, stored_rc; - int types[] = {LOCKING_ANDX_LARGE_FILES, - LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; + static const int types[] = { + LOCKING_ANDX_LARGE_FILES, + LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES + }; unsigned int i; unsigned int max_num, num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8693632235f..7c732cb44164 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime); fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange); + /* old POSIX extensions don't get create time */ + fattr->cf_mode = le64_to_cpu(info->Permissions); /* @@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat, stat->blksize = CIFS_MAX_MSGSIZE; stat->ino = CIFS_I(inode)->uniqueid; + /* old CIFS Unix Extensions doesn't return create time */ + if (CIFS_I(inode)->createtime) { + stat->result_mask |= STATX_BTIME; + stat->btime = + cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime)); + } + + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED); + if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED) + stat->attributes |= STATX_ATTR_ENCRYPTED; + /* * If on a multiuser mount without unix extensions or cifsacl being * enabled, and the admin hasn't overridden them, set the ownership diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index fb2934b9b97c..0dafdbae1f8c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -426,6 +426,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +#ifdef CONFIG_CIFS_XATTR static ssize_t move_smb2_ea_to_cifs(char *dst, size_t dst_size, struct smb2_file_full_ea_info *src, size_t src_size, @@ -613,6 +614,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +#endif static bool smb2_can_echo(struct TCP_Server_Info *server) @@ -3110,6 +3112,46 @@ struct smb_version_values smb21_values = { .create_lease_size = sizeof(struct create_lease), }; +struct smb_version_values smb3any_values = { + .version_string = SMB3ANY_VERSION_STRING, + .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), +}; + +struct smb_version_values smbdefault_values = { + .version_string = SMBDEFAULT_VERSION_STRING, + .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), +}; + struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5531e7ee1210..6f0e6343c15e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req) build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); req->NegotiateContextCount = cpu_to_le16(2); - inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2 + inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ } #else @@ -491,10 +491,25 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->hdr.sync_hdr.SessionId = 0; - req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); - - req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */ - inc_rfc1001_len(req, 2); + if (strcmp(ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) { + req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + req->DialectCount = cpu_to_le16(2); + inc_rfc1001_len(req, 4); + } else if (strcmp(ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + req->DialectCount = cpu_to_le16(3); + inc_rfc1001_len(req, 6); + } else { + /* otherwise send specific dialect */ + req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); + req->DialectCount = cpu_to_le16(1); + inc_rfc1001_len(req, 2); + } /* only one of SMB2 signing flags may be set in SMB2 request */ if (ses->sign) @@ -528,16 +543,43 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) */ if (rc == -EOPNOTSUPP) { cifs_dbg(VFS, "Dialect not supported by server. Consider " - "specifying vers=1.0 or vers=2.1 on mount for accessing" + "specifying vers=1.0 or vers=2.0 on mount for accessing" " older servers\n"); goto neg_exit; } else if (rc != 0) goto neg_exit; + if (strcmp(ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) { + if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { + cifs_dbg(VFS, + "SMB2 dialect returned but not requested\n"); + return -EIO; + } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { + cifs_dbg(VFS, + "SMB2.1 dialect returned but not requested\n"); + return -EIO; + } + } else if (strcmp(ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { + cifs_dbg(VFS, + "SMB2 dialect returned but not requested\n"); + return -EIO; + } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { + /* ops set to 3.0 by default for default so update */ + ses->server->ops = &smb21_operations; + } + } else if (le16_to_cpu(rsp->DialectRevision) != + ses->server->vals->protocol_id) { + /* if requested single dialect ensure returned dialect matched */ + cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n", + le16_to_cpu(rsp->DialectRevision)); + return -EIO; + } + cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); - /* BB we may eventually want to match the negotiated vs. requested - dialect, even though we are only requesting one at a time */ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) cifs_dbg(FYI, "negotiated smb2.0 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) @@ -558,6 +600,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } server->dialect = le16_to_cpu(rsp->DialectRevision); + /* BB: add check that dialect was valid given dialect(s) we asked for */ + /* SMB2 only has an extended negflavor */ server->negflavor = CIFS_NEGFLAVOR_EXTENDED; /* set it to the maximum buffer size value we can send with 1 credit */ @@ -606,20 +650,28 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) struct validate_negotiate_info_req vneg_inbuf; struct validate_negotiate_info_rsp *pneg_rsp; u32 rsplen; + u32 inbuflen; /* max of 4 dialects */ cifs_dbg(FYI, "validate negotiate\n"); /* * validation ioctl must be signed, so no point sending this if we - * can not sign it. We could eventually change this to selectively + * can not sign it (ie are not known user). Even if signing is not + * required (enabled but not negotiated), in those cases we selectively * sign just this, the first and only signed request on a connection. - * This is good enough for now since a user who wants better security - * would also enable signing on the mount. Having validation of - * negotiate info for signed connections helps reduce attack vectors + * Having validation of negotiate info helps reduce attack vectors. */ - if (tcon->ses->server->sign == false) + if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) return 0; /* validation requires signing */ + if (tcon->ses->user_name == NULL) { + cifs_dbg(FYI, "Can't validate negotiate: null user mount\n"); + return 0; /* validation requires signing */ + } + + if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) + cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); + vneg_inbuf.Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, @@ -634,9 +686,30 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) else vneg_inbuf.SecurityMode = 0; - vneg_inbuf.DialectCount = cpu_to_le16(1); - vneg_inbuf.Dialects[0] = - cpu_to_le16(tcon->ses->server->vals->protocol_id); + + if (strcmp(tcon->ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) { + vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + vneg_inbuf.DialectCount = cpu_to_le16(2); + /* structure is big enough for 3 dialects, sending only 2 */ + inbuflen = sizeof(struct validate_negotiate_info_req) - 2; + } else if (strcmp(tcon->ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + vneg_inbuf.DialectCount = cpu_to_le16(3); + /* structure is big enough for 3 dialects */ + inbuflen = sizeof(struct validate_negotiate_info_req); + } else { + /* otherwise specific dialect was requested */ + vneg_inbuf.Dialects[0] = + cpu_to_le16(tcon->ses->server->vals->protocol_id); + vneg_inbuf.DialectCount = cpu_to_le16(1); + /* structure is big enough for 3 dialects, sending only 1 */ + inbuflen = sizeof(struct validate_negotiate_info_req) - 4; + } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, @@ -1110,6 +1183,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, while (sess_data->func) sess_data->func(sess_data); + if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign)) + cifs_dbg(VFS, "signing requested but authenticated as guest\n"); rc = sess_data->result; out: kfree(sess_data); @@ -1634,7 +1709,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, struct cifs_tcon *tcon = oparms->tcon; struct cifs_ses *ses = tcon->ses; struct kvec iov[4]; - struct kvec rsp_iov; + struct kvec rsp_iov = {NULL, 0}; int resp_buftype; int uni_path_len; __le16 *copy_path = NULL; @@ -1763,7 +1838,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc != 0) { cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); - if (err_buf) + if (err_buf && rsp) *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4, GFP_KERNEL); goto creat_exit; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 393ed5f4e1b6..6c9653a130c8 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -716,7 +716,7 @@ struct validate_negotiate_info_req { __u8 Guid[SMB2_CLIENT_GUID_SIZE]; __le16 SecurityMode; __le16 DialectCount; - __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ + __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */ } __packed; struct validate_negotiate_info_rsp { diff --git a/fs/coda/dir.c b/fs/coda/dir.c index c0474ac6cbf2..274ab5586dd0 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -368,9 +368,10 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) goto out; while (1) { + loff_t pos = ctx->pos - 2; + /* read entries from the directory file */ - ret = kernel_read(host_file, ctx->pos - 2, (char *)vdir, - sizeof(*vdir)); + ret = kernel_read(host_file, vdir, sizeof(*vdir), &pos); if (ret < 0) { pr_err("%s: read dir %s failed %d\n", __func__, coda_f2s(&cii->c_fid), ret); diff --git a/fs/coredump.c b/fs/coredump.c index 592683711c64..0eec03696707 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -161,7 +161,7 @@ static int cn_print_exe_file(struct core_name *cn) if (!exe_file) return cn_esc_printf(cn, "%s (path unknown)", current->comm); - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); if (!pathbuf) { ret = -ENOMEM; goto put_exe_file; @@ -734,7 +734,7 @@ static int dax_writeback_one(struct block_device *bdev, } dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); - dax_flush(dax_dev, pgoff, kaddr, size); + dax_flush(dax_dev, kaddr, size); /* * After we have flushed the cache, we can clear the dirty tag. There * cannot be new dirty data in the pfn after the flush has completed as @@ -929,7 +929,7 @@ int __dax_zero_page_range(struct block_device *bdev, return rc; } memset(kaddr + offset, 0, size); - dax_flush(dax_dev, pgoff, kaddr + offset, size); + dax_flush(dax_dev, kaddr + offset, size); dax_read_unlock(id); } return 0; @@ -938,7 +938,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range); static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) { - return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); + return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; } static loff_t diff --git a/fs/direct-io.c b/fs/direct-io.c index 5fa2211e49ae..62cf812ed0e5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) { loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; + int err; /* * AIO submission can race with bio completion to get here while @@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (ret == 0) ret = transferred; + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + */ + if (ret > 0 && dio->op == REQ_OP_WRITE && + dio->inode->i_mapping->nrpages) { + err = invalidate_inode_pages2_range(dio->inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + ret - 1) >> PAGE_SHIFT); + WARN_ON_ONCE(err); + } + if (dio->end_io) { - int err; // XXX: ki_pos?? err = dio->end_io(dio->iocb, offset, ret, dio->private); @@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio) struct dio *dio = bio->bi_private; unsigned long remaining; unsigned long flags; + bool defer_completion = false; /* cleanup the bio */ dio_bio_complete(dio, bio); @@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio) spin_unlock_irqrestore(&dio->bio_lock, flags); if (remaining == 0) { - if (dio->result && dio->defer_completion) { + /* + * Defer completion when defer_completion is set or + * when the inode has pages mapped and this is AIO write. + * We need to invalidate those pages because there is a + * chance they contain stale data in the case buffered IO + * went in between AIO submission and completion into the + * same region. + */ + if (dio->result) + defer_completion = dio->defer_completion || + (dio->op == REQ_OP_WRITE && + dio->inode->i_mapping->nrpages); + if (defer_completion) { INIT_WORK(&dio->complete_work, dio_aio_complete_work); queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); @@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, * For AIO O_(D)SYNC writes we need to defer completions to a workqueue * so that we can call ->fsync. */ - if (dio->is_async && iov_iter_rw(iter) == WRITE && - ((iocb->ki_filp->f_flags & O_DSYNC) || - IS_SYNC(iocb->ki_filp->f_mapping->host))) { - retval = dio_set_defer_completion(dio); + if (dio->is_async && iov_iter_rw(iter) == WRITE) { + retval = 0; + if ((iocb->ki_filp->f_flags & O_DSYNC) || + IS_SYNC(iocb->ki_filp->f_mapping->host)) + retval = dio_set_defer_completion(dio); + else if (!dio->inode->i_sb->s_dio_done_wq) { + /* + * In case of AIO write racing with buffered read we + * need to defer completion. We can't decide this now, + * however the workqueue needs to be initialized here. + */ + retval = sb_init_dio_done_wq(dio->inode->i_sb); + } if (retval) { /* * We grab i_mutex only for reads so we don't have diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9014479d0160..6b801186baa5 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -568,8 +568,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags * 1) The lower mount is ro * 2) The ecryptfs_encrypted_view mount option is specified */ - if (path.dentry->d_sb->s_flags & MS_RDONLY || - mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + if (sb_rdonly(path.dentry->d_sb) || mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) s->s_flags |= MS_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 039e627194a9..c596e7c03424 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -47,7 +47,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; if (!lower_file) return -EIO; - rc = kernel_write(lower_file, data, size, offset); + rc = kernel_write(lower_file, data, size, &offset); mark_inode_dirty_sync(ecryptfs_inode); return rc; } @@ -237,7 +237,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size, lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; if (!lower_file) return -EIO; - return kernel_read(lower_file, offset, data, size); + return kernel_read(lower_file, data, size, &offset); } /** diff --git a/fs/efs/super.c b/fs/efs/super.c index 368f7dd21c61..5c42f1e34a2f 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -306,7 +306,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) } brelse(bh); - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { #ifdef DEBUG pr_info("forcing read-only mode\n"); #endif diff --git a/fs/exec.c b/fs/exec.c index 01a9fb9d8ac3..ac34d9724684 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -885,23 +885,6 @@ struct file *open_exec(const char *name) } EXPORT_SYMBOL(open_exec); -int kernel_read(struct file *file, loff_t offset, - char *addr, unsigned long count) -{ - mm_segment_t old_fs; - loff_t pos = offset; - int result; - - old_fs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - result = vfs_read(file, (void __user *)addr, count, &pos); - set_fs(old_fs); - return result; -} - -EXPORT_SYMBOL(kernel_read); - int kernel_read_file(struct file *file, void **buf, loff_t *size, loff_t max_size, enum kernel_read_file_id id) { @@ -939,8 +922,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, pos = 0; while (pos < i_size) { - bytes = kernel_read(file, pos, (char *)(*buf) + pos, - i_size - pos); + bytes = kernel_read(file, *buf + pos, i_size - pos, &pos); if (bytes < 0) { ret = bytes; goto out; @@ -948,7 +930,6 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, if (bytes == 0) break; - pos += bytes; } if (pos != i_size) { @@ -974,7 +955,7 @@ out: } EXPORT_SYMBOL_GPL(kernel_read_file); -int kernel_read_file_from_path(char *path, void **buf, loff_t *size, +int kernel_read_file_from_path(const char *path, void **buf, loff_t *size, loff_t max_size, enum kernel_read_file_id id) { struct file *file; @@ -1567,6 +1548,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) int prepare_binprm(struct linux_binprm *bprm) { int retval; + loff_t pos = 0; bprm_fill_uid(bprm); @@ -1577,7 +1559,7 @@ int prepare_binprm(struct linux_binprm *bprm) bprm->called_set_creds = 1; memset(bprm->buf, 0, BINPRM_BUF_SIZE); - return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); + return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos); } EXPORT_SYMBOL(prepare_binprm); @@ -1763,9 +1745,9 @@ static int do_execveat_common(int fd, struct filename *filename, bprm->filename = filename->name; } else { if (filename->name[0] == '\0') - pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd); + pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); else - pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s", + pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", fd, filename->name); if (!pathbuf) { retval = -ENOMEM; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 4dca6f348714..1b8fc73de4a1 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -820,11 +820,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret == 0) { iomap->type = IOMAP_HOLE; - iomap->blkno = IOMAP_NULL_BLOCK; + iomap->addr = IOMAP_NULL_ADDR; iomap->length = 1 << blkbits; } else { iomap->type = IOMAP_MAPPED; - iomap->blkno = (sector_t)bno << (blkbits - 9); + iomap->addr = (u64)bno << blkbits; iomap->length = (u64)ret << blkbits; iomap->flags |= IOMAP_F_MERGED; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index fc18edd81815..1458706bd2ec 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -52,7 +52,7 @@ void ext2_error(struct super_block *sb, const char *function, struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { spin_lock(&sbi->s_lock); sbi->s_mount_state |= EXT2_ERROR_FS; es->s_state |= cpu_to_le16(EXT2_ERROR_FS); @@ -151,7 +151,7 @@ static void ext2_put_super (struct super_block * sb) ext2_xattr_destroy_cache(sbi->s_ea_block_cache); sbi->s_ea_block_cache = NULL; } - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { struct ext2_super_block *es = sbi->s_es; spin_lock(&sbi->s_lock); @@ -943,8 +943,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) le32_to_cpu(features)); goto failed_mount; } - if (!(sb->s_flags & MS_RDONLY) && - (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ + if (!sb_rdonly(sb) && (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of " "unsupported optional features (%x)", le32_to_cpu(features)); @@ -1173,7 +1172,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_msg(sb, KERN_WARNING, "warning: mounting ext3 filesystem as ext2"); - if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY)) + if (ext2_setup_super (sb, es, sb_rdonly(sb))) sb->s_flags |= MS_RDONLY; ext2_write_super(sb); return 0; @@ -1305,7 +1304,7 @@ static int ext2_unfreeze(struct super_block *sb) static void ext2_write_super(struct super_block *sb) { - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) ext2_sync_fs(sb, 1); } @@ -1343,7 +1342,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) "dax flag with busy inodes while remounting"); sbi->s_mount_opt ^= EXT2_MOUNT_DAX; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) { spin_unlock(&sbi->s_lock); return 0; } diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index dd106b1d5d89..5b342ac67d2e 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -47,7 +47,7 @@ static int ext4_journal_check_start(struct super_block *sb) if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) return -EIO; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 57dcaea762c3..b1da660ac3bc 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -223,6 +223,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_DAX(inode)) return ext4_dax_write_iter(iocb, from); #endif + if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT)) + return -EOPNOTSUPP; if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) @@ -371,7 +373,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) return -EIO; if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && - !(sb->s_flags & MS_RDONLY))) { + !sb_rdonly(sb))) { sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED; /* * Sample where the filesystem has been mounted and @@ -431,9 +433,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) return ret; } - /* Set the flags to support nowait AIO */ - filp->f_mode |= FMODE_AIO_NOWAIT; - + filp->f_mode |= FMODE_NOWAIT; return dquot_file_open(inode, filp); } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index aae2c3971cef..f9230580a84b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -107,7 +107,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_ext4_sync_file_enter(file, datasync); - if (inode->i_sb->s_flags & MS_RDONLY) { + if (sb_rdonly(inode->i_sb)) { /* Make sure that we read updated s_mount_flags value */ smp_rmb(); if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 71e93a23cec3..ee823022aa34 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1382,7 +1382,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int num, ret = 0, used_blks = 0; /* This should not happen, but just to be sure check this */ - if (sb->s_flags & MS_RDONLY) { + if (sb_rdonly(sb)) { ret = 1; goto out; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 31db875bc7a1..d9e633c12aae 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3472,7 +3472,7 @@ retry: if (ret == 0) { iomap->type = IOMAP_HOLE; - iomap->blkno = IOMAP_NULL_BLOCK; + iomap->addr = IOMAP_NULL_ADDR; iomap->length = (u64)map.m_len << blkbits; } else { if (map.m_flags & EXT4_MAP_MAPPED) { @@ -3483,7 +3483,7 @@ retry: WARN_ON_ONCE(1); return -EIO; } - iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9); + iomap->addr = (u64)map.m_pblk << blkbits; iomap->length = (u64)map.m_len << blkbits; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 77cdce1f17ce..84c54f15f1dd 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -185,7 +185,7 @@ static int kmmpd(void *data) goto exit_thread; } - if (sb->s_flags & MS_RDONLY) { + if (sb_rdonly(sb)) { ext4_warning(sb, "kmmpd being stopped since filesystem " "has been remounted as readonly."); goto exit_thread; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 71b9a667e1bc..b104096fce9e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -405,7 +405,7 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) static void ext4_handle_error(struct super_block *sb) { - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; if (!test_opt(sb, ERRORS_CONT)) { @@ -587,8 +587,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, /* Special case: if the error is EROFS, and we're not already * inside a transaction, then there's really no point in logging * an error. */ - if (errno == -EROFS && journal_current_handle() == NULL && - (sb->s_flags & MS_RDONLY)) + if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb)) return; if (ext4_error_ratelimit(sb)) { @@ -628,7 +627,7 @@ void __ext4_abort(struct super_block *sb, const char *function, sb->s_id, function, line, &vaf); va_end(args); - if ((sb->s_flags & MS_RDONLY) == 0) { + if (sb_rdonly(sb) == 0) { ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; /* @@ -889,11 +888,11 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); - if (!(sb->s_flags & MS_RDONLY) && !aborted) { + if (!sb_rdonly(sb) && !aborted) { ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); for (i = 0; i < sbi->s_gdb_count; i++) @@ -2100,7 +2099,7 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset) struct super_block *sb = seq->private; int rc; - seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); + seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw"); rc = _ext4_show_options(seq, sb, 1); seq_puts(seq, "\n"); return rc; @@ -2368,7 +2367,7 @@ static int ext4_check_descriptors(struct super_block *sb, "Checksum for group %u failed (%u!=%u)", i, le16_to_cpu(ext4_group_desc_csum(sb, i, gdp)), le16_to_cpu(gdp->bg_checksum)); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { ext4_unlock_group(sb, i); return 0; } @@ -3136,8 +3135,7 @@ int ext4_register_li_request(struct super_block *sb, goto out; } - if (first_not_zeroed == ngroups || - (sb->s_flags & MS_RDONLY) || + if (first_not_zeroed == ngroups || sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE)) goto out; @@ -3683,7 +3681,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem. */ - if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) + if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); @@ -3812,12 +3810,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_hash_unsigned = 3; else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { #ifdef __CHAR_UNSIGNED__ - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); sbi->s_hash_unsigned = 3; #else - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif @@ -4017,7 +4015,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) needs_recovery = (es->s_last_orphan != 0 || ext4_has_feature_journal_needs_recovery(sb)); - if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY)) + if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) goto failed_mount3a; @@ -4029,7 +4027,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = ext4_load_journal(sb, es, journal_devnum); if (err) goto failed_mount3a; - } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && + } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); @@ -4143,7 +4141,7 @@ no_journal: goto failed_mount_wq; } - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) && + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && !ext4_has_feature_encrypt(sb)) { ext4_set_feature_encrypt(sb); ext4_commit_super(sb, 1); @@ -4197,7 +4195,7 @@ no_journal: goto failed_mount4; } - if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) + if (ext4_setup_super(sb, es, sb_rdonly(sb))) sb->s_flags |= MS_RDONLY; /* determine the minimum size of new large inodes, if present */ @@ -4285,7 +4283,7 @@ no_journal: #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ - if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) { + if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) { err = ext4_enable_quotas(sb); if (err) goto failed_mount8; @@ -4609,7 +4607,7 @@ static int ext4_load_journal(struct super_block *sb, * can get read-write access to the device. */ if (ext4_has_feature_journal_needs_recovery(sb)) { - if (sb->s_flags & MS_RDONLY) { + if (sb_rdonly(sb)) { ext4_msg(sb, KERN_INFO, "INFO: recovery " "required on readonly filesystem"); if (really_read_only) { @@ -4764,8 +4762,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (jbd2_journal_flush(journal) < 0) goto out; - if (ext4_has_feature_journal_needs_recovery(sb) && - sb->s_flags & MS_RDONLY) { + if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) { ext4_clear_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); } @@ -4821,7 +4818,7 @@ int ext4_force_commit(struct super_block *sb) { journal_t *journal; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; journal = EXT4_SB(sb)->s_journal; @@ -4886,7 +4883,7 @@ static int ext4_freeze(struct super_block *sb) int error = 0; journal_t *journal; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; journal = EXT4_SB(sb)->s_journal; @@ -4921,7 +4918,7 @@ out: */ static int ext4_unfreeze(struct super_block *sb) { - if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb))) + if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb))) return 0; if (EXT4_SB(sb)->s_journal) { @@ -5059,7 +5056,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (*flags & MS_LAZYTIME) sb->s_flags |= MS_LAZYTIME; - if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { + if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; goto restore_opts; @@ -5154,7 +5151,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * Reinitialize lazy itable initialization thread based on * current settings */ - if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) + if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE)) ext4_unregister_li_request(sb); else { ext4_group_t first_not_zeroed; @@ -5731,7 +5728,7 @@ static inline int ext2_feature_set_ok(struct super_block *sb) { if (ext4_has_unknown_ext2_incompat_features(sb)) return 0; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 1; if (ext4_has_unknown_ext2_ro_compat_features(sb)) return 0; @@ -5762,7 +5759,7 @@ static inline int ext3_feature_set_ok(struct super_block *sb) return 0; if (!ext4_has_feature_journal(sb)) return 0; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 1; if (ext4_has_unknown_ext3_ro_compat_features(sb)) return 0; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 1d9a8c4e9de0..48b2336692f9 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -309,7 +309,7 @@ static void mark_fsinfo_dirty(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - if (sb->s_flags & MS_RDONLY || sbi->fat_bits != 32) + if (sb_rdonly(sb) || sbi->fat_bits != 32) return; __mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a2c05f2ada6d..30c52394a7ad 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -657,7 +657,7 @@ static void fat_set_state(struct super_block *sb, struct msdos_sb_info *sbi = MSDOS_SB(sb); /* do not change any thing if mounted read only */ - if ((sb->s_flags & MS_RDONLY) && !force) + if (sb_rdonly(sb) && !force) return; /* do not change state if fs was dirty */ @@ -787,7 +787,7 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) /* make sure we update state on remount. */ new_rdonly = *flags & MS_RDONLY; - if (new_rdonly != (sb->s_flags & MS_RDONLY)) { + if (new_rdonly != sb_rdonly(sb)) { if (new_rdonly) fat_set_state(sb, 0, 0); else diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 8a8698119ff7..acc3aa30ee54 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -32,7 +32,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) if (opts->errors == FAT_ERRORS_PANIC) panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); - else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { + else if (opts->errors == FAT_ERRORS_RO && !sb_rdonly(sb)) { sb->s_flags |= MS_RDONLY; fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); } diff --git a/fs/fcntl.c b/fs/fcntl.c index 0491da3b28c3..448a1119f0be 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -749,7 +749,7 @@ static void send_sigio_to_task(struct task_struct *p, * specific si_codes. In that case use SI_SIGIO instead * to remove the ambiguity. */ - if (sig_specific_sicodes(signum)) + if ((signum != SIGPOLL) && sig_specific_sicodes(signum)) si.si_code = SI_SIGIO; /* Make sure we are called with one of the POLL_* diff --git a/fs/file_table.c b/fs/file_table.c index 72e861a35a7f..61517f57f8ef 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -233,12 +233,10 @@ static LLIST_HEAD(delayed_fput_list); static void delayed_fput(struct work_struct *unused) { struct llist_node *node = llist_del_all(&delayed_fput_list); - struct llist_node *next; + struct file *f, *t; - for (; node; node = next) { - next = llist_next(node); - __fput(llist_entry(node, struct file, f_u.fu_llist)); - } + llist_for_each_entry_safe(f, t, node, f_u.fu_llist) + __fput(f); } static void ____fput(struct callback_head *work) @@ -312,7 +310,7 @@ void put_filp(struct file *file) } void __init files_init(void) -{ +{ filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); percpu_counter_init(&nr_files, 0, GFP_KERNEL); @@ -331,4 +329,4 @@ void __init files_maxfiles_init(void) n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); -} +} diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 67f940892ef8..b5ab06fabc60 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -262,7 +262,8 @@ static int fscache_objlist_show(struct seq_file *m, void *v) type = "DT"; break; default: - sprintf(_type, "%02u", cookie->def->type); + snprintf(_type, sizeof(_type), "%02u", + cookie->def->type); type = _type; break; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index dac6559e2195..cdd1c5f06f45 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -554,7 +554,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) struct gfs2_inode *ip = gl->gl_object; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) + if (!remote || sb_rdonly(sdp->sd_vfs)) return; if (gl->gl_demote_state == LM_ST_UNLOCKED && diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 84593587691d..a3711f543405 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1037,7 +1037,7 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp) char ro[20]; char spectator[20]; char *envp[] = { ro, spectator, NULL }; - sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); + sprintf(ro, "RDONLY=%d", sb_rdonly(sb)); sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp); } @@ -1179,7 +1179,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent goto fail_per_node; } - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { error = gfs2_make_fs_rw(sdp); if (error) { fs_err(sdp, "can't make FS RW: %d\n", error); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e647938432bd..e700fb162664 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -452,7 +452,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) *qdp = NULL; - if (sdp->sd_vfs->s_flags & MS_RDONLY) + if (sb_rdonly(sdp->sd_vfs)) return 0; spin_lock(&qd_lock); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 113b6095a58d..9395a3db1a60 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -522,7 +522,7 @@ void gfs2_recover_func(struct work_struct *work) if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) ro = 1; } else { - if (sdp->sd_vfs->s_flags & MS_RDONLY) { + if (sb_rdonly(sdp->sd_vfs)) { /* check if device itself is read-only */ ro = bdev_read_only(sdp->sd_vfs->s_bdev); if (!ro) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 769841185ce5..8e54f2e3a304 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -893,7 +893,7 @@ restart: } spin_unlock(&sdp->sd_jindex_spin); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { error = gfs2_make_fs_ro(sdp); if (error) gfs2_io_error(sdp); @@ -1569,7 +1569,7 @@ static void gfs2_evict_inode(struct inode *inode) return; } - if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) + if (inode->i_nlink || sb_rdonly(sb)) goto out; if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) { diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ca1f97ff898c..9eb9d0a1abd9 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -645,7 +645,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) char *envp[] = { ro, spectator, NULL }; int sysfs_frees_sdp = 0; - sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); + sprintf(ro, "RDONLY=%d", sb_rdonly(sb)); sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); sdp->sd_kobj.kset = gfs2_kset; diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 482081bcdf70..894994d2c885 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -210,7 +210,7 @@ int hfs_mdb_get(struct super_block *sb) pr_warn("filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { /* Mark the volume uncleanly unmounted in case we crash */ attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); @@ -259,7 +259,7 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; lock_buffer(HFS_SB(sb)->mdb_bh); @@ -334,7 +334,7 @@ void hfs_mdb_commit(struct super_block *sb) void hfs_mdb_close(struct super_block *sb) { /* update volume attributes */ - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; HFS_SB(sb)->mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); HFS_SB(sb)->mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index bf6304a350a6..7e0d65e9586c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -71,7 +71,7 @@ void hfs_mark_mdb_dirty(struct super_block *sb) struct hfs_sb_info *sbi = HFS_SB(sb); unsigned long delay; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; spin_lock(&sbi->work_lock); @@ -115,7 +115,7 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); *flags |= MS_NODIRATIME; - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) return 0; if (!(*flags & MS_RDONLY)) { if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 67aedf4c2e7c..e5bb2de2262a 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -264,7 +264,7 @@ void hfsplus_mark_mdb_dirty(struct super_block *sb) struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); unsigned long delay; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; spin_lock(&sbi->work_lock); @@ -284,7 +284,7 @@ static void hfsplus_put_super(struct super_block *sb) cancel_delayed_work_sync(&sbi->sync_work); - if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { + if (!sb_rdonly(sb) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; vhdr->modify_date = hfsp_now2mt(); @@ -329,7 +329,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfsplus_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) return 0; if (!(*flags & MS_RDONLY)) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; @@ -462,7 +462,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) pr_warn("Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && - !(sb->s_flags & MS_RDONLY)) { + !sb_rdonly(sb)) { pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } @@ -535,7 +535,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) } else hfs_find_exit(&fd); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { /* * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused * all three are registered with Apple for our use diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index d6a4b55d2ab0..098bf0f4f386 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -538,7 +538,7 @@ int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned return 0; if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) { hpfs_lock(s); - if (s->s_flags & MS_RDONLY) { + if (sb_rdonly(s)) { err = -EROFS; goto unlock_1; } @@ -559,7 +559,7 @@ unlock_1: end_bmp = (end + 0x3fff) >> 14; while (start_bmp < end_bmp && !err) { hpfs_lock(s); - if (s->s_flags & MS_RDONLY) { + if (sb_rdonly(s)) { err = -EROFS; goto unlock_2; } diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 7b9150c2e75c..fa6bbb4f509f 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -264,7 +264,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in hpfs_result = hpfs_i(result); if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino; - if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) { + if (de->has_acl || de->has_xtd_perm) if (!sb_rdonly(dir->i_sb)) { hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); goto bail1; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 82067ca22f2b..1516fb4e28f4 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -21,7 +21,7 @@ static void mark_dirty(struct super_block *s, int remount) { - if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) { + if (hpfs_sb(s)->sb_chkdsk && (remount || !sb_rdonly(s))) { struct buffer_head *bh; struct hpfs_spare_block *sb; if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { @@ -41,7 +41,7 @@ static void unmark_dirty(struct super_block *s) { struct buffer_head *bh; struct hpfs_spare_block *sb; - if (s->s_flags & MS_RDONLY) return; + if (sb_rdonly(s)) return; sync_blockdev(s->s_bdev); if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error; @@ -73,14 +73,14 @@ void hpfs_error(struct super_block *s, const char *fmt, ...) mark_dirty(s, 0); panic("HPFS panic"); } else if (hpfs_sb(s)->sb_err == 1) { - if (s->s_flags & MS_RDONLY) + if (sb_rdonly(s)) pr_cont("; already mounted read-only\n"); else { pr_cont("; remounting read-only\n"); mark_dirty(s, 0); s->s_flags |= MS_RDONLY; } - } else if (s->s_flags & MS_RDONLY) + } else if (sb_rdonly(s)) pr_cont("; going on - but anything won't be destroyed because it's read-only\n"); else pr_cont("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n"); @@ -607,8 +607,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) } /* Check version */ - if (!(s->s_flags & MS_RDONLY) && - superblock->funcversion != 2 && superblock->funcversion != 3) { + if (!sb_rdonly(s) && superblock->funcversion != 2 && superblock->funcversion != 3) { pr_err("Bad version %d,%d. Mount readonly to go around\n", (int)superblock->version, (int)superblock->funcversion); pr_err("please try recent version of HPFS driver at http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi and if it still can't understand this format, contact author - mikulas@artax.karlin.mff.cuni.cz\n"); @@ -666,7 +665,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) hpfs_error(s, "improperly stopped"); } - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { spareblock->dirty = 1; spareblock->old_wrote = 0; mark_buffer_dirty(bh2); diff --git a/fs/iomap.c b/fs/iomap.c index 269b24a01f32..379eb9896093 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -350,8 +350,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, struct iomap *iomap) { - sector_t sector = iomap->blkno + - (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9); + sector_t sector = (iomap->addr + + (pos & PAGE_MASK) - iomap->offset) >> 9; return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector, offset, bytes); @@ -510,11 +510,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, flags |= FIEMAP_EXTENT_MERGED; if (iomap->flags & IOMAP_F_SHARED) flags |= FIEMAP_EXTENT_SHARED; + if (iomap->flags & IOMAP_F_DATA_INLINE) + flags |= FIEMAP_EXTENT_DATA_INLINE; return fiemap_fill_next_extent(fi, iomap->offset, - iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, + iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0, iomap->length, flags); - } static loff_t @@ -713,8 +714,24 @@ struct iomap_dio { static ssize_t iomap_dio_complete(struct iomap_dio *dio) { struct kiocb *iocb = dio->iocb; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + */ + if (!dio->error && + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { + ret = invalidate_inode_pages2_range(inode->i_mapping, + iocb->ki_pos >> PAGE_SHIFT, + (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); + WARN_ON_ONCE(ret); + } + if (dio->end_io) { ret = dio->end_io(iocb, dio->error ? dio->error : dio->size, @@ -807,7 +824,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, bio = bio_alloc(GFP_KERNEL, 1); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = - iomap->blkno + ((pos - iomap->offset) >> 9); + (iomap->addr + pos - iomap->offset) >> 9; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; @@ -886,7 +903,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, bio = bio_alloc(GFP_KERNEL, nr_pages); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = - iomap->blkno + ((pos - iomap->offset) >> 9); + (iomap->addr + pos - iomap->offset) >> 9; bio->bi_write_hint = dio->iocb->ki_hint; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; @@ -993,6 +1010,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, WARN_ON_ONCE(ret); ret = 0; + if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && + !inode->i_sb->s_dio_done_wq) { + ret = sb_init_dio_done_wq(inode->i_sb); + if (ret < 0) + goto out_free_dio; + } + inode_dio_begin(inode); blk_start_plug(&plug); @@ -1015,13 +1039,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret < 0) iomap_dio_set_error(dio, ret); - if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && - !inode->i_sb->s_dio_done_wq) { - ret = sb_init_dio_done_wq(inode->i_sb); - if (ret < 0) - iomap_dio_set_error(dio, ret); - } - if (!atomic_dec_and_test(&dio->ref)) { if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; @@ -1042,19 +1059,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ret = iomap_dio_complete(dio); - /* - * Try again to invalidate clean pages which might have been cached by - * non-direct readahead, or faulted in by get_user_pages() if the source - * of the write was an mmap'ed region of the file we're writing. Either - * one is a pretty crazy thing to do, so we don't support it 100%. If - * this invalidation fails, tough, the write still worked... - */ - if (iov_iter_rw(iter) == WRITE) { - int err = invalidate_inode_pages2_range(mapping, - start >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(err); - } - return ret; out_free_dio: diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index f1ed935322db..447a24d77b89 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root) if (sbi->s_fmode != ISOFS_INVALID_MODE) seq_printf(m, ",fmode=%o", sbi->s_fmode); +#ifdef CONFIG_JOLIET if (sbi->s_nls_iocharset && strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0) seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); +#endif return 0; } @@ -737,7 +739,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) root_found: /* We don't support read-write mounts */ - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { error = -EACCES; goto out_freebh; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 76fa814df3d1..e96c6b05e43e 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -395,14 +395,14 @@ int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (c->flags & JFFS2_SB_FLAG_RO && !(sb->s_flags & MS_RDONLY)) + if (c->flags & JFFS2_SB_FLAG_RO && !sb_rdonly(sb)) return -EROFS; /* We stop if it was running, then restart if it needs to. This also catches the case where it was stopped and this is just a remount to restart it. Flush the writebuffer, if neccecary, else we loose it */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { jffs2_stop_garbage_collect_thread(c); mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); @@ -590,7 +590,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = JFFS2_SUPER_MAGIC; - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) jffs2_start_garbage_collect_thread(c); return 0; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 5ef21f4c4c77..153f1c6eb169 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -342,7 +342,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 48d9522e209c..2cfe487708e0 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1162,7 +1162,7 @@ static void delayed_wbuf_sync(struct work_struct *work) struct jffs2_sb_info *c = work_to_sb(work); struct super_block *sb = OFNI_BS_2SFFJ(c); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { jffs2_dbg(1, "%s()\n", __func__); jffs2_flush_wbuf_gc(c, 0); } @@ -1173,7 +1173,7 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c) struct super_block *sb = OFNI_BS_2SFFJ(c); unsigned long delay; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; delay = msecs_to_jiffies(dirty_writeback_interval * 10); diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 9895595fd2f2..d8658607bf46 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -362,7 +362,7 @@ static int chkSuper(struct super_block *sb) /* validate fs state */ if (j_sb->s_state != cpu_to_le32(FM_CLEAN) && - !(sb->s_flags & MS_RDONLY)) { + !sb_rdonly(sb)) { jfs_err("jfs_mount: Mount Failure: File System Dirty."); rc = -EINVAL; goto out; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 60726ae7cf26..2f14677169c3 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -76,7 +76,7 @@ static void jfs_handle_error(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; updateSuper(sb, FM_DIRTY); @@ -468,7 +468,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; if (newLVSize) { - if (sb->s_flags & MS_RDONLY) { + if (sb_rdonly(sb)) { pr_err("JFS: resize requires volume to be mounted read-write\n"); return -EROFS; } @@ -477,7 +477,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) return rc; } - if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { /* * Invalidate any previously read metadata. fsck may have * changed the on-disk data since we mounted r/o @@ -493,7 +493,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) dquot_resume(sb, -1); return ret; } - if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { + if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { rc = dquot_suspend(sb, -1); if (rc < 0) return rc; @@ -502,7 +502,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) return rc; } if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { rc = jfs_umount_rw(sb); if (rc) return rc; @@ -592,7 +592,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) jfs_err("jfs_mount failed w/return code = %d", rc); goto out_mount_failed; } - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) sbi->log = NULL; else { rc = jfs_mount_rw(sb, 0); @@ -652,7 +652,7 @@ static int jfs_freeze(struct super_block *sb) struct jfs_log *log = sbi->log; int rc = 0; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { txQuiesce(sb); rc = lmLogShutdown(log); if (rc) { @@ -682,7 +682,7 @@ static int jfs_unfreeze(struct super_block *sb) struct jfs_log *log = sbi->log; int rc = 0; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { rc = updateSuper(sb, FM_MOUNT); if (rc) { jfs_error(sb, "updateSuper failed\n"); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 6ac76b0434e9..b6829d679643 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -42,7 +42,7 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; mark_buffer_dirty(sbi->s_sbh); @@ -125,7 +125,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) sync_filesystem(sb); ms = sbi->s_ms; - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) return 0; if (*flags & MS_RDONLY) { if (ms->s_state & MINIX_VALID_FS || @@ -293,7 +293,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) if (!s->s_root) goto out_no_root; - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ ms->s_state &= ~MINIX_VALID_FS; mark_buffer_dirty(bh); diff --git a/fs/namei.c b/fs/namei.c index 1180f9c58093..c75ea03ca147 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -447,8 +447,7 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) umode_t mode = inode->i_mode; /* Nobody gets write access to a read-only fs. */ - if ((sb->s_flags & MS_RDONLY) && - (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; } return 0; diff --git a/fs/namespace.c b/fs/namespace.c index df0f7521979a..54059b142d6b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -275,7 +275,7 @@ int __mnt_is_readonly(struct vfsmount *mnt) { if (mnt->mnt_flags & MNT_READONLY) return 1; - if (mnt->mnt_sb->s_flags & MS_RDONLY) + if (sb_rdonly(mnt->mnt_sb)) return 1; return 0; } @@ -1029,7 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) return ERR_PTR(-ENOMEM); - if (flags & MS_KERNMOUNT) + if (flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; root = mount_fs(type, flags, name, data); @@ -1061,7 +1061,7 @@ vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, if (mountpoint->d_sb->s_user_ns != &init_user_ns) return ERR_PTR(-EPERM); - return vfs_kern_mount(type, MS_SUBMOUNT, name, data); + return vfs_kern_mount(type, SB_SUBMOUNT, name, data); } EXPORT_SYMBOL_GPL(vfs_submount); @@ -1182,12 +1182,10 @@ static LLIST_HEAD(delayed_mntput_list); static void delayed_mntput(struct work_struct *unused) { struct llist_node *node = llist_del_all(&delayed_mntput_list); - struct llist_node *next; + struct mount *m, *t; - for (; node; node = next) { - next = llist_next(node); - cleanup_mnt(llist_entry(node, struct mount, mnt_llist)); - } + llist_for_each_entry_safe(m, t, node, mnt_llist) + cleanup_mnt(m); } static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); @@ -1592,8 +1590,8 @@ static int do_umount(struct mount *mnt, int flags) if (!capable(CAP_SYS_ADMIN)) return -EPERM; down_write(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY)) - retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); + if (!sb_rdonly(sb)) + retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); up_write(&sb->s_umount); return retval; } @@ -2117,7 +2115,7 @@ static void unlock_mount(struct mountpoint *where) static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) { - if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) + if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) return -EINVAL; if (d_is_dir(mp->m_dentry) != @@ -2131,9 +2129,9 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) * Sanity check the flags to change_mnt_propagation. */ -static int flags_to_propagation_type(int flags) +static int flags_to_propagation_type(int ms_flags) { - int type = flags & ~(MS_REC | MS_SILENT); + int type = ms_flags & ~(MS_REC | MS_SILENT); /* Fail if any non-propagation flags are set */ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) @@ -2147,18 +2145,18 @@ static int flags_to_propagation_type(int flags) /* * recursively change the type of the mountpoint. */ -static int do_change_type(struct path *path, int flag) +static int do_change_type(struct path *path, int ms_flags) { struct mount *m; struct mount *mnt = real_mount(path->mnt); - int recurse = flag & MS_REC; + int recurse = ms_flags & MS_REC; int type; int err = 0; if (path->dentry != path->mnt->mnt_root) return -EINVAL; - type = flags_to_propagation_type(flag); + type = flags_to_propagation_type(ms_flags); if (!type) return -EINVAL; @@ -2280,8 +2278,8 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) * If you've mounted a non-root directory somewhere and want to do remount * on it - tough luck. */ -static int do_remount(struct path *path, int flags, int mnt_flags, - void *data) +static int do_remount(struct path *path, int ms_flags, int sb_flags, + int mnt_flags, void *data) { int err; struct super_block *sb = path->mnt->mnt_sb; @@ -2325,12 +2323,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags, return err; down_write(&sb->s_umount); - if (flags & MS_BIND) - err = change_mount_flags(path->mnt, flags); + if (ms_flags & MS_BIND) + err = change_mount_flags(path->mnt, ms_flags); else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; else - err = do_remount_sb(sb, flags, data, 0); + err = do_remount_sb(sb, sb_flags, data, 0); if (!err) { lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; @@ -2495,7 +2493,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); * create a new mount for userspace and request it to be added into the * namespace's tree */ -static int do_new_mount(struct path *path, const char *fstype, int flags, +static int do_new_mount(struct path *path, const char *fstype, int sb_flags, int mnt_flags, const char *name, void *data) { struct file_system_type *type; @@ -2509,7 +2507,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, if (!type) return -ENODEV; - mnt = vfs_kern_mount(type, flags, name, data); + mnt = vfs_kern_mount(type, sb_flags, name, data); if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); @@ -2764,8 +2762,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; + unsigned int mnt_flags = 0, sb_flags; int retval = 0; - int mnt_flags = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -2775,6 +2773,9 @@ long do_mount(const char *dev_name, const char __user *dir_name, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + if (flags & MS_NOUSER) + return -EINVAL; + /* ... and get the mountpoint */ retval = user_path(dir_name, &path); if (retval) @@ -2784,7 +2785,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, type_page, flags, data_page); if (!retval && !may_mount()) retval = -EPERM; - if (!retval && (flags & MS_MANDLOCK) && !may_mandlock()) + if (!retval && (flags & SB_MANDLOCK) && !may_mandlock()) retval = -EPERM; if (retval) goto dput_out; @@ -2806,7 +2807,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); - if (flags & MS_RDONLY) + if (flags & SB_RDONLY) mnt_flags |= MNT_READONLY; /* The default atime for remount is preservation */ @@ -2817,12 +2818,15 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; } - flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | - MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | - MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT); + sb_flags = flags & (SB_RDONLY | + SB_SYNCHRONOUS | + SB_MANDLOCK | + SB_DIRSYNC | + SB_SILENT | + SB_POSIXACL); if (flags & MS_REMOUNT) - retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, + retval = do_remount(&path, flags, sb_flags, mnt_flags, data_page); else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); @@ -2831,7 +2835,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, else if (flags & MS_MOVE) retval = do_move_mount(&path, dev_name); else - retval = do_new_mount(&path, type_page, flags, mnt_flags, + retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, dev_name, data_page); dput_out: path_put(&path); @@ -3281,7 +3285,7 @@ void put_mnt_ns(struct mnt_namespace *ns) struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) { struct vfsmount *mnt; - mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); + mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); if (!IS_ERR(mnt)) { /* * it is a longterm mount, don't release mnt until @@ -3358,7 +3362,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, mnt_flags = mnt->mnt.mnt_flags; /* Don't miss readonly hidden in the superblock flags */ - if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + if (sb_rdonly(mnt->mnt.mnt_sb)) mnt_flags |= MNT_LOCK_READONLY; /* Verify the mount flags are equal to or more permissive diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a385d1c3f146..0214dd1e1060 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -208,21 +208,19 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); * fall back to doing a synchronous write. */ static int -nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) +nfs_file_fsync_commit(struct file *file, int datasync) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); - int have_error, do_resend, status; + int do_resend, status; int ret = 0; dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); - have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - if (have_error) { + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { ret = xchg(&ctx->error, 0); if (ret) goto out; @@ -247,10 +245,16 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); do { + struct nfs_open_context *ctx = nfs_file_open_context(file); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { + int ret2 = xchg(&ctx->error, 0); + if (ret2) + ret = ret2; + } if (ret != 0) break; - ret = nfs_file_fsync_commit(file, start, end, datasync); + ret = nfs_file_fsync_commit(file, datasync); if (!ret) ret = pnfs_sync_inode(inode, !!datasync); /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 68cc22083639..5bdf952f414b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -768,3 +768,10 @@ static inline bool nfs_error_is_fatal(int err) return false; } } + +static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) +{ + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); +} diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 2ca9167bc97d..551711042ba4 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -719,6 +719,254 @@ TRACE_EVENT(nfs_sillyrename_unlink, __get_str(name) ) ); + +TRACE_EVENT(nfs_initiate_read, + TP_PROTO( + const struct inode *inode, + loff_t offset, unsigned long count + ), + + TP_ARGS(inode, offset, count), + + TP_STRUCT__entry( + __field(loff_t, offset) + __field(unsigned long, count) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->offset = offset; + __entry->count = count; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count + ) +); + +TRACE_EVENT(nfs_readpage_done, + TP_PROTO( + const struct inode *inode, + int status, loff_t offset, bool eof + ), + + TP_ARGS(inode, status, offset, eof), + + TP_STRUCT__entry( + __field(int, status) + __field(loff_t, offset) + __field(bool, eof) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->status = status; + __entry->offset = offset; + __entry->eof = eof; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld status=%d%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->status, + __entry->eof ? " eof" : "" + ) +); + +/* + * XXX: I tried using NFS_UNSTABLE and friends in this table, but they + * all evaluate to 0 for some reason, even if I include linux/nfs.h. + */ +#define nfs_show_stable(stable) \ + __print_symbolic(stable, \ + { 0, " (UNSTABLE)" }, \ + { 1, " (DATA_SYNC)" }, \ + { 2, " (FILE_SYNC)" }) + +TRACE_EVENT(nfs_initiate_write, + TP_PROTO( + const struct inode *inode, + loff_t offset, unsigned long count, + enum nfs3_stable_how stable + ), + + TP_ARGS(inode, offset, count, stable), + + TP_STRUCT__entry( + __field(loff_t, offset) + __field(unsigned long, count) + __field(enum nfs3_stable_how, stable) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->offset = offset; + __entry->count = count; + __entry->stable = stable; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%lu stable=%d%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __entry->stable, nfs_show_stable(__entry->stable) + ) +); + +TRACE_EVENT(nfs_writeback_done, + TP_PROTO( + const struct inode *inode, + int status, + loff_t offset, + struct nfs_writeverf *writeverf + ), + + TP_ARGS(inode, status, offset, writeverf), + + TP_STRUCT__entry( + __field(int, status) + __field(loff_t, offset) + __field(enum nfs3_stable_how, stable) + __field(unsigned long long, verifier) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->status = status; + __entry->offset = offset; + __entry->stable = writeverf->committed; + memcpy(&__entry->verifier, &writeverf->verifier, + sizeof(__entry->verifier)); + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld status=%d stable=%d%s " + "verifier 0x%016llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->status, + __entry->stable, nfs_show_stable(__entry->stable), + __entry->verifier + ) +); + +TRACE_EVENT(nfs_initiate_commit, + TP_PROTO( + const struct nfs_commit_data *data + ), + + TP_ARGS(data), + + TP_STRUCT__entry( + __field(loff_t, offset) + __field(unsigned long, count) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count + ) +); + +TRACE_EVENT(nfs_commit_done, + TP_PROTO( + const struct nfs_commit_data *data + ), + + TP_ARGS(data), + + TP_STRUCT__entry( + __field(int, status) + __field(loff_t, offset) + __field(unsigned long long, verifier) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->status = data->res.op_status; + __entry->offset = data->args.offset; + memcpy(&__entry->verifier, &data->verf.verifier, + sizeof(__entry->verifier)); + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld status=%d verifier 0x%016llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->status, + __entry->verifier + ) +); + #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index bec120ec1967..d0543e19098a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1170,8 +1170,8 @@ out_failed: /* remember fatal errors */ if (nfs_error_is_fatal(desc->pg_error)) - mapping_set_error(desc->pg_inode->i_mapping, - desc->pg_error); + nfs_context_set_write_error(req->wb_context, + desc->pg_error); func = desc->pg_completion_ops->error_cleanup; for (midx = 0; midx < desc->pg_mirror_count; midx++) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7879ed8ceb76..3bcd669a3152 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1664,7 +1664,7 @@ pnfs_update_layout(struct inode *ino, .offset = pos, .length = count, }; - unsigned pg_offset, seq; + unsigned pg_offset; struct nfs_server *server = NFS_SERVER(ino); struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo = NULL; @@ -1754,10 +1754,14 @@ lookup_again: } first = true; - do { - seq = read_seqbegin(&ctx->state->seqlock); - nfs4_stateid_copy(&stateid, &ctx->state->stateid); - } while (read_seqretry(&ctx->state->seqlock, seq)); + if (nfs4_select_rw_stateid(ctx->state, + iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, + NULL, &stateid, NULL) != 0) { + trace_pnfs_update_layout(ino, pos, count, + iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_INVALID_OPEN); + goto out_unlock; + } } else { nfs4_stateid_copy(&stateid, &lo->plh_stateid); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0d42573d423d..48d7277c60a9 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -25,6 +25,7 @@ #include "iostat.h" #include "fscache.h" #include "pnfs.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -200,6 +201,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, task_setup_data->flags |= swap_flags; rpc_ops->read_setup(hdr, msg); + trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes); } static void @@ -232,6 +234,8 @@ static int nfs_readpage_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); + trace_nfs_readpage_done(inode, task->tk_status, + hdr->args.offset, hdr->res.eof); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6b179af59b92..c9d24bae3025 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -812,7 +812,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) * Display all mount option settings */ seq_printf(m, "\n\topts:\t"); - seq_puts(m, root->d_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw"); seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : ""); seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f68083db63c8..babebbccae2a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -145,13 +145,6 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc) kref_put(&ioc->refcount, nfs_io_completion_release); } -static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) -{ - ctx->error = error; - smp_wmb(); - set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); -} - static struct nfs_page * nfs_page_private_request(struct page *page) { @@ -1383,6 +1376,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, task_setup_data->priority = priority; rpc_ops->write_setup(hdr, msg); + trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, + hdr->args.stable); nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, &task_setup_data->rpc_client, msg, hdr); @@ -1540,7 +1535,10 @@ static int nfs_writeback_done(struct rpc_task *task, status = NFS_PROTO(inode)->write_done(task, hdr); if (status != 0) return status; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); + trace_nfs_writeback_done(inode, task->tk_status, + hdr->args.offset, hdr->res.verf); if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { @@ -1669,6 +1667,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, }; /* Set up the initial task struct. */ nfs_ops->commit_setup(data, &msg); + trace_nfs_initiate_commit(data); dprintk("NFS: initiated commit call\n"); @@ -1793,6 +1792,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); + trace_nfs_commit_done(data); } static void nfs_commit_release_pages(struct nfs_commit_data *data) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index c862c2489df0..2d1d37b27dc7 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -65,7 +65,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, bex->es = PNFS_BLOCK_READ_DATA; else bex->es = PNFS_BLOCK_READWRITE_DATA; - bex->soff = (iomap.blkno << 9); + bex->soff = iomap.addr; break; case IOMAP_UNWRITTEN: if (seg->iomode & IOMODE_RW) { @@ -78,7 +78,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, } bex->es = PNFS_BLOCK_INVALID_DATA; - bex->soff = (iomap.blkno << 9); + bex->soff = iomap.addr; break; } /*FALLTHRU*/ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7ffe71a8dfb9..6a612d832e7d 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -174,7 +174,7 @@ static int nilfs_writepages(struct address_space *mapping, struct inode *inode = mapping->host; int err = 0; - if (inode->i_sb->s_flags & MS_RDONLY) { + if (sb_rdonly(inode->i_sb)) { nilfs_clear_dirty_pages(mapping, false); return -EROFS; } @@ -191,7 +191,7 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; int err; - if (inode->i_sb->s_flags & MS_RDONLY) { + if (sb_rdonly(inode->i_sb)) { /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 98835ed6bef4..c6bc1033e7d2 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -413,7 +413,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) struct super_block *sb; int err = 0; - if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + if (inode && sb_rdonly(inode->i_sb)) { /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 926682981d61..4fc018dfcfae 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -136,7 +136,7 @@ void __nilfs_error(struct super_block *sb, const char *function, va_end(args); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { nilfs_set_error(sb); if (nilfs_test_opt(nilfs, ERRORS_RO)) { @@ -478,7 +478,7 @@ static void nilfs_put_super(struct super_block *sb) nilfs_detach_log_writer(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); @@ -578,7 +578,7 @@ static int nilfs_freeze(struct super_block *sb) struct the_nilfs *nilfs = sb->s_fs_info; int err; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; /* Mark super block clean */ @@ -592,7 +592,7 @@ static int nilfs_unfreeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; down_write(&nilfs->ns_sem); @@ -898,7 +898,7 @@ int nilfs_check_feature_compatibility(struct super_block *sb, } features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; - if (!(sb->s_flags & MS_RDONLY) && features) { + if (!sb_rdonly(sb) && features) { nilfs_msg(sb, KERN_ERR, "couldn't mount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); @@ -1083,7 +1083,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_unload; } - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; @@ -1095,7 +1095,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); @@ -1144,7 +1144,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) goto out; if (*flags & MS_RDONLY) { /* Shutting down log writer */ @@ -1338,8 +1338,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, if ((flags ^ s->s_flags) & MS_RDONLY) { nilfs_msg(s, KERN_ERR, "the device already has a %s mount.", - (s->s_flags & MS_RDONLY) ? - "read-only" : "read/write"); + sb_rdonly(s) ? "read-only" : "read/write"); err = -EBUSY; goto failed_super; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index ecb49870a680..3f70f041dbe9 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -487,7 +487,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) * When remounting read-only, mark the volume clean if no volume errors * have occurred. */ - if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { static const char *es = ". Cannot remount read-write."; /* Remounting read-write. */ @@ -548,7 +548,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) NVolSetErrors(vol); return -EROFS; } - } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { + } else if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { /* Remounting read-only. */ if (!NVolErrors(vol)) { if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) @@ -732,7 +732,7 @@ hotfix_primary_boot_sector: * on a large sector device contains the whole boot loader or * just the first 512 bytes). */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { ntfs_warning(sb, "Hot-fix: Recovering invalid primary " "boot sector from backup copy."); memcpy(bh_primary->b_data, bh_backup->b_data, @@ -1789,7 +1789,7 @@ static bool load_system_files(ntfs_volume *vol) static const char *es3 = ". Run ntfsfix and/or chkdsk."; /* If a read-write mount, convert it to a read-only mount. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | ON_ERRORS_CONTINUE))) { ntfs_error(sb, "%s and neither on_errors=" @@ -1928,7 +1928,7 @@ get_ctx_vol_failed: (unsigned)le16_to_cpu(vol->vol_flags)); } /* If a read-write mount, convert it to a read-only mount. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | ON_ERRORS_CONTINUE))) { ntfs_error(sb, "%s and neither on_errors=" @@ -1961,7 +1961,7 @@ get_ctx_vol_failed: es1 = !vol->logfile_ino ? es1a : es1b; /* If a read-write mount, convert it to a read-only mount. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | ON_ERRORS_CONTINUE))) { ntfs_error(sb, "%s and neither on_errors=" @@ -2010,7 +2010,7 @@ get_ctx_vol_failed: es1 = err < 0 ? es1a : es1b; /* If a read-write mount, convert it to a read-only mount. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | ON_ERRORS_CONTINUE))) { ntfs_error(sb, "%s and neither on_errors=" @@ -2028,8 +2028,7 @@ get_ctx_vol_failed: NVolSetErrors(vol); } /* If (still) a read-write mount, mark the volume dirty. */ - if (!(sb->s_flags & MS_RDONLY) && - ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { + if (!sb_rdonly(sb) && ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { static const char *es1 = "Failed to set dirty bit in volume " "information flags"; static const char *es2 = ". Run chkdsk."; @@ -2075,8 +2074,7 @@ get_ctx_vol_failed: } #endif /* If (still) a read-write mount, empty the logfile. */ - if (!(sb->s_flags & MS_RDONLY) && - !ntfs_empty_logfile(vol->logfile_ino)) { + if (!sb_rdonly(sb) && !ntfs_empty_logfile(vol->logfile_ino)) { static const char *es1 = "Failed to empty $LogFile"; static const char *es2 = ". Mount in Windows."; @@ -2121,7 +2119,7 @@ get_ctx_vol_failed: static const char *es2 = ". Run chkdsk."; /* If a read-write mount, convert it to a read-only mount. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | ON_ERRORS_CONTINUE))) { ntfs_error(sb, "%s and neither on_errors=" @@ -2139,8 +2137,7 @@ get_ctx_vol_failed: NVolSetErrors(vol); } /* If (still) a read-write mount, mark the quotas out of date. */ - if (!(sb->s_flags & MS_RDONLY) && - !ntfs_mark_quotas_out_of_date(vol)) { + if (!sb_rdonly(sb) && !ntfs_mark_quotas_out_of_date(vol)) { static const char *es1 = "Failed to mark quotas out of date"; static const char *es2 = ". Run chkdsk."; @@ -2165,7 +2162,7 @@ get_ctx_vol_failed: static const char *es2 = ". Run chkdsk."; /* If a read-write mount, convert it to a read-only mount. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | ON_ERRORS_CONTINUE))) { ntfs_error(sb, "%s and neither on_errors=" @@ -2183,7 +2180,7 @@ get_ctx_vol_failed: NVolSetErrors(vol); } /* If (still) a read-write mount, stamp the transaction log. */ - if (!(sb->s_flags & MS_RDONLY) && !ntfs_stamp_usnjrnl(vol)) { + if (!sb_rdonly(sb) && !ntfs_stamp_usnjrnl(vol)) { static const char *es1 = "Failed to stamp transaction log " "($UsnJrnl)"; static const char *es2 = ". Run chkdsk."; @@ -2314,7 +2311,7 @@ static void ntfs_put_super(struct super_block *sb) * If a read-write mount and no volume errors have occurred, mark the * volume clean. Also, re-commit all affected inodes. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!NVolErrors(vol)) { if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) ntfs_warning(sb, "Failed to clear dirty bit " diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3f936be379a9..80733496b22a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -675,7 +675,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) } /* We're going to/from readonly mode. */ - if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { + if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { /* Disable quota accounting before remounting RO */ if (*flags & MS_RDONLY) { ret = ocfs2_susp_quotas(osb, 0); @@ -1063,7 +1063,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { status = -EACCES; mlog(ML_ERROR, "Readonly device detected but readonly " "mount was not specified.\n"); @@ -1098,7 +1098,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } if (!ocfs2_is_hard_readonly(osb)) { - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) ocfs2_set_ro_flag(osb, 0); } @@ -1179,7 +1179,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) /* Now we can initialize quotas because we can afford to wait * for cluster locks recovery now. That also means that truncation * log recovery can happen but that waits for proper quota setup */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { status = ocfs2_enable_quotas(osb); if (status < 0) { /* We have to err-out specially here because @@ -2180,8 +2180,7 @@ static int ocfs2_initialize_super(struct super_block *sb, status = -EINVAL; goto bail; } - if (!(osb->sb->s_flags & MS_RDONLY) && - (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) { + if (!sb_rdonly(osb->sb) && (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) { mlog(ML_ERROR, "couldn't mount RDWR because of " "unsupported optional features (%x).\n", i); status = -EINVAL; @@ -2567,9 +2566,7 @@ static int ocfs2_handle_error(struct super_block *sb) rv = -EIO; } else { /* default option */ rv = -EROFS; - if (sb->s_flags & MS_RDONLY && - (ocfs2_is_soft_readonly(osb) || - ocfs2_is_hard_readonly(osb))) + if (sb_rdonly(sb) && (ocfs2_is_soft_readonly(osb) || ocfs2_is_hard_readonly(osb))) return rv; pr_crit("OCFS2: File system is now read-only.\n"); diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 7a3754488312..9108ef433e6d 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -35,7 +35,7 @@ struct posix_acl *orangefs_get_acl(struct inode *inode, int type) * I don't do that for now. */ value = kmalloc(ORANGEFS_MAX_XATTR_VALUELEN, GFP_KERNEL); - if (value == NULL) + if (!value) return ERR_PTR(-ENOMEM); gossip_debug(GOSSIP_ACL_DEBUG, @@ -61,9 +61,9 @@ struct posix_acl *orangefs_get_acl(struct inode *inode, int type) return acl; } -int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int __orangefs_set_acl(struct inode *inode, struct posix_acl *acl, + int type) { - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); int error = 0; void *value = NULL; size_t size = 0; @@ -72,22 +72,6 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - umode_t mode; - - error = posix_acl_update_mode(inode, &mode, &acl); - if (error) { - gossip_err("%s: posix_acl_update_mode err: %d\n", - __func__, - error); - return error; - } - - if (inode->i_mode != mode) - SetModeFlag(orangefs_inode); - inode->i_mode = mode; - mark_inode_dirty_sync(inode); - } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; @@ -132,6 +116,42 @@ out: return error; } +int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int error; + struct iattr iattr; + int rc; + + if (type == ACL_TYPE_ACCESS && acl) { + /* + * posix_acl_update_mode checks to see if the permissions + * described by the ACL can be encoded into the + * object's mode. If so, it sets "acl" to NULL + * and "mode" to the new desired value. It is up to + * us to propagate the new mode back to the server... + */ + error = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); + if (error) { + gossip_err("%s: posix_acl_update_mode err: %d\n", + __func__, + error); + return error; + } + + if (acl) { + rc = __orangefs_set_acl(inode, acl, type); + } else { + iattr.ia_valid = ATTR_MODE; + rc = orangefs_inode_setattr(inode, &iattr); + } + + return rc; + + } else { + return -EINVAL; + } +} + int orangefs_init_acl(struct inode *inode, struct inode *dir) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -146,13 +166,14 @@ int orangefs_init_acl(struct inode *inode, struct inode *dir) return error; if (default_acl) { - error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __orangefs_set_acl(inode, default_acl, + ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index c19f0787c9c6..2826859bdc2c 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -461,13 +461,10 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) goto wakeup; - op->downcall.trailer_buf = - vmalloc(op->downcall.trailer_size); - if (op->downcall.trailer_buf == NULL) { - gossip_err("%s: failed trailer vmalloc.\n", - __func__); + op->downcall.trailer_buf = vmalloc(op->downcall.trailer_size); + if (!op->downcall.trailer_buf) goto Enomem; - } + memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); if (!copy_from_iter_full(op->downcall.trailer_buf, op->downcall.trailer_size, iter)) { diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 28f38d813ad2..336ecbf8c268 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -646,14 +646,11 @@ static int orangefs_fsync(struct file *file, loff_t end, int datasync) { - int ret = -EINVAL; + int ret; struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(file_inode(file)); struct orangefs_kernel_op_s *new_op = NULL; - /* required call */ - filemap_write_and_wait_range(file->f_mapping, start, end); - new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); if (!new_op) return -ENOMEM; diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 038d67545d9f..7ef473f3d642 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -244,20 +244,14 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) bufmap->buffer_index_array = kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); - if (!bufmap->buffer_index_array) { - gossip_err("orangefs: could not allocate %d buffer indices\n", - bufmap->desc_count); + if (!bufmap->buffer_index_array) goto out_free_bufmap; - } bufmap->desc_array = kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc), GFP_KERNEL); - if (!bufmap->desc_array) { - gossip_err("orangefs: could not allocate %d descriptors\n", - bufmap->desc_count); + if (!bufmap->desc_array) goto out_free_index_array; - } bufmap->page_count = bufmap->total_size / PAGE_SIZE; diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 716ed337f166..5f59917fd631 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -571,11 +571,8 @@ static int orangefs_prepare_cdm_array(char *debug_array_string) goto out; } - cdm_array = - kzalloc(cdm_element_count * sizeof(struct client_debug_mask), - GFP_KERNEL); + cdm_array = kcalloc(cdm_element_count, sizeof(*cdm_array), GFP_KERNEL); if (!cdm_array) { - pr_info("malloc failed for cdm_array!\n"); rc = -ENOMEM; goto out; } diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index c1b5174cb5a9..85ef87245a87 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -98,7 +98,6 @@ static int __init orangefs_init(void) orangefs_htable_ops_in_progress = kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL); if (!orangefs_htable_ops_in_progress) { - gossip_err("Failed to initialize op hashtable"); ret = -ENOMEM; goto cleanup_inode; } diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 5a1bed6c8c6a..47f3fb9cbec4 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -107,10 +107,8 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) struct orangefs_inode_s *orangefs_inode; orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, GFP_KERNEL); - if (orangefs_inode == NULL) { - gossip_err("Failed to allocate orangefs_inode\n"); + if (!orangefs_inode) return NULL; - } /* * We want to clear everything except for rw_semaphore and the diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 237c9c04dc3b..81ac88bb91ff 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -76,7 +76,7 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) + if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; fsuid = from_kuid(&init_user_ns, current_fsuid()); @@ -169,7 +169,7 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name, struct orangefs_kernel_op_s *new_op = NULL; int ret = -ENOMEM; - if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) + if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; down_write(&orangefs_inode->xattr_sem); @@ -233,13 +233,13 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name, if (size > ORANGEFS_MAX_XATTR_VALUELEN) return -EINVAL; - if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) + if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; internal_flag = convert_to_internal_xattr_flags(flags); /* This is equivalent to a removexattr */ - if (size == 0 && value == NULL) { + if (size == 0 && !value) { gossip_debug(GOSSIP_XATTR_DEBUG, "removing xattr (%s)\n", name); @@ -311,7 +311,7 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size) int i = 0; int returned_count = 0; - if (size > 0 && buffer == NULL) { + if (size > 0 && !buffer) { gossip_err("%s: bogus NULL pointers\n", __func__); return -EINVAL; } @@ -442,7 +442,7 @@ static int orangefs_xattr_get_default(const struct xattr_handler *handler, } -static struct xattr_handler orangefs_xattr_default_handler = { +static const struct xattr_handler orangefs_xattr_default_handler = { .prefix = "", /* match any name => handlers called with full name */ .get = orangefs_xattr_get_default, .set = orangefs_xattr_set_default, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index acb6f97deb97..aad97b30d5e6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -241,7 +241,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) int buflen = MAX_HANDLE_SZ; uuid_t *uuid = &lower->d_sb->s_uuid; - buf = kmalloc(buflen, GFP_TEMPORARY); + buf = kmalloc(buflen, GFP_KERNEL); if (!buf) return ERR_PTR(-ENOMEM); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 9cb0c80e5967..3309b1912241 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -833,7 +833,7 @@ static char *ovl_get_redirect(struct dentry *dentry, bool samedir) goto out; } - buf = ret = kmalloc(buflen, GFP_TEMPORARY); + buf = ret = kmalloc(buflen, GFP_KERNEL); if (!buf) goto out; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 8aef2b304b2d..c3addd1114f1 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -38,7 +38,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, return 0; goto fail; } - buf = kzalloc(prelen + res + strlen(post) + 1, GFP_TEMPORARY); + buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -103,7 +103,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (res == 0) return NULL; - fh = kzalloc(res, GFP_TEMPORARY); + fh = kzalloc(res, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); @@ -309,7 +309,7 @@ static int ovl_check_origin(struct dentry *upperdentry, BUG_ON(*ctrp); if (!*stackp) - *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); + *stackp = kmalloc(sizeof(struct path), GFP_KERNEL); if (!*stackp) { dput(origin); return -ENOMEM; @@ -418,7 +418,7 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, err = -ENOMEM; len = index->d_name.len / 2; - fh = kzalloc(len, GFP_TEMPORARY); + fh = kzalloc(len, GFP_KERNEL); if (!fh) goto fail; @@ -478,7 +478,7 @@ int ovl_get_index_name(struct dentry *origin, struct qstr *name) return PTR_ERR(fh); err = -ENOMEM; - n = kzalloc(fh->len * 2, GFP_TEMPORARY); + n = kzalloc(fh->len * 2, GFP_KERNEL); if (n) { s = bin2hex(n, fh, fh->len); *name = (struct qstr) QSTR_INIT(n, s - n); @@ -646,7 +646,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!d.stop && poe->numlower) { err = -ENOMEM; stack = kcalloc(ofs->numlower, sizeof(struct path), - GFP_TEMPORARY); + GFP_KERNEL); if (!stack) goto out_put_upper; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index cd49c0298ddf..fd5ea4facc62 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -870,7 +870,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_free_config; /* Upper fs should not be r/o */ - if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { + if (sb_rdonly(upperpath.mnt->mnt_sb)) { pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); err = -EINVAL; goto out_put_upperpath; diff --git a/fs/proc/array.c b/fs/proc/array.c index 88c355574aa0..77a8eacbe032 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -62,6 +62,7 @@ #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/sched/numa_balancing.h> +#include <linux/sched/task_stack.h> #include <linux/sched/task.h> #include <linux/sched/cputime.h> #include <linux/proc_fs.h> @@ -118,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) * simple bit tests. */ static const char * const task_state_array[] = { - "R (running)", /* 0 */ - "S (sleeping)", /* 1 */ - "D (disk sleep)", /* 2 */ - "T (stopped)", /* 4 */ - "t (tracing stop)", /* 8 */ - "X (dead)", /* 16 */ - "Z (zombie)", /* 32 */ + + /* states in TASK_REPORT: */ + "R (running)", /* 0x00 */ + "S (sleeping)", /* 0x01 */ + "D (disk sleep)", /* 0x02 */ + "T (stopped)", /* 0x04 */ + "t (tracing stop)", /* 0x08 */ + "X (dead)", /* 0x10 */ + "Z (zombie)", /* 0x20 */ + "P (parked)", /* 0x40 */ + + /* states beyond TASK_REPORT: */ + "I (idle)", /* 0x80 */ }; static inline const char *get_task_state(struct task_struct *tsk) { - unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; - - /* - * Parked tasks do not run; they sit in __kthread_parkme(). - * Without this check, we would report them as running, which is - * clearly wrong, so we report them as sleeping instead. - */ - if (tsk->state == TASK_PARKED) - state = TASK_INTERRUPTIBLE; - - BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1); - - return task_state_array[fls(state)]; + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array)); + return task_state_array[__get_task_state(tsk)]; } static inline int get_task_umask(struct task_struct *tsk) @@ -421,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * esp and eip are intentionally zeroed out. There is no * non-racy way to read them without freezing the task. * Programs that need reliable values can use ptrace(2). + * + * The only exception is if the task is core dumping because + * a program is not able to use ptrace(2) in that case. It is + * safe because the task has stopped executing permanently. */ + if (permitted && (task->flags & PF_DUMPCORE)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + } } get_task_comm(tcomm, task); diff --git a/fs/proc/base.c b/fs/proc/base.c index e5d89a0d0b8a..ad3b0762cc3e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -232,7 +232,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, goto out_mmput; } - page = (char *)__get_free_page(GFP_TEMPORARY); + page = (char *)__get_free_page(GFP_KERNEL); if (!page) { rv = -ENOMEM; goto out_mmput; @@ -813,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!mm) return 0; - page = (char *)__get_free_page(GFP_TEMPORARY); + page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; @@ -918,7 +918,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!mm || !mm->env_end) return 0; - page = (char *)__get_free_page(GFP_TEMPORARY); + page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; @@ -1630,7 +1630,7 @@ out: static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - char *tmp = (char*)__get_free_page(GFP_TEMPORARY); + char *tmp = (char *)__get_free_page(GFP_KERNEL); char *pathname; int len; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7b40e11ede9b..5589b4bd4b85 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1474,7 +1474,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN); pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); + pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_KERNEL); ret = -ENOMEM; if (!pm.buffer) goto out_mm; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index dea90b566a6e..b00b766098fa 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -145,7 +145,6 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, int is_pid) { struct mm_struct *mm = vma->vm_mm; - struct proc_maps_private *priv = m->private; unsigned long ino = 0; struct file *file; dev_t dev = 0; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index b5713fefb4c1..99dff222fe67 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -178,7 +178,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) } else { mangle(m, r->mnt_devname ? r->mnt_devname : "none"); } - seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); + seq_puts(m, sb_rdonly(sb) ? " ro" : " rw"); err = show_sb_opts(m, sb); if (err) goto out; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8381db9db6d9..50b0556a124f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, &warn_to[cnt]); if (ret) { + spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); + spin_unlock(&transfer_to[cnt]->dq_dqb_lock); goto over_quota; } } diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 07e08c7d05ca..a9c5dfe6b83e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -753,7 +753,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_XGETNEXTQUOTA: return quota_getnextxquota(sb, type, id, addr); case Q_XQUOTASYNC: - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; /* XFS quotas are fully coherent now, making this call a noop */ return 0; diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index c0187cda2c1e..a73e5b34db41 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot) if (!dquot->dq_off) { alloc = true; down_write(&dqopt->dqio_sem); + } else { + down_read(&dqopt->dqio_sem); } ret = qtree_write_dquot( sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); if (alloc) up_write(&dqopt->dqio_sem); + else + up_read(&dqopt->dqio_sem); return ret; } diff --git a/fs/read_write.c b/fs/read_write.c index 61b58c7b6531..f0d4b16873e8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * In the generic case the entire file is data, so as long as * offset isn't at the end of the file then the offset is data. */ - if (offset >= eof) + if ((unsigned long long)offset >= eof) return -ENXIO; break; case SEEK_HOLE: @@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * There is a virtual hole at the end of the file, so as long as * offset isn't i_size or larger, return i_size. */ - if (offset >= eof) + if ((unsigned long long)offset >= eof) return -ENXIO; offset = eof; break; @@ -413,7 +413,20 @@ ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, else return -EINVAL; } -EXPORT_SYMBOL(__vfs_read); + +ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) +{ + mm_segment_t old_fs; + ssize_t result; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + result = vfs_read(file, (void __user *)buf, count, pos); + set_fs(old_fs); + return result; +} +EXPORT_SYMBOL(kernel_read); ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -441,8 +454,6 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) return ret; } -EXPORT_SYMBOL(vfs_read); - static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; @@ -471,9 +482,8 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, else return -EINVAL; } -EXPORT_SYMBOL(__vfs_write); -ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) +ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; const char __user *p; @@ -496,9 +506,24 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t inc_syscw(current); return ret; } - EXPORT_SYMBOL(__kernel_write); +ssize_t kernel_write(struct file *file, const void *buf, size_t count, + loff_t *pos) +{ + mm_segment_t old_fs; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_write(file, (__force const char __user *)buf, count, pos); + set_fs(old_fs); + + return res; +} +EXPORT_SYMBOL(kernel_write); + ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; @@ -527,8 +552,6 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ return ret; } -EXPORT_SYMBOL(vfs_write); - static inline loff_t file_pos_read(struct file *file) { return file->f_pos; @@ -959,9 +982,8 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, return ret; } -EXPORT_SYMBOL(vfs_readv); -ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, +static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; @@ -978,7 +1000,6 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, } return ret; } -EXPORT_SYMBOL(vfs_writev); static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, rwf_t flags) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 873fc04e9403..11a48affa882 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1776,7 +1776,7 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) struct reiserfs_transaction_handle th; int jbegin_count = 1; - if (inode->i_sb->s_flags & MS_RDONLY) + if (sb_rdonly(inode->i_sb)) return -EROFS; /* * memory pressure can sometimes initiate write_inode calls with diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 0c882a0e2a6e..f59c667df15b 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1918,7 +1918,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * we only want to flush out transactions if we were * called with error == 0 */ - if (!error && !(sb->s_flags & MS_RDONLY)) { + if (!error && !sb_rdonly(sb)) { /* end the current trans */ BUG_ON(!th->t_trans_id); do_journal_end(th, FLUSH_ALL); diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 4f3f928076f3..64f49cafbc5b 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -386,7 +386,7 @@ void __reiserfs_error(struct super_block *sb, const char *id, printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n", sb->s_id, function, error_buf); - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return; reiserfs_info(sb, "Remounting filesystem read-only\n"); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 306e4e9d172d..5464ec517702 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -121,7 +121,7 @@ void reiserfs_schedule_old_flush(struct super_block *s) * Avoid scheduling flush when sb is being shut down. It can race * with journal shutdown and free still queued delayed work. */ - if (s->s_flags & MS_RDONLY || !(s->s_flags & MS_ACTIVE)) + if (sb_rdonly(s) || !(s->s_flags & MS_ACTIVE)) return; spin_lock(&sbi->old_work_lock); @@ -151,7 +151,7 @@ static int reiserfs_freeze(struct super_block *s) reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { int err = journal_begin(&th, s, 1); if (err) { reiserfs_block_writes(&th); @@ -599,7 +599,7 @@ static void reiserfs_put_super(struct super_block *s) * change file system state to current state if it was mounted * with read-write permissions */ - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { if (!journal_begin(&th, s, 10)) { reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -700,7 +700,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags) int err = 0; - if (inode->i_sb->s_flags & MS_RDONLY) { + if (sb_rdonly(inode->i_sb)) { reiserfs_warning(inode->i_sb, "clm-6006", "writing inode %lu on readonly FS", inode->i_ino); @@ -1525,7 +1525,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); /* remount read-only */ - if (s->s_flags & MS_RDONLY) + if (sb_rdonly(s)) /* it is read-only already */ goto out_ok_unlocked; @@ -1551,7 +1551,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); } else { /* remount read-write */ - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); goto out_ok_unlocked; /* We are read-write already */ @@ -1855,7 +1855,7 @@ static int what_hash(struct super_block *s) * the super */ if (code != UNSET_HASH && - !(s->s_flags & MS_RDONLY) && + !sb_rdonly(s) && code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); } @@ -2052,7 +2052,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (replay_only(s)) goto error_unlocked; - if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { + if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { SWARN(silent, s, "clm-7000", "Detected readonly device, marking FS readonly"); s->s_flags |= MS_RDONLY; @@ -2101,7 +2101,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) else set_bit(REISERFS_3_6, &sbi->s_properties); - if (!(s->s_flags & MS_RDONLY)) { + if (!sb_rdonly(s)) { errval = journal_begin(&th, s, 1); if (errval) { diff --git a/fs/splice.c b/fs/splice.c index ae41201d0325..f3084cce0ea6 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -364,22 +364,6 @@ static ssize_t kernel_readv(struct file *file, const struct kvec *vec, return res; } -ssize_t kernel_write(struct file *file, const char *buf, size_t count, - loff_t pos) -{ - mm_segment_t old_fs; - ssize_t res; - - old_fs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_write(file, (__force const char __user *)buf, count, &pos); - set_fs(old_fs); - - return res; -} -EXPORT_SYMBOL(kernel_write); - static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index ffb093e72b6c..1adb3346b9d6 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -165,6 +165,20 @@ config SQUASHFS_XZ If unsure, say N. +config SQUASHFS_ZSTD + bool "Include support for ZSTD compressed file systems" + depends on SQUASHFS + select ZSTD_DECOMPRESS + help + Saying Y here includes support for reading Squashfs file systems + compressed with ZSTD compression. ZSTD gives better compression than + the default ZLIB compression, while using less CPU. + + ZSTD is not the standard compression used in Squashfs and so most + file systems will be readable without selecting this option. + + If unsure, say N. + config SQUASHFS_4K_DEVBLK_SIZE bool "Use 4K device block size?" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 246a6f329d89..6655631c53ae 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -15,3 +15,4 @@ squashfs-$(CONFIG_SQUASHFS_LZ4) += lz4_wrapper.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o +squashfs-$(CONFIG_SQUASHFS_ZSTD) += zstd_wrapper.o diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index d2bc13636f79..836639810ea0 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -65,6 +65,12 @@ static const struct squashfs_decompressor squashfs_zlib_comp_ops = { }; #endif +#ifndef CONFIG_SQUASHFS_ZSTD +static const struct squashfs_decompressor squashfs_zstd_comp_ops = { + NULL, NULL, NULL, NULL, ZSTD_COMPRESSION, "zstd", 0 +}; +#endif + static const struct squashfs_decompressor squashfs_unknown_comp_ops = { NULL, NULL, NULL, NULL, 0, "unknown", 0 }; @@ -75,6 +81,7 @@ static const struct squashfs_decompressor *decompressor[] = { &squashfs_lzo_comp_ops, &squashfs_xz_comp_ops, &squashfs_lzma_unsupported_comp_ops, + &squashfs_zstd_comp_ops, &squashfs_unknown_comp_ops }; diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index a25713c031a5..0f5a8e4e58da 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -58,4 +58,8 @@ extern const struct squashfs_decompressor squashfs_lzo_comp_ops; extern const struct squashfs_decompressor squashfs_zlib_comp_ops; #endif +#ifdef CONFIG_SQUASHFS_ZSTD +extern const struct squashfs_decompressor squashfs_zstd_comp_ops; +#endif + #endif diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 506f4ba5b983..24d12fd14177 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -241,6 +241,7 @@ struct meta_index { #define LZO_COMPRESSION 3 #define XZ_COMPRESSION 4 #define LZ4_COMPRESSION 5 +#define ZSTD_COMPRESSION 6 struct squashfs_super_block { __le32 s_magic; diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c new file mode 100644 index 000000000000..eeaabf881159 --- /dev/null +++ b/fs/squashfs/zstd_wrapper.c @@ -0,0 +1,151 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * zstd_wrapper.c + */ + +#include <linux/mutex.h> +#include <linux/buffer_head.h> +#include <linux/slab.h> +#include <linux/zstd.h> +#include <linux/vmalloc.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs.h" +#include "decompressor.h" +#include "page_actor.h" + +struct workspace { + void *mem; + size_t mem_size; + size_t window_size; +}; + +static void *zstd_init(struct squashfs_sb_info *msblk, void *buff) +{ + struct workspace *wksp = kmalloc(sizeof(*wksp), GFP_KERNEL); + + if (wksp == NULL) + goto failed; + wksp->window_size = max_t(size_t, + msblk->block_size, SQUASHFS_METADATA_SIZE); + wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size); + wksp->mem = vmalloc(wksp->mem_size); + if (wksp->mem == NULL) + goto failed; + + return wksp; + +failed: + ERROR("Failed to allocate zstd workspace\n"); + kfree(wksp); + return ERR_PTR(-ENOMEM); +} + + +static void zstd_free(void *strm) +{ + struct workspace *wksp = strm; + + if (wksp) + vfree(wksp->mem); + kfree(wksp); +} + + +static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) +{ + struct workspace *wksp = strm; + ZSTD_DStream *stream; + size_t total_out = 0; + size_t zstd_err; + int k = 0; + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + + stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size); + + if (!stream) { + ERROR("Failed to initialize zstd decompressor\n"); + goto out; + } + + out_buf.size = PAGE_SIZE; + out_buf.dst = squashfs_first_page(output); + + do { + if (in_buf.pos == in_buf.size && k < b) { + int avail = min(length, msblk->devblksize - offset); + + length -= avail; + in_buf.src = bh[k]->b_data + offset; + in_buf.size = avail; + in_buf.pos = 0; + offset = 0; + } + + if (out_buf.pos == out_buf.size) { + out_buf.dst = squashfs_next_page(output); + if (out_buf.dst == NULL) { + /* Shouldn't run out of pages + * before stream is done. + */ + squashfs_finish_page(output); + goto out; + } + out_buf.pos = 0; + out_buf.size = PAGE_SIZE; + } + + total_out -= out_buf.pos; + zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf); + total_out += out_buf.pos; /* add the additional data produced */ + + if (in_buf.pos == in_buf.size && k < b) + put_bh(bh[k++]); + } while (zstd_err != 0 && !ZSTD_isError(zstd_err)); + + squashfs_finish_page(output); + + if (ZSTD_isError(zstd_err)) { + ERROR("zstd decompression error: %d\n", + (int)ZSTD_getErrorCode(zstd_err)); + goto out; + } + + if (k < b) + goto out; + + return (int)total_out; + +out: + for (; k < b; k++) + put_bh(bh[k]); + + return -EIO; +} + +const struct squashfs_decompressor squashfs_zstd_comp_ops = { + .init = zstd_init, + .free = zstd_free, + .decompress = zstd_uncompress, + .id = ZSTD_COMPRESSION, + .name = "zstd", + .supported = 1 +}; diff --git a/fs/super.c b/fs/super.c index 221cfa1f4e92..166c4ee0d0ed 100644 --- a/fs/super.c +++ b/fs/super.c @@ -360,7 +360,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock) s->s_count++; spin_unlock(&sb_lock); down_write(&s->s_umount); - if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) { put_super(s); return 1; } @@ -390,7 +390,7 @@ bool trylock_super(struct super_block *sb) { if (down_read_trylock(&sb->s_umount)) { if (!hlist_unhashed(&sb->s_instances) && - sb->s_root && (sb->s_flags & MS_BORN)) + sb->s_root && (sb->s_flags & SB_BORN)) return true; up_read(&sb->s_umount); } @@ -419,7 +419,7 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); sync_filesystem(sb); - sb->s_flags &= ~MS_ACTIVE; + sb->s_flags &= ~SB_ACTIVE; fsnotify_unmount_inodes(sb); cgroup_writeback_umount(); @@ -472,7 +472,7 @@ struct super_block *sget_userns(struct file_system_type *type, struct super_block *old; int err; - if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && + if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !(type->fs_flags & FS_USERNS_MOUNT) && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -502,7 +502,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns); + s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -547,11 +547,11 @@ struct super_block *sget(struct file_system_type *type, * mount through to here so always use &init_user_ns * until that changes. */ - if (flags & MS_SUBMOUNT) + if (flags & SB_SUBMOUNT) user_ns = &init_user_ns; /* Ensure the requestor has permissions over the target filesystem */ - if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) + if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); return sget_userns(type, test, set, flags, user_ns, data); @@ -594,7 +594,7 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && (sb->s_flags & MS_BORN)) + if (sb->s_root && (sb->s_flags & SB_BORN)) f(sb, arg); up_read(&sb->s_umount); @@ -628,7 +628,7 @@ void iterate_supers_type(struct file_system_type *type, spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && (sb->s_flags & MS_BORN)) + if (sb->s_root && (sb->s_flags & SB_BORN)) f(sb, arg); up_read(&sb->s_umount); @@ -664,7 +664,7 @@ rescan: else down_write(&sb->s_umount); /* still alive? */ - if (sb->s_root && (sb->s_flags & MS_BORN)) + if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; if (!excl) up_read(&sb->s_umount); @@ -785,7 +785,7 @@ rescan: spin_unlock(&sb_lock); down_read(&sb->s_umount); /* still alive? */ - if (sb->s_root && (sb->s_flags & MS_BORN)) + if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; up_read(&sb->s_umount); /* nope, got unmounted */ @@ -801,13 +801,13 @@ rescan: /** * do_remount_sb - asks filesystem to change mount options. * @sb: superblock in question - * @flags: numeric part of options + * @sb_flags: revised superblock flags * @data: the rest of options * @force: whether or not to force the change * * Alters the mount options of a mounted file system. */ -int do_remount_sb(struct super_block *sb, int flags, void *data, int force) +int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) { int retval; int remount_ro; @@ -816,11 +816,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return -EBUSY; #ifdef CONFIG_BLOCK - if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) + if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; #endif - remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); + remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { @@ -831,7 +831,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; - remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); + remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); } } shrink_dcache_sb(sb); @@ -850,7 +850,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) } if (sb->s_op->remount_fs) { - retval = sb->s_op->remount_fs(sb, &flags, data); + retval = sb->s_op->remount_fs(sb, &sb_flags, data); if (retval) { if (!force) goto cancel_readonly; @@ -859,7 +859,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) sb->s_type->name, retval); } } - sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); + sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); /* Needs to be ordered wrt mnt_is_readonly() */ smp_wmb(); sb->s_readonly_remount = 0; @@ -892,12 +892,12 @@ static void do_emergency_remount(struct work_struct *work) sb->s_count++; spin_unlock(&sb_lock); down_write(&sb->s_umount); - if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) && - !(sb->s_flags & MS_RDONLY)) { + if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && + !sb_rdonly(sb)) { /* * What lock protects sb->s_flags?? */ - do_remount_sb(sb, MS_RDONLY, NULL, 1); + do_remount_sb(sb, SB_RDONLY, NULL, 1); } up_write(&sb->s_umount); spin_lock(&sb_lock); @@ -1023,7 +1023,7 @@ struct dentry *mount_ns(struct file_system_type *fs_type, /* Don't allow mounting unless the caller has CAP_SYS_ADMIN * over the namespace. */ - if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN)) + if (!(flags & SB_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags, @@ -1033,13 +1033,13 @@ struct dentry *mount_ns(struct file_system_type *fs_type, if (!sb->s_root) { int err; - err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + err = fill_super(sb, data, flags & SB_SILENT ? 1 : 0); if (err) { deactivate_locked_super(sb); return ERR_PTR(err); } - sb->s_flags |= MS_ACTIVE; + sb->s_flags |= SB_ACTIVE; } return dget(sb->s_root); @@ -1071,7 +1071,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, fmode_t mode = FMODE_READ | FMODE_EXCL; int error = 0; - if (!(flags & MS_RDONLY)) + if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1089,14 +1089,14 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC, + s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC, bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; if (s->s_root) { - if ((flags ^ s->s_flags) & MS_RDONLY) { + if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); error = -EBUSY; goto error_bdev; @@ -1116,13 +1116,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); goto error; } - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; bdev->bd_super = s; } @@ -1162,12 +1162,12 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, if (IS_ERR(s)) return ERR_CAST(s); - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; return dget(s->s_root); } EXPORT_SYMBOL(mount_nodev); @@ -1188,12 +1188,12 @@ struct dentry *mount_single(struct file_system_type *fs_type, if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } - s->s_flags |= MS_ACTIVE; + s->s_flags |= SB_ACTIVE; } else { do_remount_sb(s, flags, data, 0); } @@ -1227,7 +1227,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) sb = root->d_sb; BUG_ON(!sb); WARN_ON(!sb->s_bdi); - sb->s_flags |= MS_BORN; + sb->s_flags |= SB_BORN; error = security_sb_kern_mount(sb, flags, secdata); if (error) @@ -1434,12 +1434,12 @@ int freeze_super(struct super_block *sb) return -EBUSY; } - if (!(sb->s_flags & MS_BORN)) { + if (!(sb->s_flags & SB_BORN)) { up_write(&sb->s_umount); return 0; /* sic - it's "nothing to do" */ } - if (sb->s_flags & MS_RDONLY) { + if (sb_rdonly(sb)) { /* Nothing to do really... */ sb->s_writers.frozen = SB_FREEZE_COMPLETE; up_write(&sb->s_umount); @@ -1502,7 +1502,7 @@ int thaw_super(struct super_block *sb) return -EINVAL; } - if (sb->s_flags & MS_RDONLY) { + if (sb_rdonly(sb)) { sb->s_writers.frozen = SB_UNFROZEN; goto out; } diff --git a/fs/sync.c b/fs/sync.c index 2e3fd7d94d2d..a576aa2e6b09 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -57,7 +57,7 @@ int sync_filesystem(struct super_block *sb) /* * No point in syncing out anything if the filesystem is read-only. */ - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; ret = __sync_filesystem(sb, 0); @@ -69,13 +69,13 @@ EXPORT_SYMBOL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) sync_inodes_sb(sb); } static void sync_fs_one_sb(struct super_block *sb, void *arg) { - if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) + if (!sb_rdonly(sb) && sb->s_op->sync_fs) sb->s_op->sync_fs(sb, *(int *)arg); } diff --git a/fs/sysv/balloc.c b/fs/sysv/balloc.c index 921c053fc052..862c1f74a583 100644 --- a/fs/sysv/balloc.c +++ b/fs/sysv/balloc.c @@ -231,7 +231,7 @@ trust_sb: Ecount: printk("sysv_count_free_blocks: free block count was %d, " "correcting to %d\n", sb_count, count); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { *sbi->s_free_blocks = cpu_to_fs32(sbi, count); dirty_sb(sb); } diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 53f1b78996dd..eb963fbb7903 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -220,7 +220,7 @@ Einval: printk("sysv_count_free_inodes: " "free inode count was %d, correcting to %d\n", sb_count, count); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { *sbi->s_sb_total_free_inodes = cpu_to_fs16(SYSV_SB(sb), count); dirty_sb(sb); } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 858fb72f9e0f..1c8bf9453a71 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -70,7 +70,7 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { /* XXX ext2 also updates the state here */ mark_buffer_dirty(sbi->s_bh1); if (sbi->s_bh1 != sbi->s_bh2) diff --git a/fs/sysv/super.c b/fs/sysv/super.c index eda10959714f..0d56e486b392 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -216,7 +216,7 @@ static int detect_sysv(struct sysv_sb_info *sbi, struct buffer_head *bh) if (fs16_to_cpu(sbi, sbd->s_nfree) == 0xffff) { sbi->s_type = FSTYPE_AFS; sbi->s_forced_ro = 1; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { printk("SysV FS: SCO EAFS on %s detected, " "forcing read-only mode.\n", sb->s_id); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index bffadbb67e47..5496b17b959c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1159,7 +1159,7 @@ static int mount_ubifs(struct ubifs_info *c) long long x, y; size_t sz; - c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); + c->ro_mount = !!sb_rdonly(c->vfs_sb); /* Suppress error messages while probing if MS_SILENT is set */ c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); diff --git a/fs/udf/super.c b/fs/udf/super.c index 93c59630512b..99cb81d0077f 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -673,7 +673,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) sbi->s_dmode = uopt.dmode; write_unlock(&sbi->s_cred_lock); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) goto out_unlock; if (*flags & MS_RDONLY) @@ -1017,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition, fe = udf_iget_special(sb, &addr); if (IS_ERR(fe)) { - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); else { udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); @@ -1341,7 +1341,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) * writing to it (we overwrite blocks instead of relocating * them). */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { ret = -EACCES; goto out_bh; } @@ -2205,7 +2205,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) ret = -EINVAL; goto error_out; } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION && - !(sb->s_flags & MS_RDONLY)) { + !sb_rdonly(sb)) { ret = -EACCES; goto error_out; } @@ -2226,7 +2226,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & UDF_PART_FLAG_READ_ONLY && - !(sb->s_flags & MS_RDONLY)) { + !sb_rdonly(sb)) { ret = -EACCES; goto error_out; } @@ -2245,7 +2245,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) le16_to_cpu(ts.year), ts.month, ts.day, ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); } - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { udf_open_lvid(sb); lvid_open = true; } @@ -2332,7 +2332,7 @@ static void udf_put_super(struct super_block *sb) if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); #endif - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); udf_sb_free_partitions(sb); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 0a4f58a5073c..6440003f8ddc 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -278,7 +278,7 @@ void ufs_error (struct super_block * sb, const char * function, uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); ufs_mark_sb_dirty(sb); @@ -312,7 +312,7 @@ void ufs_panic (struct super_block * sb, const char * function, uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); ufs_mark_sb_dirty(sb); @@ -742,7 +742,7 @@ static void ufs_put_super(struct super_block *sb) UFSD("ENTER\n"); - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) ufs_put_super_internal(sb); cancel_delayed_work_sync(&sbi->sync_work); @@ -793,7 +793,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("ENTER\n"); #ifndef CONFIG_UFS_FS_WRITE - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); return -EROFS; } @@ -805,7 +805,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; sbi->sb = sb; - UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); + UFSD("flag %u\n", (int)(sb_rdonly(sb))); mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); @@ -902,7 +902,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_sbsize = super_block_size = 2048; uspi->s_sbbase = 0; flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=old is supported read-only\n"); sb->s_flags |= MS_RDONLY; @@ -918,7 +918,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_sbbase = 0; uspi->s_dirblksize = 1024; flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=nextstep is supported read-only\n"); sb->s_flags |= MS_RDONLY; @@ -934,7 +934,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_sbbase = 0; uspi->s_dirblksize = 1024; flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=nextstep-cd is supported read-only\n"); sb->s_flags |= MS_RDONLY; @@ -950,7 +950,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_sbbase = 0; uspi->s_dirblksize = 1024; flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=openstep is supported read-only\n"); sb->s_flags |= MS_RDONLY; @@ -965,7 +965,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_sbsize = super_block_size = 2048; uspi->s_sbbase = 0; flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { if (!silent) pr_info("ufstype=hp is supported read-only\n"); sb->s_flags |= MS_RDONLY; @@ -1273,7 +1273,7 @@ magic_found: /* * Read cylinder group structures */ - if (!(sb->s_flags & MS_RDONLY)) + if (!sb_rdonly(sb)) if (!ufs_read_cylinder_structures(sb)) goto failed; @@ -1328,7 +1328,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) return -EINVAL; } - if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + if ((bool)(*mount_flags & MS_RDONLY) == sb_rdonly(sb)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; mutex_unlock(&UFS_SB(sb)->s_lock); return 0; diff --git a/fs/utimes.c b/fs/utimes.c index 6571d8c848a0..51edb9f9507c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -22,7 +22,7 @@ */ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times) { - struct timespec tv[2]; + struct timespec64 tv[2]; if (times) { if (get_user(tv[0].tv_sec, ×->actime) || @@ -44,7 +44,7 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } -static int utimes_common(const struct path *path, struct timespec *times) +static int utimes_common(const struct path *path, struct timespec64 *times) { int error; struct iattr newattrs; @@ -115,7 +115,7 @@ out: * must be owner or have write permission. * Else, update from *times, must be owner or super user. */ -long do_utimes(int dfd, const char __user *filename, struct timespec *times, +long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, int flags) { int error = -EINVAL; @@ -167,10 +167,11 @@ out: SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, struct timespec __user *, utimes, int, flags) { - struct timespec tstimes[2]; + struct timespec64 tstimes[2]; if (utimes) { - if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) + if ((get_timespec64(&tstimes[0], &utimes[0]) || + get_timespec64(&tstimes[1], &utimes[1]))) return -EFAULT; /* Nothing to do, we must not even check the path. */ @@ -186,7 +187,7 @@ SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename, struct timeval __user *, utimes) { struct timeval times[2]; - struct timespec tstimes[2]; + struct timespec64 tstimes[2]; if (utimes) { if (copy_from_user(×, utimes, sizeof(times))) @@ -224,7 +225,7 @@ SYSCALL_DEFINE2(utimes, char __user *, filename, COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, struct compat_utimbuf __user *, t) { - struct timespec tv[2]; + struct timespec64 tv[2]; if (t) { if (get_user(tv[0].tv_sec, &t->actime) || @@ -238,11 +239,11 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) { - struct timespec tv[2]; + struct timespec64 tv[2]; if (t) { - if (compat_get_timespec(&tv[0], &t[0]) || - compat_get_timespec(&tv[1], &t[1])) + if (compat_get_timespec64(&tv[0], &t[0]) || + compat_get_timespec64(&tv[1], &t[1])) return -EFAULT; if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) @@ -253,7 +254,7 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) { - struct timespec tv[2]; + struct timespec64 tv[2]; if (t) { if (get_user(tv[0].tv_sec, &t[0].tv_sec) || diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index b008ff3250eb..df3e600835e8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -156,7 +156,8 @@ __xfs_ag_resv_free( trace_xfs_ag_resv_free(pag, type, 0); resv = xfs_perag_resv(pag, type); - pag->pag_mount->m_ag_max_usable += resv->ar_asked; + if (pag->pag_agno == 0) + pag->pag_mount->m_ag_max_usable += resv->ar_asked; /* * AGFL blocks are always considered "free", so whatever * was reserved at mount time must be given back at umount. @@ -216,7 +217,14 @@ __xfs_ag_resv_init( return error; } - mp->m_ag_max_usable -= ask; + /* + * Reduce the maximum per-AG allocation length by however much we're + * trying to reserve for an AG. Since this is a filesystem-wide + * counter, we only make the adjustment for AG 0. This assumes that + * there aren't any AGs hungrier for per-AG reservation than AG 0. + */ + if (pag->pag_agno == 0) + mp->m_ag_max_usable -= ask; resv = xfs_perag_resv(pag, type); resv->ar_asked = ask; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 459f4b4f08fe..044a363119be 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,7 +49,6 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" -#include "xfs_rmap_btree.h" #include "xfs_icache.h" @@ -192,12 +191,8 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ - xfs_filblks_t orig_len; mp = ip->i_mount; - - /* Calculate the worst-case size of the bmbt. */ - orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -205,20 +200,12 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) { - rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) + return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; - break; - } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } - - /* Calculate the worst-case size of the rmapbt. */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) - rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + - mp->m_rmap_maxlevels; - return rval; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 29172609f2a3..f18e5932aec4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -343,7 +343,8 @@ xfs_end_io( error = xfs_reflink_end_cow(ip, offset, size); break; case XFS_IO_UNWRITTEN: - error = xfs_iomap_write_unwritten(ip, offset, size); + /* writeback should never update isize */ + error = xfs_iomap_write_unwritten(ip, offset, size, false); break; default: ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index cd9a5400ba4f..bc6c6e10a969 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1459,7 +1459,19 @@ xfs_shift_file_space( return error; /* - * The extent shiting code works on extent granularity. So, if + * Clean out anything hanging around in the cow fork now that + * we've flushed all the dirty data out to disk to avoid having + * CoW extents at the wrong offsets. + */ + if (xfs_is_reflink_inode(ip)) { + error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF, + true); + if (error) + return error; + } + + /* + * The extent shifting code works on extent granularity. So, if * stop_fsb is not the starting block of extent, we need to split * the extent at stop_fsb. */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index da14658da310..2f97c12ca75e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map( int size; int offset; - total_nr_pages = bp->b_page_count; - /* skip the pages in the buffer before the start offset */ page_index = 0; offset = *buf_offset; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index bd786a9ac2c3..eaf86f55b7f2 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -347,7 +347,7 @@ xfs_verifier_error( { struct xfs_mount *mp = bp->b_target->bt_mount; - xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx", + xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", __return_address, bp->b_ops->name, bp->b_bn); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ec3e44fcf771..309e26c9dddb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -58,7 +58,7 @@ xfs_zero_range( xfs_off_t count, bool *did_zero) { - return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); + return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops); } int @@ -259,7 +259,11 @@ xfs_file_buffered_aio_read( trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); - xfs_ilock(ip, XFS_IOLOCK_SHARED); + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } ret = generic_file_read_iter(iocb, to); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -373,8 +377,6 @@ restart: */ spin_lock(&ip->i_flags_lock); if (iocb->ki_pos > i_size_read(inode)) { - bool zero = false; - spin_unlock(&ip->i_flags_lock); if (!drained_dio) { if (*iolock == XFS_IOLOCK_SHARED) { @@ -395,7 +397,7 @@ restart: drained_dio = true; goto restart; } - error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); + error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL); if (error) return error; } else @@ -432,7 +434,6 @@ xfs_dio_write_end_io( struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); loff_t offset = iocb->ki_pos; - bool update_size = false; int error = 0; trace_xfs_end_io_direct_write(ip, offset, size); @@ -443,6 +444,21 @@ xfs_dio_write_end_io( if (size <= 0) return size; + if (flags & IOMAP_DIO_COW) { + error = xfs_reflink_end_cow(ip, offset, size); + if (error) + return error; + } + + /* + * Unwritten conversion updates the in-core isize after extent + * conversion but before updating the on-disk size. Updating isize any + * earlier allows a racing dio read to find unwritten extents before + * they are converted. + */ + if (flags & IOMAP_DIO_UNWRITTEN) + return xfs_iomap_write_unwritten(ip, offset, size, true); + /* * We need to update the in-core inode size here so that we don't end up * with the on-disk inode size being outside the in-core inode size. We @@ -457,20 +473,11 @@ xfs_dio_write_end_io( spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) { i_size_write(inode, offset + size); - update_size = true; - } - spin_unlock(&ip->i_flags_lock); - - if (flags & IOMAP_DIO_COW) { - error = xfs_reflink_end_cow(ip, offset, size); - if (error) - return error; - } - - if (flags & IOMAP_DIO_UNWRITTEN) - error = xfs_iomap_write_unwritten(ip, offset, size); - else if (update_size) + spin_unlock(&ip->i_flags_lock); error = xfs_setfilesize(ip, offset, size); + } else { + spin_unlock(&ip->i_flags_lock); + } return error; } @@ -636,6 +643,9 @@ xfs_file_buffered_aio_write( int enospc = 0; int iolock; + if (iocb->ki_flags & IOCB_NOWAIT) + return -EOPNOTSUPP; + write_retry: iolock = XFS_IOLOCK_EXCL; xfs_ilock(ip, iolock); @@ -912,7 +922,7 @@ xfs_file_open( return -EFBIG; if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) return -EIO; - file->f_mode |= FMODE_AIO_NOWAIT; + file->f_mode |= FMODE_NOWAIT; return 0; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5599dda4727a..4ec5b7f45401 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1624,10 +1624,12 @@ xfs_itruncate_extents( goto out; /* - * Clear the reflink flag if we truncated everything. + * Clear the reflink flag if there are no data fork blocks and + * there are no extents staged in the cow fork. */ - if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { + if (ip->i_d.di_nblocks == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; xfs_inode_clear_cowblocks_tag(ip); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6d0f74ec31e8..a705f34b58fa 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -745,7 +745,7 @@ xfs_iflush_done( */ iip = INODE_ITEM(blip); if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || - lip->li_flags & XFS_LI_FAILED) + (blip->li_flags & XFS_LI_FAILED)) need_ail++; blip = next; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5049e8ab6e30..aa75389be8cf 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate( int *join_flags) { struct inode *inode = VFS_I(ip); + struct super_block *sb = inode->i_sb; int error; *join_flags = 0; @@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate( if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; - if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) + if (bdev_dax_supported(sb, sb->s_blocksize) < 0) return -EINVAL; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a1909bc064e9..9744b4819e0d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -54,13 +54,13 @@ xfs_bmbt_to_iomap( struct xfs_mount *mp = ip->i_mount; if (imap->br_startblock == HOLESTARTBLOCK) { - iomap->blkno = IOMAP_NULL_BLOCK; + iomap->addr = IOMAP_NULL_ADDR; iomap->type = IOMAP_HOLE; } else if (imap->br_startblock == DELAYSTARTBLOCK) { - iomap->blkno = IOMAP_NULL_BLOCK; + iomap->addr = IOMAP_NULL_ADDR; iomap->type = IOMAP_DELALLOC; } else { - iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock); + iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock)); if (imap->br_state == XFS_EXT_UNWRITTEN) iomap->type = IOMAP_UNWRITTEN; else @@ -829,7 +829,8 @@ int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool update_isize) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -840,6 +841,7 @@ xfs_iomap_write_unwritten( xfs_trans_t *tp; xfs_bmbt_irec_t imap; struct xfs_defer_ops dfops; + struct inode *inode = VFS_I(ip); xfs_fsize_t i_size; uint resblks; int error; @@ -899,7 +901,8 @@ xfs_iomap_write_unwritten( i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); if (i_size > offset + count) i_size = offset + count; - + if (update_isize && i_size > i_size_read(inode)) + i_size_write(inode, i_size); i_size = xfs_new_eof(ip, i_size); if (i_size) { ip->i_d.di_size = i_size; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 00db3ecea084..ee535065c5d0 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 2f2dc3c09ad0..4246876df7b7 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -274,7 +274,7 @@ xfs_fs_commit_blocks( (end - 1) >> PAGE_SHIFT); WARN_ON_ONCE(error); - error = xfs_iomap_write_unwritten(ip, start, length); + error = xfs_iomap_write_unwritten(ip, start, length, false); if (error) goto out_drop_iolock; } diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index de9493253edf..a65108594a07 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -125,7 +125,7 @@ xfs_fs_set_info( struct xfs_mount *mp = XFS_M(sb); struct qc_dqblk newlim; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; @@ -175,7 +175,7 @@ xfs_quota_enable( { struct xfs_mount *mp = XFS_M(sb); - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; @@ -190,7 +190,7 @@ xfs_quota_disable( { struct xfs_mount *mp = XFS_M(sb); - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; @@ -208,7 +208,7 @@ xfs_fs_rm_xquota( struct xfs_mount *mp = XFS_M(sb); unsigned int flags = 0; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; if (XFS_IS_QUOTA_ON(mp)) @@ -279,7 +279,7 @@ xfs_fs_set_dqblk( { struct xfs_mount *mp = XFS_M(sb); - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3008f31753df..584cf2d573ba 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -210,7 +210,7 @@ xfs_parseargs( /* * Copy binary VFS mount flags we are interested in. */ - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) mp->m_flags |= XFS_MOUNT_RDONLY; if (sb->s_flags & MS_DIRSYNC) mp->m_flags |= XFS_MOUNT_DIRSYNC; @@ -1654,6 +1654,16 @@ xfs_fs_fill_super( "DAX and reflink have not been tested together!"); } + if (mp->m_flags & XFS_MOUNT_DISCARD) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) { + xfs_warn(mp, "mounting with \"discard\" option, but " + "the device does not support discard"); + mp->m_flags &= ~XFS_MOUNT_DISCARD; + } + } + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, |