From dfb7c0ceab57fee7618f4c9c31c5a89254e8530a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Dec 2012 09:47:39 +0900 Subject: f2fs: remove set_page_dirty for atomic f2fs_end_io_write We should guarantee not to do *scheduling while atomic*. I found, in atomic f2fs_end_io_write(), there is a set_page_dirty() call to deal with IO errors. But, set_page_dirty() calls: -> f2fs_set_data_page_dirty() -> set_dirty_dir_page() -> cond_resched() which results in scheduling. In order to avoid this, I'd like to remove simply set_page_dirty(), since the page is already marked as ERROR and f2fs will be operated as the read-only mode as well. So, there is no recovery issue with this. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1b26e4ea1016..8bc1b6fdcf71 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -631,7 +631,6 @@ static void f2fs_end_io_write(struct bio *bio, int err) if (page->mapping) set_bit(AS_EIO, &page->mapping->flags); set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); - set_page_dirty(page); } end_page_writeback(page); dec_page_count(p->sbi, F2FS_WRITEBACK); -- cgit v1.2.3 From 1362b5e347e27102ea0fa99c9932bca1ecde330f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Dec 2012 19:45:49 +0900 Subject: f2fs: fix wrong calculation on f_files in statfs In f2fs_statfs(), f_files should be the total number of available inodes instead of the currently allocated inodes. So, this patch should resolve the reported bug below. Note that, showing 10% usage is not a bug, since f2fs reveals whole volume size as much as possible and shows the space overhead as *used*. This policy is fair enough with respect to other file systems. (loop0 is backed by 1GiB file) $ mkfs.f2fs /dev/loop0 F2FS-tools: Ver: 1.1.0 (2012-12-11) Info: sector size = 512 Info: total sectors = 2097152 (in 512bytes) Info: zone aligned segment0 blkaddr: 512 Info: format successful $ mount /dev/loop0 mnt/ $ df mnt/ Filesystem 1K-blocks Used Available Use% Mounted on /dev/loop0 1046528 98312 929784 10% /home/zeta/linux-devel/mtd-bench/mnt $ df mnt/ -i Filesystem Inodes IUsed IFree IUse% Mounted on /dev/loop0 1 -465918 465919 - /home/zeta/linux-devel/mtd-bench/mnt Notice IUsed is negative. Also, 10% usage on a fresh f2fs seems too much to be correct. Reported-and-Tested-by: Ezequiel Garcia Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 13867322cf5a..f4d9e03723db 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -148,8 +148,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); - buf->f_files = valid_inode_count(sbi); - buf->f_ffree = sbi->total_node_count - valid_node_count(sbi); + buf->f_files = sbi->total_node_count; + buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); buf->f_namelen = F2FS_MAX_NAME_LEN; buf->f_fsid.val[0] = (u32)id; -- cgit v1.2.3 From 38e0abdcfb5e69aa61a1e9b474d434afc1c177a9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 13 Dec 2012 23:44:11 +0900 Subject: f2fs: fix up f2fs_get_parent issue to retrieve correct parent inode number Test Case: [NFS Client] ls -lR . [NFS Server] while [ 1 ] do echo 3 > /proc/sys/vm/drop_caches done Error on NFS Client: "No such file or directory" When cache is dropped at the server, it results in lookup failure at the NFS client due to non-connection with the parent. The default path is it initiates a lookup by calculating the hash value for the name, even though the hash values stored on the disk for "." and ".." is maintained as zero, which results in failure from find_in_block due to not matching HASH values. Fix up, by using the correct hashing values for these entries. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 ++-- fs/f2fs/hash.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b4e24f32b54e..e1f66df0f97d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -540,13 +540,13 @@ int f2fs_make_empty(struct inode *inode, struct inode *parent) de = &dentry_blk->dentry[0]; de->name_len = cpu_to_le16(1); - de->hash_code = 0; + de->hash_code = f2fs_dentry_hash(".", 1); de->ino = cpu_to_le32(inode->i_ino); memcpy(dentry_blk->filename[0], ".", 1); set_de_type(de, inode); de = &dentry_blk->dentry[1]; - de->hash_code = 0; + de->hash_code = f2fs_dentry_hash("..", 2); de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(dentry_blk->filename[1], "..", 2); diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index a60f04200f8b..5e48baca3597 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -76,6 +76,10 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len) const char *p; __u32 in[8], buf[4]; + if ((len <= 2) && (name[0] == '.') && + (name[1] == '.' || name[1] == '\0')) + return 0; + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; -- cgit v1.2.3 From 398b1ac5a57219823f942a8d3665b27ab99354de Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Dec 2012 15:28:39 +0900 Subject: f2fs: fix handling errors got by f2fs_write_inode Ruslan reported that f2fs hangs with an infinite loop in f2fs_sync_file(): while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0) f2fs_write_inode(inode, NULL); The reason was revealed that the cold flag is not set even thought this inode is a normal file. Therefore, sync_node_pages() skips to write node blocks since it only writes cold node blocks. The cold flag is stored to the node_footer in node block, and whenever a new node page is allocated, it is set according to its file type, file or directory. But, after sudden-power-off, when recovering the inode page, f2fs doesn't recover its cold flag. So, let's assign the cold flag in more right places. One more thing: If f2fs_write_inode() returns an error due to whatever situations, there would be no dirty node pages so that sync_node_pages() returns zero. (i.e., zero means nothing was written.) Reported-by: Ruslan N. Marchenko Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 ++ fs/f2fs/file.c | 10 ++++++---- fs/f2fs/inode.c | 1 + fs/f2fs/node.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e1f66df0f97d..4a78d6c4f3a7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -11,6 +11,7 @@ #include #include #include "f2fs.h" +#include "node.h" #include "acl.h" static unsigned long dir_blocks(struct inode *inode) @@ -308,6 +309,7 @@ static int init_inode_metadata(struct inode *inode, struct dentry *dentry) ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); + set_cold_node(inode, ipage); init_dent_inode(dentry, ipage); f2fs_put_page(ipage, 1); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f9e085dfb1f0..7f9ea9271ebe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -160,15 +160,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (need_to_sync_dir(sbi, inode)) need_cp = true; - f2fs_write_inode(inode, NULL); - if (need_cp) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); clear_inode_flag(F2FS_I(inode), FI_NEED_CP); } else { - while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0) - f2fs_write_inode(inode, NULL); + /* if there is no written node page, write its inode page */ + while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index df5fb381ebf1..bf20b4d03214 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -203,6 +203,7 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); + set_cold_node(inode, node_page); set_page_dirty(node_page); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 19870361497e..dffac1c11f63 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -834,11 +834,11 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; } set_node_addr(sbi, &new_ni, NEW_ADDR); + set_cold_node(dn->inode, page); dn->node_page = page; sync_inode_page(dn); set_page_dirty(page); - set_cold_node(dn->inode, page); if (ofs == 0) inc_valid_inode_count(sbi); -- cgit v1.2.3 From 30f0c75858c46a0273ccb838de401b1f5fdebe6f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Dec 2012 16:09:19 +0900 Subject: f2fs: should recover orphan and fsync data The recovery routine should do all the time regardless of normal umount action. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f4d9e03723db..50240d28ca24 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -528,8 +528,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* if there are nt orphan nodes free them */ err = -EINVAL; - if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && - recover_orphan_inodes(sbi)) + if (recover_orphan_inodes(sbi)) goto free_node_inode; /* read root inode and dentry */ @@ -548,8 +547,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* recover fsynced data */ - if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && - !test_opt(sbi, DISABLE_ROLL_FORWARD)) + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) recover_fsync_data(sbi); /* After POR, we can run background GC thread */ -- cgit v1.2.3 From 1efef832020ef392deb2cd3d74e0c316711245be Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Dec 2012 16:25:21 +0900 Subject: f2fs: do f2fs_balance_fs in front of dir operations In order to conserve free sections to deal with the worst-case scenarios, f2fs should be able to freeze all the directory operations especially when there are not enough free sections. The f2fs_balance_fs() is for this use. When FS utilization becomes almost 100%, directory operations can be failed due to -ENOSPC frequently, which produces some dirty node pages occasionally. Previously, in such a case, f2fs_balance_fs() is not able to be triggered since it is triggered only if the directory operation ends up with success. So, this patch triggers f2fs_balance_fs() at first before handling directory operations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 89b7675dc377..b42389f80011 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -123,6 +123,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + f2fs_balance_fs(sbi); + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -144,8 +146,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (!sbi->por_doing) d_instantiate(dentry, inode); unlock_new_inode(inode); - - f2fs_balance_fs(sbi); return 0; out: clear_nlink(inode); @@ -163,6 +163,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_SB(sb); int err; + f2fs_balance_fs(sbi); + inode->i_ctime = CURRENT_TIME; atomic_inc(&inode->i_count); @@ -172,8 +174,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, goto out; d_instantiate(dentry, inode); - - f2fs_balance_fs(sbi); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -223,6 +223,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct page *page; int err = -ENOENT; + f2fs_balance_fs(sbi); + de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) goto fail; @@ -238,7 +240,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); - f2fs_balance_fs(sbi); fail: return err; } @@ -252,6 +253,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, unsigned symlen = strlen(symname) + 1; int err; + f2fs_balance_fs(sbi); + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -268,9 +271,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); - - f2fs_balance_fs(sbi); - return err; out: clear_nlink(inode); @@ -286,6 +286,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + f2fs_balance_fs(sbi); + inode = f2fs_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -305,7 +307,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) d_instantiate(dentry, inode); unlock_new_inode(inode); - f2fs_balance_fs(sbi); return 0; out_fail: @@ -336,6 +337,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + f2fs_balance_fs(sbi); + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -350,9 +353,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); unlock_new_inode(inode); - - f2fs_balance_fs(sbi); - return 0; out: clear_nlink(inode); @@ -376,6 +376,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *new_entry; int err = -ENOENT; + f2fs_balance_fs(sbi); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) goto out; @@ -441,8 +443,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } mutex_unlock_op(sbi, RENAME); - - f2fs_balance_fs(sbi); return 0; out_dir: -- cgit v1.2.3 From 690e4a3ead5f88fc95f7650816d1376aa2e79db5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Dec 2012 22:19:30 +0100 Subject: f2fs: add missing #include m68k allmodconfig: fs/f2fs/data.c: In function ‘read_end_io’: fs/f2fs/data.c:311: error: implicit declaration of function ‘prefetchw’ fs/f2fs/segment.c: In function ‘f2fs_end_io_write’: fs/f2fs/segment.c:628: error: implicit declaration of function ‘prefetchw’ Signed-off-by: Geert Uytterhoeven Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/segment.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 655aeabc1dd4..3aa5ce7cab83 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8bc1b6fdcf71..ca7b5ffb09d5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "f2fs.h" -- cgit v1.2.3 From 71e9fec548a95b2a4cf378646addd5d3098684a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 20 Dec 2012 15:10:06 +0900 Subject: f2fs: invalidate the node page if allocation is failed The new_node_page() is processed as the following procedure. 1. A new node page is allocated. 2. Set PageUptodate with proper footer information. 3. Check if there is a free space for allocation 4.a. If there is no space, f2fs returns with -ENOSPC. 4.b. Otherwise, go next. In the case of step #4.a, f2fs remains a wrong node page in the page cache with the uptodate flag. Also, even though a new node page is allocated successfully, an error can be occurred afterwards due to allocation failure of the other data structures. In such a case, remove_inode_page() would be triggered, so that we have to clear uptodate flag in truncate_node() too. So, we should remove the uptodate flag, if allocation is failed. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dffac1c11f63..e85643cc74a9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -484,12 +484,14 @@ static void truncate_node(struct dnode_of_data *dn) struct node_info ni; get_node_info(sbi, dn->nid, &ni); + if (dn->inode->i_blocks == 0) { + BUG_ON(ni.blk_addr != NULL_ADDR); + goto invalidate; + } BUG_ON(ni.blk_addr == NULL_ADDR); - if (ni.blk_addr != NULL_ADDR) - invalidate_blocks(sbi, ni.blk_addr); - /* Deallocate node address */ + invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, dn->inode, 1); set_node_addr(sbi, &ni, NULL_ADDR); @@ -499,7 +501,7 @@ static void truncate_node(struct dnode_of_data *dn) } else { sync_inode_page(dn); } - +invalidate: clear_node_page_dirty(dn->node_page); F2FS_SET_SB_DIRT(sbi); @@ -768,20 +770,12 @@ int remove_inode_page(struct inode *inode) dn.inode_page_locked = 1; truncate_node(&dn); } - if (inode->i_blocks == 1) { - /* inernally call f2fs_put_page() */ - set_new_dnode(&dn, inode, page, page, ino); - truncate_node(&dn); - } else if (inode->i_blocks == 0) { - struct node_info ni; - get_node_info(sbi, inode->i_ino, &ni); - /* called after f2fs_new_inode() is failed */ - BUG_ON(ni.blk_addr != NULL_ADDR); - f2fs_put_page(page, 1); - } else { - BUG(); - } + /* 0 is possible, after f2fs_new_inode() is failed */ + BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); + set_new_dnode(&dn, inode, page, page, ino); + truncate_node(&dn); + mutex_unlock_op(sbi, NODE_TRUNC); return 0; } @@ -845,6 +839,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) return page; fail: + clear_node_page_dirty(page); f2fs_put_page(page, 1); return ERR_PTR(err); } -- cgit v1.2.3 From 12a67146e35ba1d04ac4a5430eaaa8790158d60e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Dec 2012 11:47:05 +0900 Subject: f2fs: return a default value for non-void function This patch resolves a build warning reported by kbuild test robot. " fs/f2fs/segment.c: In function '__get_segment_type': fs/f2fs/segment.c:806:1: warning: control reaches end of non-void function [-Wreturn-type] " Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++---- fs/f2fs/super.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca7b5ffb09d5..fe2cc0bdc115 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -791,11 +791,10 @@ static int __get_segment_type(struct page *page, enum page_type p_type) return __get_segment_type_2(page, p_type); case 4: return __get_segment_type_4(page, p_type); - case 6: - return __get_segment_type_6(page, p_type); - default: - BUG(); } + /* NR_CURSEG_TYPE(6) logs by default */ + BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); + return __get_segment_type_6(page, p_type); } static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 50240d28ca24..cf0ffb800654 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -302,7 +302,7 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options) case Opt_active_logs: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg != 2 && arg != 4 && arg != 6) + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) return -EINVAL; sbi->active_logs = arg; break; -- cgit v1.2.3 From 029cd28c1f739bbfc5105035696d5f1f4e45d161 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Dec 2012 17:20:21 +0900 Subject: f2fs: fix equation of has_not_enough_free_secs() Practically, has_not_enough_free_secs() should calculate with the numbers of current node and directory data blocks together. Actually the equation was implemented in need_to_flush(). So, this patch removes need_flush() and moves the equation into has_not_enough_free_secs(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 36 ++---------------------------------- fs/f2fs/segment.h | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fe2cc0bdc115..66f5e82ec324 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -19,48 +19,16 @@ #include "segment.h" #include "node.h" -static int need_to_flush(struct f2fs_sb_info *sbi) -{ - unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) * - sbi->segs_per_sec; - int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1) - >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; - int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1) - >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; - - if (sbi->por_doing) - return 0; - - if (free_sections(sbi) <= (node_secs + 2 * dent_secs + - reserved_sections(sbi))) - return 1; - return 0; -} - /* * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. */ void f2fs_balance_fs(struct f2fs_sb_info *sbi) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; - - if (sbi->por_doing) - return; - /* - * We should do checkpoint when there are so many dirty node pages - * with enough free segments. After then, we should do GC. + * We should do GC or end up with checkpoint, if there are so many dirty + * dir/node pages without enough free segments. */ - if (need_to_flush(sbi)) { - sync_dirty_dir_inodes(sbi); - sync_node_pages(sbi, 0, &wbc); - } - if (has_not_enough_free_secs(sbi)) { mutex_lock(&sbi->gc_mutex); f2fs_gc(sbi, 1); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 0948405af6f5..66a288a52fd3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -459,7 +459,20 @@ static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) { - return free_sections(sbi) <= reserved_sections(sbi); + unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) * + sbi->segs_per_sec; + int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + + if (sbi->por_doing) + return false; + + if (free_sections(sbi) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi))) + return true; + return false; } static inline int utilization(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 06025f4df88e9e41f4ebcf6b4c3df30661332bc9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 22 Dec 2012 12:09:43 +0900 Subject: f2fs: handle error from f2fs_iget_nowait In case f2fs_iget_nowait returns error, it results in truncate_hole being called with 'error' value as inode pointer. There is no check in truncate_hole for valid inode, so it could result in crash due "invalid access to memory". Avoid this by handling error condition properly. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b07e9b6ef376..207e2c865c7e 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -228,6 +228,9 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Deallocate previous index in the node page */ inode = f2fs_iget_nowait(sbi->sb, ino); + if (IS_ERR(inode)) + return; + truncate_hole(inode, bidx, bidx + 1); iput(inode); } -- cgit v1.2.3 From 344324f10fad05e40b1047c5e09ebbc77e43c24f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 22 Dec 2012 12:09:58 +0900 Subject: f2fs: remove unneeded initialization of nr_dirty in dirty_seglist_info Since, the memory for the object of dirty_seglist_info is allocated using kzalloc - which returns zeroed out memory. So, there is no need to initialize the nr_dirty values with zeroes. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 66f5e82ec324..de6240922b0a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1575,7 +1575,6 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) for (i = 0; i < NR_DIRTY_TYPE; i++) { dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); - dirty_i->nr_dirty[i] = 0; if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } -- cgit v1.2.3 From fd8bb65f796f041ee6ba400255ca9021bc45a992 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 22 Dec 2012 12:10:12 +0900 Subject: f2fs: fix fsync_inode list addition logic and avoid invalid access to memory In function find_fsync_dnodes() - the fsync inodes gets added to the list, but in one path suppose f2fs_iget results in error, in such case - error gets added to the fsync inode list. In next call to recover_data()->get_fsync_inode() entry = list_entry(this, struct fsync_inode_entry, list); if (entry->inode->i_ino == ino) This can result in "invalid access to memory" when it encounters 'error' as entry in the fsync inode list. So, add the fsync inode entry to the list only in case of no errors. And, free the object at that point itself in case of issue. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 207e2c865c7e..b571fee677d5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -144,14 +144,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto out; } - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, head); - entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); + kmem_cache_free(fsync_entry_slab, entry); goto out; } + + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, head); entry->blkaddr = blkaddr; } if (IS_INODE(page)) { -- cgit v1.2.3 From 64c576fe51bc6b19e99340d2d0e1bda89f66db25 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 22 Dec 2012 12:10:27 +0900 Subject: f2fs: remove unneeded variable from f2fs_sync_fs We can directly return '0' from the function, instead of introducing a 'ret' variable. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cf0ffb800654..08a94c814bdc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -119,7 +119,6 @@ static void f2fs_put_super(struct super_block *sb) int f2fs_sync_fs(struct super_block *sb, int sync) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret = 0; if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) return 0; @@ -127,7 +126,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (sync) write_checkpoint(sbi, false, false); - return ret; + return 0; } static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) -- cgit v1.2.3 From ce19a5d4321911f98d42e4d724630ae48f413719 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Dec 2012 12:03:22 +0900 Subject: f2fs: clean up the start_bidx_of_node function This patch also resolves the following warning reported by kbuild test robot. fs/f2fs/gc.c: In function 'start_bidx_of_node': fs/f2fs/gc.c:453:21: warning: 'bidx' may be used uninitialized in this function Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 644aa3808273..eda8230deb0c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -430,28 +430,22 @@ next_step: */ block_t start_bidx_of_node(unsigned int node_ofs) { - block_t start_bidx; - unsigned int bidx, indirect_blks; - int dec; + unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; + unsigned int bidx; - indirect_blks = 2 * NIDS_PER_BLOCK + 4; + if (node_ofs == 0) + return 0; - start_bidx = 1; - if (node_ofs == 0) { - start_bidx = 0; - } else if (node_ofs <= 2) { + if (node_ofs <= 2) { bidx = node_ofs - 1; } else if (node_ofs <= indirect_blks) { - dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); + int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 2 - dec; } else { - dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); + int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 5 - dec; } - - if (start_bidx) - start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE; - return start_bidx; + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE; } static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, -- cgit v1.2.3 From 2b50638decdb9a8585654a5acf1c8ce5962f1951 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Dec 2012 14:39:50 +0900 Subject: f2fs: clean up unused variables and return values This patch cleans up a couple of unnecessary codes related to unused variables and return values. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 12 +++--------- fs/f2fs/hash.c | 4 +--- fs/f2fs/node.c | 6 +----- 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index eda8230deb0c..b0ec721e984a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -390,9 +390,7 @@ next_step: } err = check_valid_map(sbi, segno, off); - if (err == GC_ERROR) - return err; - else if (err == GC_NEXT) + if (err == GC_NEXT) continue; if (initial) { @@ -550,9 +548,7 @@ next_step: } err = check_valid_map(sbi, segno, off); - if (err == GC_ERROR) - goto stop; - else if (err == GC_NEXT) + if (err == GC_NEXT) continue; if (phase == 0) { @@ -562,9 +558,7 @@ next_step: /* Get an inode by ino with checking validity */ err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); - if (err == GC_ERROR) - goto stop; - else if (err == GC_NEXT) + if (err == GC_NEXT) continue; if (phase == 1) { diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5e48baca3597..6977415c52fc 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -71,7 +71,7 @@ static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) f2fs_hash_t f2fs_dentry_hash(const char *name, int len) { - __u32 hash, minor_hash; + __u32 hash; f2fs_hash_t f2fs_hash; const char *p; __u32 in[8], buf[4]; @@ -94,8 +94,6 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len) p += 16; } hash = buf[0]; - minor_hash = buf[1]; - f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); return f2fs_hash; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e85643cc74a9..5066bfd256c9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1088,7 +1088,6 @@ static int f2fs_write_node_page(struct page *page, { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); nid_t nid; - unsigned int nofs; block_t new_addr; struct node_info ni; @@ -1105,7 +1104,6 @@ static int f2fs_write_node_page(struct page *page, /* get old block addr of this node page */ nid = nid_of_node(page); - nofs = ofs_of_node(page); BUG_ON(page->index != nid); get_node_info(sbi, nid, &ni); @@ -1566,7 +1564,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t nid; struct f2fs_nat_entry raw_ne; int offset = -1; - block_t old_blkaddr, new_blkaddr; + block_t new_blkaddr; ne = list_entry(cur, struct nat_entry, list); nid = nat_get_nid(ne); @@ -1580,7 +1578,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); if (offset >= 0) { raw_ne = nat_in_journal(sum, offset); - old_blkaddr = le32_to_cpu(raw_ne.block_addr); goto flush_now; } to_nat_page: @@ -1602,7 +1599,6 @@ to_nat_page: BUG_ON(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; - old_blkaddr = le32_to_cpu(raw_ne.block_addr); flush_now: new_blkaddr = nat_get_blkaddr(ne); -- cgit v1.2.3 From 9836b8b9499cb25ea32cad9fff640eef874c5431 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 27 Dec 2012 19:55:46 +0200 Subject: f2fs: unify string length declarations and usage This patch is intended to unify string length declarations and usage. There are number of calls to strlen which return size_t object. The size of this object depends on compiler if it will be bigger, equal or even smaller than an unsigned int Signed-off-by: Leon Romanovsky Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/hash.c | 10 ++++++---- fs/f2fs/namei.c | 6 +++--- fs/f2fs/xattr.c | 5 +++-- 5 files changed, 18 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4a78d6c4f3a7..951ed52748f6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -75,7 +75,7 @@ static unsigned long dir_block_index(unsigned int level, unsigned int idx) return bidx; } -static bool early_match_name(const char *name, int namelen, +static bool early_match_name(const char *name, size_t namelen, f2fs_hash_t namehash, struct f2fs_dir_entry *de) { if (le16_to_cpu(de->name_len) != namelen) @@ -88,7 +88,7 @@ static bool early_match_name(const char *name, int namelen, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - const char *name, int namelen, int *max_slots, + const char *name, size_t namelen, int *max_slots, f2fs_hash_t namehash, struct page **res_page) { struct f2fs_dir_entry *de; @@ -127,7 +127,7 @@ found: } static struct f2fs_dir_entry *find_in_level(struct inode *dir, - unsigned int level, const char *name, int namelen, + unsigned int level, const char *name, size_t namelen, f2fs_hash_t namehash, struct page **res_page) { int s = GET_DENTRY_SLOTS(namelen); @@ -182,7 +182,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, struct page **res_page) { const char *name = child->name; - int namelen = child->len; + size_t namelen = child->len; unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; f2fs_hash_t name_hash; @@ -383,7 +383,7 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode) struct inode *dir = dentry->d_parent->d_inode; struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + size_t namelen = dentry->d_name.len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; int slots = GET_DENTRY_SLOTS(namelen); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a18d63db2fb6..13c6dfbb7183 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -881,7 +881,7 @@ int f2fs_sync_fs(struct super_block *, int); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const char *, int); +f2fs_hash_t f2fs_dentry_hash(const char *, size_t); /* * node.c diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 6977415c52fc..6eb8d269b53b 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -42,7 +42,7 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[]) buf[1] += b1; } -static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) +static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) { unsigned pad, val; int i; @@ -69,7 +69,7 @@ static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) *buf++ = pad; } -f2fs_hash_t f2fs_dentry_hash(const char *name, int len) +f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -87,11 +87,13 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len) buf[3] = 0x10325476; p = name; - while (len > 0) { + while (1) { str2hashbuf(p, len, in, 4); TEA_transform(buf, in); - len -= 16; p += 16; + if (len <= 16) + break; + len -= 16; } hash = buf[0]; f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b42389f80011..1a49b881bac0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -77,8 +77,8 @@ fail: static int is_multimedia_file(const unsigned char *s, const char *sub) { - int slen = strlen(s); - int sublen = strlen(sub); + size_t slen = strlen(s); + size_t sublen = strlen(sub); int ret; if (sublen > slen) @@ -250,7 +250,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct super_block *sb = dir->i_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; - unsigned symlen = strlen(symname) + 1; + size_t symlen = strlen(symname) + 1; int err; f2fs_balance_fs(sbi); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 7d52e8dc0c59..940136a3d3a6 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -208,7 +208,7 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, struct page *page; void *base_addr; int error = 0, found = 0; - int value_len, name_len; + size_t value_len, name_len; if (name == NULL) return -EINVAL; @@ -304,7 +304,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, struct f2fs_xattr_entry *here, *last; struct page *page; void *base_addr; - int error, found, free, name_len, newsize; + int error, found, free, newsize; + size_t name_len; char *pval; if (name == NULL) -- cgit v1.2.3 From ea702b80e0bbb2448e201472127288beb82ca2fe Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 27 Dec 2012 07:28:55 -0500 Subject: cifs: move check for NULL socket into smb_send_rqst Cai reported this oops: [90701.616664] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 [90701.625438] IP: [] kernel_setsockopt+0x2e/0x60 [90701.632167] PGD fea319067 PUD 103fda4067 PMD 0 [90701.637255] Oops: 0000 [#1] SMP [90701.640878] Modules linked in: des_generic md4 nls_utf8 cifs dns_resolver binfmt_misc tun sg igb iTCO_wdt iTCO_vendor_support lpc_ich pcspkr i2c_i801 i2c_core i7core_edac edac_core ioatdma dca mfd_core coretemp kvm_intel kvm crc32c_intel microcode sr_mod cdrom ata_generic sd_mod pata_acpi crc_t10dif ata_piix libata megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [90701.677655] CPU 10 [90701.679808] Pid: 9627, comm: ls Tainted: G W 3.7.1+ #10 QCI QSSC-S4R/QSSC-S4R [90701.688950] RIP: 0010:[] [] kernel_setsockopt+0x2e/0x60 [90701.698383] RSP: 0018:ffff88177b431bb8 EFLAGS: 00010206 [90701.704309] RAX: ffff88177b431fd8 RBX: 00007ffffffff000 RCX: ffff88177b431bec [90701.712271] RDX: 0000000000000003 RSI: 0000000000000006 RDI: 0000000000000000 [90701.720223] RBP: ffff88177b431bc8 R08: 0000000000000004 R09: 0000000000000000 [90701.728185] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 [90701.736147] R13: ffff88184ef92000 R14: 0000000000000023 R15: ffff88177b431c88 [90701.744109] FS: 00007fd56a1a47c0(0000) GS:ffff88105fc40000(0000) knlGS:0000000000000000 [90701.753137] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [90701.759550] CR2: 0000000000000028 CR3: 000000104f15f000 CR4: 00000000000007e0 [90701.767512] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [90701.775465] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [90701.783428] Process ls (pid: 9627, threadinfo ffff88177b430000, task ffff88185ca4cb60) [90701.792261] Stack: [90701.794505] 0000000000000023 ffff88177b431c50 ffff88177b431c38 ffffffffa014fcb1 [90701.802809] ffff88184ef921bc 0000000000000000 00000001ffffffff ffff88184ef921c0 [90701.811123] ffff88177b431c08 ffffffff815ca3d9 ffff88177b431c18 ffff880857758000 [90701.819433] Call Trace: [90701.822183] [] smb_send_rqst+0x71/0x1f0 [cifs] [90701.828991] [] ? schedule+0x29/0x70 [90701.834736] [] smb_sendv+0x3d/0x40 [cifs] [90701.841062] [] smb_send+0x26/0x30 [cifs] [90701.847291] [] send_nt_cancel+0x6f/0xd0 [cifs] [90701.854102] [] SendReceive+0x18e/0x360 [cifs] [90701.860814] [] CIFSFindFirst+0x1a8/0x3f0 [cifs] [90701.867724] [] ? build_path_from_dentry+0xf1/0x260 [cifs] [90701.875601] [] ? build_path_from_dentry+0xf1/0x260 [cifs] [90701.883477] [] cifs_query_dir_first+0x26/0x30 [cifs] [90701.890869] [] initiate_cifs_search+0xed/0x250 [cifs] [90701.898354] [] ? fillonedir+0x100/0x100 [90701.904486] [] cifs_readdir+0x45b/0x8f0 [cifs] [90701.911288] [] ? fillonedir+0x100/0x100 [90701.917410] [] ? fillonedir+0x100/0x100 [90701.923533] [] ? fillonedir+0x100/0x100 [90701.929657] [] vfs_readdir+0xb8/0xe0 [90701.935490] [] sys_getdents+0x8f/0x110 [90701.941521] [] system_call_fastpath+0x16/0x1b [90701.948222] Code: 66 90 55 65 48 8b 04 25 f0 c6 00 00 48 89 e5 53 48 83 ec 08 83 fe 01 48 8b 98 48 e0 ff ff 48 c7 80 48 e0 ff ff ff ff ff ff 74 22 <48> 8b 47 28 ff 50 68 65 48 8b 14 25 f0 c6 00 00 48 89 9a 48 e0 [90701.970313] RIP [] kernel_setsockopt+0x2e/0x60 [90701.977125] RSP [90701.981018] CR2: 0000000000000028 [90701.984809] ---[ end trace 24bd602971110a43 ]--- This is likely due to a race vs. a reconnection event. The current code checks for a NULL socket in smb_send_kvec, but that's too late. By the time that check is done, the socket will already have been passed to kernel_setsockopt. Move the check into smb_send_rqst, so that it's checked earlier. In truth, this is a bit of a half-assed fix. The -ENOTSOCK error return here looks like it could bubble back up to userspace. The locking rules around the ssocket pointer are really unclear as well. There are cases where the ssocket pointer is changed without holding the srv_mutex, but I'm not clear whether there's a potential race here yet or not. This code seems like it could benefit from some fundamental re-think of how the socket handling should behave. Until then though, this patch should at least fix the above oops in most cases. Cc: # 3.7+ Reported-and-Tested-by: CAI Qian Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 76d974c952fe..1a528680ec5a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -144,9 +144,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, *sent = 0; - if (ssocket == NULL) - return -ENOTSOCK; /* BB eventually add reconnect code here */ - smb_msg.msg_name = (struct sockaddr *) &server->dstaddr; smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; @@ -291,6 +288,9 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct socket *ssocket = server->ssocket; int val = 1; + if (ssocket == NULL) + return -ENOTSOCK; + cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); -- cgit v1.2.3 From 31efee60f489c759c341454d755a9fd13de8c03d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 27 Dec 2012 08:05:03 -0500 Subject: cifs: adjust sequence number downward after signing NT_CANCEL request When a call goes out, the signing code adjusts the sequence number upward by two to account for the request and the response. An NT_CANCEL however doesn't get a response of its own, it just hurries the server along to get it to respond to the original request more quickly. Therefore, we must adjust the sequence number back down by one after signing a NT_CANCEL request. Cc: Reported-by: Tim Perry Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/smb1ops.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index a5d234c8d5d9..dd79056c0581 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -53,6 +53,13 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf, mutex_unlock(&server->srv_mutex); return rc; } + + /* + * The response to this call was already factored into the sequence + * number when the call went out, so we must adjust it back downward + * after signing here. + */ + --server->sequence_number; rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); mutex_unlock(&server->srv_mutex); -- cgit v1.2.3 From ca8aa29c60238720af2ca2a5caab25fa0c70067e Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 21 Dec 2012 15:05:47 +0400 Subject: Revert "CIFS: Fix write after setting a read lock for read oplock files" that solution has data races and can end up two identical writes to the server: when clientCanCacheAll value can be changed during the execution of __generic_file_aio_write. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 1 - fs/cifs/cifsglob.h | 1 - fs/cifs/file.c | 94 ++++++++++++++++++------------------------------------ 3 files changed, 31 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f653835d067b..de7f9168a118 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -228,7 +228,6 @@ cifs_alloc_inode(struct super_block *sb) cifs_set_oplock_level(cifs_inode, 0); cifs_inode->delete_pending = false; cifs_inode->invalid_mapping = false; - cifs_inode->leave_pages_clean = false; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ cifs_inode->server_eof = 0; cifs_inode->uniqueid = 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index aea1eec64911..dfab450a191e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1030,7 +1030,6 @@ struct cifsInodeInfo { bool clientCanCacheAll; /* read and writebehind oplock */ bool delete_pending; /* DELETE_ON_CLOSE is set */ bool invalid_mapping; /* pagecache is invalid */ - bool leave_pages_clean; /* protected by i_mutex, not set pages dirty */ unsigned long time; /* jiffies of last update of inode */ u64 server_eof; /* current file size on server -- protected by i_lock */ u64 uniqueid; /* server inode number */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0a6677ba212b..1b322d041f1e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2103,15 +2103,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, } else { rc = copied; pos += copied; - /* - * When we use strict cache mode and cifs_strict_writev was run - * with level II oplock (indicated by leave_pages_clean field of - * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev - * sent the data to the server itself. - */ - if (!CIFS_I(inode)->leave_pages_clean || - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)) - set_page_dirty(page); + set_page_dirty(page); } if (rc > 0) { @@ -2462,8 +2454,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, } static ssize_t -cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool cache_ex) +cifs_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; @@ -2485,12 +2477,8 @@ cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov, server->vals->exclusive_lock_type, NULL, CIFS_WRITE_OP)) { mutex_lock(&inode->i_mutex); - if (!cache_ex) - cinode->leave_pages_clean = true; rc = __generic_file_aio_write(iocb, iov, nr_segs, - &iocb->ki_pos); - if (!cache_ex) - cinode->leave_pages_clean = false; + &iocb->ki_pos); mutex_unlock(&inode->i_mutex); } @@ -2517,62 +2505,42 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, struct cifsFileInfo *cfile = (struct cifsFileInfo *) iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - ssize_t written, written2; + +#ifdef CONFIG_CIFS_SMB2 /* - * We need to store clientCanCacheAll here to prevent race - * conditions - this value can be changed during an execution - * of generic_file_aio_write. For CIFS it can be changed from - * true to false only, but for SMB2 it can be changed both from - * true to false and vice versa. So, we can end up with a data - * stored in the cache, not marked dirty and not sent to the - * server if this value changes its state from false to true - * after cifs_write_end. + * If we have an oplock for read and want to write a data to the file + * we need to store it in the page cache and then push it to the server + * to be sure the next read will get a valid data. */ - bool cache_ex = cinode->clientCanCacheAll; - bool cache_read = cinode->clientCanCacheRead; - int rc; - loff_t saved_pos; + if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) { + ssize_t written; + int rc; + + written = generic_file_aio_write(iocb, iov, nr_segs, pos); + rc = filemap_fdatawrite(inode->i_mapping); + if (rc) + return (ssize_t)rc; - if (cache_ex) { - if (cap_unix(tcon->ses) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( - tcon->fsUnixInfo.Capability))) - return generic_file_aio_write(iocb, iov, nr_segs, pos); - return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex); + return written; } +#endif /* - * For files without exclusive oplock in strict cache mode we need to - * write the data to the server exactly from the pos to pos+len-1 rather - * than flush all affected pages because it may cause a error with - * mandatory locks on these pages but not on the region from pos to - * ppos+len-1. + * For non-oplocked files in strict cache mode we need to write the data + * to the server exactly from the pos to pos+len-1 rather than flush all + * affected pages because it may cause a error with mandatory locks on + * these pages but not on the region from pos to ppos+len-1. */ - written = cifs_user_writev(iocb, iov, nr_segs, pos); - if (!cache_read || written <= 0) - return written; - saved_pos = iocb->ki_pos; - iocb->ki_pos = pos; - /* we have a read oplock - need to store a data in the page cache */ + if (!cinode->clientCanCacheAll) + return cifs_user_writev(iocb, iov, nr_segs, pos); + if (cap_unix(tcon->ses) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( - tcon->fsUnixInfo.Capability))) - written2 = generic_file_aio_write(iocb, iov, nr_segs, pos); - else - written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos, - cache_ex); - /* errors occured during writing - invalidate the page cache */ - if (written2 < 0) { - rc = cifs_invalidate_mapping(inode); - if (rc) - written = (ssize_t)rc; - else - iocb->ki_pos = saved_pos; - } - return written; + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_aio_write(iocb, iov, nr_segs, pos); + + return cifs_writev(iocb, iov, nr_segs, pos); } static struct cifs_readdata * -- cgit v1.2.3 From 88cf75aaaf27a652b3e85960ac3060172dd3edac Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 21 Dec 2012 15:07:52 +0400 Subject: CIFS: Fix write after setting a read lock for read oplock files If we have a read oplock and set a read lock in it, we can't write to the locked area - so, filemap_fdatawrite may fail with a no information for a userspace application even if we request a write to non-locked area. Fix this by writing directly to the server and then breaking oplock level from level2 to None. Also remove CONFIG_CIFS_SMB2 ifdefs because it's suitable for both CIFS and SMB2 protocols. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/file.c | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1b322d041f1e..22c37254b64e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2505,42 +2505,34 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, struct cifsFileInfo *cfile = (struct cifsFileInfo *) iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + ssize_t written; -#ifdef CONFIG_CIFS_SMB2 - /* - * If we have an oplock for read and want to write a data to the file - * we need to store it in the page cache and then push it to the server - * to be sure the next read will get a valid data. - */ - if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) { - ssize_t written; - int rc; - - written = generic_file_aio_write(iocb, iov, nr_segs, pos); - rc = filemap_fdatawrite(inode->i_mapping); - if (rc) - return (ssize_t)rc; - - return written; + if (cinode->clientCanCacheAll) { + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) + && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_aio_write(iocb, iov, nr_segs, pos); + return cifs_writev(iocb, iov, nr_segs, pos); } -#endif - /* * For non-oplocked files in strict cache mode we need to write the data * to the server exactly from the pos to pos+len-1 rather than flush all * affected pages because it may cause a error with mandatory locks on * these pages but not on the region from pos to ppos+len-1. */ - - if (!cinode->clientCanCacheAll) - return cifs_user_writev(iocb, iov, nr_segs, pos); - - if (cap_unix(tcon->ses) && - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) - return generic_file_aio_write(iocb, iov, nr_segs, pos); - - return cifs_writev(iocb, iov, nr_segs, pos); + written = cifs_user_writev(iocb, iov, nr_segs, pos); + if (written > 0 && cinode->clientCanCacheRead) { + /* + * Windows 7 server can delay breaking level2 oplock if a write + * request comes - break it on the client to prevent reading + * an old data. + */ + cifs_invalidate_mapping(inode); + cFYI(1, "Set no oplock for inode=%p after a write operation", + inode); + cinode->clientCanCacheRead = false; + } + return written; } static struct cifs_readdata * -- cgit v1.2.3 From 63b7d3a41ccadef971a4ffbe6662119d4275ebf9 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 24 Dec 2012 14:41:19 +0400 Subject: CIFS: Don't let read only caching for mandatory byte-range locked files If we have mandatory byte-range locks on a file we can't cache reads because pagereading may have conflicts with these locks on the server. That's why we should allow level2 oplocks for files without mandatory locks only. Signed-off-by: Pavel Shilovsky Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 2 ++ 4 files changed, 57 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index dfab450a191e..e6899cea1c35 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -386,6 +386,7 @@ struct smb_version_values { unsigned int cap_unix; unsigned int cap_nt_find; unsigned int cap_large_files; + unsigned int oplock_read; }; #define HEADER_SIZE(server) (server->vals->header_size) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 22c37254b64e..8ea6ca50a665 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -238,6 +238,23 @@ out: return rc; } +static bool +cifs_has_mand_locks(struct cifsInodeInfo *cinode) +{ + struct cifs_fid_locks *cur; + bool has_locks = false; + + down_read(&cinode->lock_sem); + list_for_each_entry(cur, &cinode->llist, llist) { + if (!list_empty(&cur->locks)) { + has_locks = true; + break; + } + } + up_read(&cinode->lock_sem); + return has_locks; +} + struct cifsFileInfo * cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -248,6 +265,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct cifsFileInfo *cfile; struct cifs_fid_locks *fdlocks; struct cifs_tcon *tcon = tlink_tcon(tlink); + struct TCP_Server_Info *server = tcon->ses->server; cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); if (cfile == NULL) @@ -276,12 +294,22 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_WORK(&cfile->oplock_break, cifs_oplock_break); mutex_init(&cfile->fh_mutex); + /* + * If the server returned a read oplock and we have mandatory brlocks, + * set oplock level to None. + */ + if (oplock == server->vals->oplock_read && + cifs_has_mand_locks(cinode)) { + cFYI(1, "Reset oplock val from read to None due to mand locks"); + oplock = 0; + } + spin_lock(&cifs_file_list_lock); - if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) + if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) oplock = fid->pending_open->oplock; list_del(&fid->pending_open->olist); - tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); + server->ops->set_fid(cfile, fid, oplock); list_add(&cfile->tlist, &tcon->openFileList); /* if readable file instance put first in list*/ @@ -1422,6 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; + struct inode *inode = cfile->dentry->d_inode; if (posix_lck) { int posix_lock_type; @@ -1459,6 +1488,21 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, if (!rc) goto out; + /* + * Windows 7 server can delay breaking lease from read to None + * if we set a byte-range lock on a file - break it explicitly + * before sending the lock to the server to be sure the next + * read won't conflict with non-overlapted locks due to + * pagereading. + */ + if (!CIFS_I(inode)->clientCanCacheAll && + CIFS_I(inode)->clientCanCacheRead) { + cifs_invalidate_mapping(inode); + cFYI(1, "Set no oplock for inode=%p due to mand locks", + inode); + CIFS_I(inode)->clientCanCacheRead = false; + } + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1, 0, wait_flag); if (rc) { @@ -3537,6 +3581,13 @@ void cifs_oplock_break(struct work_struct *work) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; + if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead && + cifs_has_mand_locks(cinode)) { + cFYI(1, "Reset oplock to None for inode=%p due to mand locks", + inode); + cinode->clientCanCacheRead = false; + } + if (inode && S_ISREG(inode->i_mode)) { if (cinode->clientCanCacheRead) break_lease(inode, O_RDONLY); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index dd79056c0581..47bc5a87f94e 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -959,4 +959,5 @@ struct smb_version_values smb1_values = { .cap_unix = CAP_UNIX, .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, .cap_large_files = CAP_LARGE_FILES, + .oplock_read = OPLOCK_READ, }; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d79de7bc4435..c9c7aa7ed966 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -708,6 +708,7 @@ struct smb_version_values smb20_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, + .oplock_read = SMB2_OPLOCK_LEVEL_II, }; struct smb_version_values smb21_values = { @@ -725,6 +726,7 @@ struct smb_version_values smb21_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, + .oplock_read = SMB2_OPLOCK_LEVEL_II, }; struct smb_version_values smb30_values = { -- cgit v1.2.3 From ec1487528bed94c4aaff3687834fe94203880fd6 Mon Sep 17 00:00:00 2001 From: Nathan Straz Date: Tue, 11 Dec 2012 17:01:24 -0500 Subject: GFS2: Initialize hex string to '0' When generating the DLM lock name, a value of 0 would skip the loop and leave the string unchanged. This left locks with a value of 0 unlabeled. Initializing the string to '0' fixes this. Signed-off-by: Nathan Straz Signed-off-by: Steven Whitehouse --- fs/gfs2/lock_dlm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 8dad6b093716..b906ed17a839 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -241,6 +241,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, static void gfs2_reverse_hex(char *c, u64 value) { + *c = '0'; while (value) { *c-- = hex_asc[value & 0x0f]; value >>= 4; -- cgit v1.2.3 From f1213cacc7ffc7d4cdef3692f22b28a2df3216f5 Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Wed, 19 Dec 2012 10:48:01 -0500 Subject: GFS2: Fix race in gfs2_rs_alloc QE aio tests uncovered a race condition in gfs2_rs_alloc where it's possible to come out of the function with a valid ip->i_res allocation but it gets freed before use resulting in a NULL ptr dereference. This patch envelopes the initial short-circuit check for non-NULL ip->i_res into the mutex lock. With this patch, I was able to successfully run the reproducer test multiple times. Resolves: rhbz#878476 Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 37ee061d899e..738b3888adc6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -557,22 +557,20 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) */ int gfs2_rs_alloc(struct gfs2_inode *ip) { - struct gfs2_blkreserv *res; + int error = 0; + down_write(&ip->i_rw_mutex); if (ip->i_res) - return 0; - - res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!res) - return -ENOMEM; + goto out; - RB_CLEAR_NODE(&res->rs_node); + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) { + error = -ENOMEM; + goto out; + } - down_write(&ip->i_rw_mutex); - if (ip->i_res) - kmem_cache_free(gfs2_rsrv_cachep, res); - else - ip->i_res = res; + RB_CLEAR_NODE(&ip->i_res->rs_node); +out: up_write(&ip->i_rw_mutex); return 0; } -- cgit v1.2.3 From 15bd50ad82a6d3421af1abe82e2554898abc4141 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 20 Dec 2012 13:21:07 -0500 Subject: GFS2: Stop looking for free blocks at end of rgrp This patch adds a return code check after calling function gfs2_rbm_from_block while determining the free extent size. That way, when the end of an rgrp is reached, it won't try to process unaligned blocks after the end. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 738b3888adc6..712dd4fd8641 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -350,10 +350,14 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) BUG_ON(len < chunk_size); len -= chunk_size; block = gfs2_rbm_to_block(&rbm); - gfs2_rbm_from_block(&rbm, block + chunk_size); - n_unaligned = 3; - if (ptr) + if (gfs2_rbm_from_block(&rbm, block + chunk_size)) { + n_unaligned = 0; break; + } + if (ptr) { + n_unaligned = 3; + break; + } n_unaligned = len & 3; } -- cgit v1.2.3 From 13d2eb012927b03ac1b80202af5aa9abc4003bd5 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 20 Dec 2012 13:23:04 -0500 Subject: GFS2: Reset rd_last_alloc when it reaches the end of the rgrp In function rg_mblk_search, it's searching for multiple blocks in a given state (e.g. "free"). If there's an active block reservation its goal is the next free block of that. If the resource group contains the dinode's goal block, that's used for the search. But if neither is the case, it uses the rgrp's last allocated block. That way, consecutive allocations appear after one another on media. The problem comes in when you hit the end of the rgrp; it would never start over and search from the beginning. This became a problem, since if you deleted all the files and data from the rgrp, it would never start over and find free blocks. So it had to keep searching further out on the media to allocate blocks. This patch resets the rd_last_alloc after it does an unsuccessful search at the end of the rgrp. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 712dd4fd8641..b7eff078fe90 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1426,6 +1426,9 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, rs->rs_free = extlen; rs->rs_inum = ip->i_no_addr; rs_insert(ip); + } else { + if (goal == rgd->rd_last_alloc + rgd->rd_data0) + rgd->rd_last_alloc = 0; } } -- cgit v1.2.3 From f8d9a897d4384b77f13781ea813156568f68b83e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 3 Jan 2013 16:42:29 -0500 Subject: NFS: Fix access to suid/sgid executables nfs_open_permission_mask() should only check MAY_EXEC for files that are opened with __FMODE_EXEC. Also fix NFSv4 access-in-open path in a similar way -- openflags must be used because fmode will not always have FMODE_EXEC set. This patch fixes https://bugzilla.kernel.org/show_bug.cgi?id=49101 Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/dir.c | 16 ++++++++++------ fs/nfs/nfs4proc.c | 18 +++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32e6c53520e2..1b2d7eb93796 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2153,12 +2153,16 @@ static int nfs_open_permission_mask(int openflags) { int mask = 0; - if ((openflags & O_ACCMODE) != O_WRONLY) - mask |= MAY_READ; - if ((openflags & O_ACCMODE) != O_RDONLY) - mask |= MAY_WRITE; - if (openflags & __FMODE_EXEC) - mask |= MAY_EXEC; + if (openflags & __FMODE_EXEC) { + /* ONLY check exec rights */ + mask = MAY_EXEC; + } else { + if ((openflags & O_ACCMODE) != O_WRONLY) + mask |= MAY_READ; + if ((openflags & O_ACCMODE) != O_RDONLY) + mask |= MAY_WRITE; + } + return mask; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5d864fb36578..cf747ef86650 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1626,7 +1626,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) static int nfs4_opendata_access(struct rpc_cred *cred, struct nfs4_opendata *opendata, - struct nfs4_state *state, fmode_t fmode) + struct nfs4_state *state, fmode_t fmode, + int openflags) { struct nfs_access_entry cache; u32 mask; @@ -1638,11 +1639,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred, mask = 0; /* don't check MAY_WRITE - a newly created file may not have - * write mode bits, but POSIX allows the creating process to write */ - if (fmode & FMODE_READ) - mask |= MAY_READ; - if (fmode & FMODE_EXEC) - mask |= MAY_EXEC; + * write mode bits, but POSIX allows the creating process to write. + * use openflags to check for exec, because fmode won't + * always have FMODE_EXEC set when file open for exec. */ + if (openflags & __FMODE_EXEC) { + /* ONLY check for exec rights */ + mask = MAY_EXEC; + } else if (fmode & FMODE_READ) + mask = MAY_READ; cache.cred = cred; cache.jiffies = jiffies; @@ -1896,7 +1900,7 @@ static int _nfs4_do_open(struct inode *dir, if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); - status = nfs4_opendata_access(cred, opendata, state, fmode); + status = nfs4_opendata_access(cred, opendata, state, fmode, flags); if (status != 0) goto err_opendata_put; -- cgit v1.2.3 From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:02:05 -0800 Subject: pstore: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from the pstore filesystem. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 14 ++++++-------- fs/pstore/ram_core.c | 9 ++++----- include/linux/pstore_ram.h | 5 ++--- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index f883e7e74305..7003e5266f25 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -291,9 +291,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt) kfree(cxt->przs); } -static int __devinit ramoops_init_przs(struct device *dev, - struct ramoops_context *cxt, - phys_addr_t *paddr, size_t dump_mem_sz) +static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, + phys_addr_t *paddr, size_t dump_mem_sz) { int err = -ENOMEM; int i; @@ -336,10 +335,9 @@ fail_prz: return err; } -static int __devinit ramoops_init_prz(struct device *dev, - struct ramoops_context *cxt, - struct persistent_ram_zone **prz, - phys_addr_t *paddr, size_t sz, u32 sig) +static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone **prz, + phys_addr_t *paddr, size_t sz, u32 sig) { if (!sz) return 0; @@ -367,7 +365,7 @@ static int __devinit ramoops_init_prz(struct device *dev, return 0; } -static int __devinit ramoops_probe(struct platform_device *pdev) +static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = pdev->dev.platform_data; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index eecd2a8a84dd..0306303be372 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, return 0; } -static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, - u32 sig, int ecc_size) +static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, + int ecc_size) { int ret; @@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz) kfree(prz); } -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size) +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size) { struct persistent_ram_zone *prz; int ret = -ENOMEM; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 098d2a838296..cb6ab5feab67 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -46,9 +46,8 @@ struct persistent_ram_zone { size_t old_log_size; }; -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size); +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.3 From 6ae141718e3f9c7e2c620e999c86612a7f415bb1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:16:45 -0800 Subject: misc: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the last of the __dev* markings from the kernel from a variety of different, tiny, places. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Signed-off-by: Greg Kroah-Hartman --- fs/file.c | 2 +- lib/Kconfig.debug | 2 +- samples/rpmsg/rpmsg_client_sample.c | 4 ++-- scripts/kernel-doc | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 15cb8618e95d..2b3570b7caeb 100644 --- a/fs/file.c +++ b/fs/file.c @@ -490,7 +490,7 @@ void exit_files(struct task_struct *tsk) } } -static void __devinit fdtable_defer_list_init(int cpu) +static void fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); spin_lock_init(&fddef->lock); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3a353091a903..67604e599384 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -134,7 +134,7 @@ config DEBUG_SECTION_MISMATCH any use of code/data previously in these sections would most likely result in an oops. In the code, functions and variables are annotated with - __init, __devinit, etc. (see the full list in include/linux/init.h), + __init, __cpuinit, etc. (see the full list in include/linux/init.h), which results in the code/data being placed in specific sections. The section mismatch analysis is always performed after a full kernel build, and enabling this option causes the following diff --git a/samples/rpmsg/rpmsg_client_sample.c b/samples/rpmsg/rpmsg_client_sample.c index 23ea9f2ae11d..59b13440813d 100644 --- a/samples/rpmsg/rpmsg_client_sample.c +++ b/samples/rpmsg/rpmsg_client_sample.c @@ -64,7 +64,7 @@ static int rpmsg_sample_probe(struct rpmsg_channel *rpdev) return 0; } -static void __devexit rpmsg_sample_remove(struct rpmsg_channel *rpdev) +static void rpmsg_sample_remove(struct rpmsg_channel *rpdev) { dev_info(&rpdev->dev, "rpmsg sample client driver is removed\n"); } @@ -81,7 +81,7 @@ static struct rpmsg_driver rpmsg_sample_client = { .id_table = rpmsg_driver_sample_id_table, .probe = rpmsg_sample_probe, .callback = rpmsg_sample_cb, - .remove = __devexit_p(rpmsg_sample_remove), + .remove = rpmsg_sample_remove, }; static int __init rpmsg_client_sample_init(void) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 28b761567815..f565536a2bef 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -2079,7 +2079,6 @@ sub dump_function($$) { $prototype =~ s/^__inline +//; $prototype =~ s/^__always_inline +//; $prototype =~ s/^noinline +//; - $prototype =~ s/__devinit +//; $prototype =~ s/__init +//; $prototype =~ s/__init_or_module +//; $prototype =~ s/__must_check +//; -- cgit v1.2.3 From 39e88fcfb1d5c6c4b1ff76ca2ab76cf449b850e8 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Fri, 4 Jan 2013 20:19:49 +0800 Subject: pnfs: Increase the refcount when LAYOUTGET fails the first time The layout will be set unusable if LAYOUTGET fails. Is it reasonable to increase the refcount iff LAYOUTGET fails the first time? Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.7] --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e7165d915362..d00260b08103 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -254,7 +254,7 @@ static void pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { lo->plh_retry_timestamp = jiffies; - if (test_and_set_bit(fail_bit, &lo->plh_flags)) + if (!test_and_set_bit(fail_bit, &lo->plh_flags)) atomic_inc(&lo->plh_refcount); } -- cgit v1.2.3 From e25fbe380c4e3c09afa98bcdcd9d3921443adab8 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 4 Jan 2013 03:22:57 -0500 Subject: nfs: fix null checking in nfs_get_option_str() The following null pointer check is broken. *option = match_strdup(args); return !option; The pointer `option' must be non-null, and thus `!option' is always false. Use `!*option' instead. The bug was introduced in commit c5cb09b6f8 ("Cleanup: Factor out some cut-and-paste code."). Signed-off-by: Xi Wang Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c25cadf8f8c4..2e7e8c878e5d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1152,7 +1152,7 @@ static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); *option = match_strdup(args); - return !option; + return !*option; } static int nfs_get_option_ul(substring_t args[], unsigned long *option) -- cgit v1.2.3 From 6db6dd7d3fd8f7c765dabc376493d6791ab28bd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Jan 2013 12:47:04 -0500 Subject: NFS: Ensure that we free the rpc_task after read and write cleanups are done This patch ensures that we free the rpc_task after the cleanup callbacks are done in order to avoid a deadlock problem that can be triggered if the callback needs to wait for another workqueue item to complete. Signed-off-by: Trond Myklebust Cc: Weston Andros Adamson Cc: Tejun Heo Cc: Bruce Fields Cc: stable@vger.kernel.org [>= 3.5] --- fs/nfs/read.c | 10 +++++++--- fs/nfs/write.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b6bdb18e892c..a5e5d9899d56 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -91,12 +91,16 @@ void nfs_readdata_release(struct nfs_read_data *rdata) put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) kfree(rdata->pages.pagevec); - if (rdata != &read_header->rpc_data) - kfree(rdata); - else + if (rdata == &read_header->rpc_data) { rdata->header = NULL; + rdata = NULL; + } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. + * See the comment in rpc_free_task() for why + */ + kfree(rdata); } EXPORT_SYMBOL_GPL(nfs_readdata_release); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b673be31590e..c483cc50b82e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -126,12 +126,16 @@ void nfs_writedata_release(struct nfs_write_data *wdata) put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) kfree(wdata->pages.pagevec); - if (wdata != &write_header->rpc_data) - kfree(wdata); - else + if (wdata == &write_header->rpc_data) { wdata->header = NULL; + wdata = NULL; + } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. + * See the comment in rpc_free_task() for why + */ + kfree(wdata); } EXPORT_SYMBOL_GPL(nfs_writedata_release); -- cgit v1.2.3 From ecf0eb9edbb607d74f74b73c14af8b43f3729528 Mon Sep 17 00:00:00 2001 From: Nickolai Zeldovich Date: Sat, 5 Jan 2013 14:19:51 -0500 Subject: nfs: avoid dereferencing null pointer in initiate_bulk_draining Fix an inverted null pointer check in initiate_bulk_draining(). Signed-off-by: Nickolai Zeldovich Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.7] --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c89b26bc9759..264d1aa935f2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -206,7 +206,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, list_for_each_entry(lo, &server->layouts, plh_layouts) { ino = igrab(lo->plh_inode); - if (ino) + if (!ino) continue; spin_lock(&ino->i_lock); /* Is this layout in the process of being freed? */ -- cgit v1.2.3 From 96465efee14ecca0cdffcb09f9903635db8fc504 Mon Sep 17 00:00:00 2001 From: Valerie Aurora Date: Sun, 6 Jan 2013 23:38:44 -0500 Subject: ext4: fix configuration dependencies for ext4 ACLs and security labels Commit "ext4: Remove CONFIG_EXT4_FS_XATTR" removed the configuration dependencies for ext4 xattrs from the ext4 ACLs and security labels configuration options, but did not replace them with a dependency on ext4 itself. Add back the dependency on ext4 so the options only show up if ext4 is enabled. Signed-off-by: Valerie Aurora Signed-off-by: "Theodore Ts'o" Reviewed-by: Tao Ma --- fs/ext4/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 0a475c881852..987358740cb9 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -41,6 +41,7 @@ config EXT4_USE_FOR_EXT23 config EXT4_FS_POSIX_ACL bool "Ext4 POSIX Access Control Lists" + depends on EXT4_FS select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and @@ -53,6 +54,7 @@ config EXT4_FS_POSIX_ACL config EXT4_FS_SECURITY bool "Ext4 Security Labels" + depends on EXT4_FS help Security labels support alternative access control models implemented by security modules like SELinux. This option -- cgit v1.2.3 From 0ecaef0644973e9006fdbc6974301047aaff9bc6 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Sun, 6 Jan 2013 23:38:47 -0500 Subject: ext4: release buffer in failed path in dx_probe() If checksum fails, we should also release the buffer read from previous iteration. Signed-off-by: Guo Chao Signed-off-by: "Theodore Ts'o" Reviewed-by: Darrick J. Wong - Cc: stable@vger.kernel.org -- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8990165346ee..f8be1c288a1c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -722,7 +722,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, ext4_warning(dir->i_sb, "Node failed checksum"); brelse(bh); *err = ERR_BAD_DX_DIR; - goto fail; + goto fail2; } set_buffer_verified(bh); -- cgit v1.2.3 From fef0ebdb229bedce888b63923e2a1ba4e6c6a84c Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Sun, 6 Jan 2013 23:40:25 -0500 Subject: ext4: remove duplicate call to ext4_bread() in ext4_init_new_dir() This fixes a buffer cache leak when creating a directory, introduced in commit a774f9c20. Signed-off-by: Guo Chao Signed-off-by: "Theodore Ts'o" Reviewed-by: Tao Ma --- fs/ext4/namei.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f8be1c288a1c..f9ed946a448e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2368,7 +2368,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, } inode->i_size = EXT4_I(inode)->i_disksize = blocksize; - dir_block = ext4_bread(handle, inode, 0, 1, &err); if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { if (!err) { err = -EIO; -- cgit v1.2.3 From ae62ca7b03217be5e74759dc6d7698c95df498b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jan 2013 18:21:49 +0000 Subject: tcp: fix MSG_SENDPAGE_NOTLAST logic commit 35f9c09fe9c72e (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag : MSG_SENDPAGE_NOTLAST meant to be set on all frags but the last one for a splice() call. The condition used to set the flag in pipe_to_sendpage() relied on splice() user passing the exact number of bytes present in the pipe, or a smaller one. But some programs pass an arbitrary high value, and the test fails. The effect of this bug is a lack of tcp_push() at the end of a splice(pipe -> socket) call, and possibly very slow or erratic TCP sessions. We should both test sd->total_len and fact that another fragment is in the pipe (pipe->nrbufs > 1) Many thanks to Willy for providing very clear bug report, bisection and test programs. Reported-by: Willy Tarreau Bisected-by: Willy Tarreau Tested-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- fs/splice.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 8890604e3fcd..6909d89d0da5 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -696,8 +696,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; - if (sd->len < sd->total_len) + + if (sd->len < sd->total_len && pipe->nrbufs > 1) more |= MSG_SENDPAGE_NOTLAST; + return file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); } -- cgit v1.2.3