diff options
Diffstat (limited to 'fs')
45 files changed, 550 insertions, 189 deletions
diff --git a/fs/Makefile b/fs/Makefile index 38bc735c67ad..dc20db348679 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -69,10 +69,12 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 -obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 +obj-$(CONFIG_EXT2_FS) += ext2/ +# We place ext4 after ext2 so plain ext2 root fs's are mounted using ext2 +# unless explicitly requested by rootfstype +obj-$(CONFIG_EXT4_FS) += ext4/ obj-$(CONFIG_JBD) += jbd/ obj-$(CONFIG_JBD2) += jbd2/ -obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_SQUASHFS) += squashfs/ obj-y += ramfs/ @@ -302,7 +302,7 @@ void bio_init(struct bio *bio) struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = NULL; - void *p; + void *uninitialized_var(p); if (bs) { p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a8c9693b75ac..72677ce2b74f 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -66,6 +66,9 @@ struct btrfs_inode { */ struct list_head delalloc_inodes; + /* the space_info for where this inode's data allocations are done */ + struct btrfs_space_info *space_info; + /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ @@ -94,6 +97,11 @@ struct btrfs_inode { */ u64 delalloc_bytes; + /* total number of bytes that may be used for this inode for + * delalloc + */ + u64 reserved_bytes; + /* * the size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 42491d728e99..37f31b5529aa 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -277,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (*cow_ret == buf) unlock_orig = 1; - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (parent) parent_start = parent->start; @@ -2365,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - WARN_ON(!btrfs_tree_locked(path->nodes[1])); + btrfs_assert_tree_locked(path->nodes[1]); right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); @@ -2562,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (right_nritems == 0) return 1; - WARN_ON(!btrfs_tree_locked(path->nodes[1])); + btrfs_assert_tree_locked(path->nodes[1]); left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); @@ -4101,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) next = read_node_slot(root, c, slot); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(c)); + btrfs_assert_tree_locked(c); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } @@ -4126,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_assert_tree_locked(path->nodes[level]); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 766b31ae3186..82491ba8fa40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,13 +596,27 @@ struct btrfs_block_group_item { struct btrfs_space_info { u64 flags; - u64 total_bytes; - u64 bytes_used; - u64 bytes_pinned; - u64 bytes_reserved; - u64 bytes_readonly; - int full; - int force_alloc; + + u64 total_bytes; /* total bytes in the space */ + u64 bytes_used; /* total bytes used on disk */ + u64 bytes_pinned; /* total bytes pinned, will be freed when the + transaction finishes */ + u64 bytes_reserved; /* total bytes the allocator has reserved for + current allocations */ + u64 bytes_readonly; /* total bytes that are read only */ + + /* delalloc accounting */ + u64 bytes_delalloc; /* number of bytes reserved for allocation, + this space is not necessarily reserved yet + by the allocator */ + u64 bytes_may_use; /* number of bytes that may be used for + delalloc */ + + int full; /* indicates that we cannot allocate any more + chunks for this space */ + int force_alloc; /* set if we need to force a chunk alloc for + this space */ + struct list_head list; /* for block groups in our same type */ @@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); +int btrfs_check_metadata_free_space(struct btrfs_root *root); +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes); +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, @@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index adda739a0215..3e18175248e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -857,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); /* ugh, clear_extent_buffer_dirty can be expensive */ btrfs_set_lock_blocking(buf); @@ -2361,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) btrfs_set_lock_blocking(buf); - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " "found %llu running %llu\n", diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0a5d796c9f7e..9abf81f71c46 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags, int force); + static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { return (cache->flags & bits) == bits; @@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_pinned = 0; found->bytes_reserved = 0; found->bytes_readonly = 0; + found->bytes_delalloc = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -1972,6 +1977,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return flags; } +static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) +{ + struct btrfs_fs_info *info = root->fs_info; + u64 alloc_profile; + + if (data) { + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if (root == root->fs_info->chunk_root) { + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + } else { + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + } + + return btrfs_reduce_alloc_profile(root, data); +} + +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) +{ + u64 alloc_target; + + alloc_target = btrfs_get_alloc_profile(root, 1); + BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, + alloc_target); +} + +/* + * for now this just makes sure we have at least 5% of our metadata space free + * for use. + */ +int btrfs_check_metadata_free_space(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 alloc_target, thresh; + int committed = 0, ret; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + +again: + spin_lock(&meta_sinfo->lock); + if (!meta_sinfo->full) + thresh = meta_sinfo->total_bytes * 80; + else + thresh = meta_sinfo->total_bytes * 95; + + do_div(thresh, 100); + + if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + struct btrfs_trans_handle *trans; + if (!meta_sinfo->full) { + meta_sinfo->force_alloc = 1; + spin_unlock(&meta_sinfo->lock); + + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + 2 * 1024 * 1024, alloc_target, 0); + btrfs_end_transaction(trans, root); + goto again; + } + spin_unlock(&meta_sinfo->lock); + + if (!committed) { + committed = 1; + trans = btrfs_join_transaction(root, 1); + if (!trans) + return -ENOMEM; + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + goto again; + } + return -ENOSPC; + } + spin_unlock(&meta_sinfo->lock); + + return 0; +} + +/* + * This will check the space that the inode allocates from to make sure we have + * enough space for bytes. + */ +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + int ret = 0, committed = 0; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; +again: + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + if (data_sinfo->total_bytes - data_sinfo->bytes_used - + data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - + data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - + data_sinfo->bytes_may_use < bytes) { + struct btrfs_trans_handle *trans; + + /* + * if we don't have enough free bytes in this space then we need + * to alloc a new chunk. + */ + if (!data_sinfo->full) { + u64 alloc_target; + + data_sinfo->force_alloc = 1; + spin_unlock(&data_sinfo->lock); + + alloc_target = btrfs_get_alloc_profile(root, 1); + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + bytes + 2 * 1024 * 1024, + alloc_target, 0); + btrfs_end_transaction(trans, root); + if (ret) + return ret; + goto again; + } + spin_unlock(&data_sinfo->lock); + + /* commit the current transaction and try again */ + if (!committed) { + committed = 1; + trans = btrfs_join_transaction(root, 1); + if (!trans) + return -ENOMEM; + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + goto again; + } + + printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" + ", %llu bytes_used, %llu bytes_reserved, " + "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu total\n", bytes, data_sinfo->bytes_delalloc, + data_sinfo->bytes_used, data_sinfo->bytes_reserved, + data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, + data_sinfo->bytes_may_use, data_sinfo->total_bytes); + return -ENOSPC; + } + data_sinfo->bytes_may_use += bytes; + BTRFS_I(inode)->reserved_bytes += bytes; + spin_unlock(&data_sinfo->lock); + + return btrfs_check_metadata_free_space(root); +} + +/* + * if there was an error for whatever reason after calling + * btrfs_check_data_free_space, call this so we can cleanup the counters. + */ +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + spin_unlock(&data_sinfo->lock); +} + +/* called when we are adding a delalloc extent to the inode's io_tree */ +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* get the space info for where this inode will be storing its data */ + data_sinfo = BTRFS_I(inode)->space_info; + + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_delalloc += bytes; + + /* + * we are adding a delalloc extent without calling + * btrfs_check_data_free_space first. This happens on a weird + * writepage condition, but shouldn't hurt our accounting + */ + if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { + data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; + BTRFS_I(inode)->reserved_bytes = 0; + } else { + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + } + + spin_unlock(&data_sinfo->lock); +} + +/* called when we are clearing an delalloc extent from the inode's io_tree */ +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *info; + + info = BTRFS_I(inode)->space_info; + + spin_lock(&info->lock); + info->bytes_delalloc -= bytes; + spin_unlock(&info->lock); +} + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) @@ -3105,6 +3337,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (unsigned long long)(info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved), (info->full) ? "" : "not "); + printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," + " may_use=%llu, used=%llu\n", info->total_bytes, + info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, + info->bytes_used); down_read(&info->groups_sem); list_for_each_entry(cache, &info->block_groups, list) { @@ -3131,24 +3367,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, { int ret; u64 search_start = 0; - u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; - if (data) { - alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if (root == root->fs_info->chunk_root) { - alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; - } else { - alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; - } + data = btrfs_get_alloc_profile(root, data); again: - data = btrfs_reduce_alloc_profile(root, data); /* * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations @@ -4196,13 +4418,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - BUG_ON(!btrfs_tree_locked(parent)); + btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); extent_buffer_get(parent); path->nodes[parent_level] = parent; path->slots[parent_level] = btrfs_header_nritems(parent); - BUG_ON(!btrfs_tree_locked(node)); + btrfs_assert_tree_locked(node); level = btrfs_header_level(node); extent_buffer_get(node); path->nodes[level] = node; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 872f104576e5..dc78954861b3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1091,19 +1091,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(struct page *) * nrptrs); - ret = btrfs_check_free_space(root, write_bytes, 0); + ret = btrfs_check_data_free_space(root, inode, write_bytes); if (ret) goto out; ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - if (ret) + if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); goto out; + } ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); btrfs_drop_pages(pages, num_pages); goto out; } @@ -1111,8 +1116,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); btrfs_drop_pages(pages, num_pages); - if (ret) + if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); goto out; + } if (will_write) { btrfs_fdatawrite_range(inode->i_mapping, pos, @@ -1136,6 +1144,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); + if (ret) + err = ret; out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3cee77ae03c8..7d4f948bc22a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -102,34 +102,6 @@ static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) } /* - * a very lame attempt at stopping writes when the FS is 85% full. There - * are countless ways this is incorrect, but it is better than nothing. - */ -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del) -{ - u64 total; - u64 used; - u64 thresh; - int ret = 0; - - spin_lock(&root->fs_info->delalloc_lock); - total = btrfs_super_total_bytes(&root->fs_info->super_copy); - used = btrfs_super_bytes_used(&root->fs_info->super_copy); - if (for_del) - thresh = total * 90; - else - thresh = total * 85; - - do_div(thresh, 100); - - if (used + root->fs_info->delalloc_bytes + num_required > thresh) - ret = -ENOSPC; - spin_unlock(&root->fs_info->delalloc_lock); - return ret; -} - -/* * this does all the hard work for inserting an inline extent into * the btree. The caller should have done a btrfs_drop_extents so that * no overlapping inline items exist in the btree @@ -1190,6 +1162,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1223,9 +1196,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, (unsigned long long)end - start + 1, (unsigned long long) root->fs_info->delalloc_bytes); + btrfs_delalloc_free_space(root, inode, (u64)-1); root->fs_info->delalloc_bytes = 0; BTRFS_I(inode)->delalloc_bytes = 0; } else { + btrfs_delalloc_free_space(root, inode, + end - start + 1); root->fs_info->delalloc_bytes -= end - start + 1; BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } @@ -2245,10 +2221,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; - ret = btrfs_check_free_space(root, 1, 1); - if (ret) - goto fail; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2261,7 +2233,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); -fail: btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2284,10 +2255,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } - ret = btrfs_check_free_space(root, 1, 1); - if (ret) - goto fail; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2304,7 +2271,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) fail_trans: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); -fail: btrfs_btree_balance_dirty(root, nr); if (ret && !err) @@ -2818,7 +2784,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (size <= hole_start) return 0; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) return err; @@ -3014,6 +2980,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->last_trans = 0; bi->logged_trans = 0; bi->delalloc_bytes = 0; + bi->reserved_bytes = 0; bi->disk_i_size = 0; bi->flags = 0; bi->index_cnt = (u64)-1; @@ -3035,6 +3002,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_ino = args->ino; init_btrfs_i(inode); BTRFS_I(inode)->root = args->root; + btrfs_set_inode_space_info(args->root, inode); return 0; } @@ -3455,6 +3423,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->index_cnt = 2; BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; + btrfs_set_inode_space_info(root, inode); if (mode & S_IFDIR) owner = 0; @@ -3602,7 +3571,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; @@ -3665,7 +3634,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; trans = btrfs_start_transaction(root, 1); @@ -3733,7 +3702,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, return -ENOENT; btrfs_inc_nlink(inode); - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; err = btrfs_set_inode_index(dir, &index); @@ -3779,7 +3748,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto out_unlock; @@ -4336,7 +4305,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) u64 page_start; u64 page_end; - ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); if (ret) goto out; @@ -4349,6 +4318,7 @@ again: if ((page->mapping != inode->i_mapping) || (page_start >= size)) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); /* page got truncated out from underneath us */ goto out_unlock; } @@ -4631,7 +4601,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -EXDEV; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto out_unlock; @@ -4749,7 +4719,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 988fdc8b49eb..bca729fc80c8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -70,7 +70,7 @@ static noinline int create_subvol(struct btrfs_root *root, u64 index = 0; unsigned long nr = 1; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto fail_commit; @@ -203,7 +203,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (!root->ref_cows) return -EINVAL; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto fail_unlock; @@ -374,7 +374,7 @@ static int btrfs_defrag_file(struct file *file) unsigned long i; int ret; - ret = btrfs_check_free_space(root, inode->i_size, 0); + ret = btrfs_check_data_free_space(root, inode, inode->i_size); if (ret) return -ENOSPC; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 85506c4a3af7..47b0a88c12a2 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -220,8 +220,8 @@ int btrfs_tree_unlock(struct extent_buffer *eb) return 0; } -int btrfs_tree_locked(struct extent_buffer *eb) +void btrfs_assert_tree_locked(struct extent_buffer *eb) { - return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || - spin_is_locked(&eb->lock); + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + assert_spin_locked(&eb->lock); } diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 6bb0afbff928..6c4ce457168c 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -21,11 +21,11 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); -int btrfs_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_lock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); void btrfs_set_lock_blocking(struct extent_buffer *eb); void btrfs_clear_lock_blocking(struct extent_buffer *eb); +void btrfs_assert_tree_locked(struct extent_buffer *eb); #endif diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 39bd4d38e889..45e59d3c7f1f 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1913,6 +1913,9 @@ COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ /* 0x00 */ COMPATIBLE_IOCTL(FIBMAP) COMPATIBLE_IOCTL(FIGETBSZ) +/* 'X' - originally XFS but some now in the VFS */ +COMPATIBLE_IOCTL(FIFREEZE) +COMPATIBLE_IOCTL(FITHAW) /* RAID */ COMPATIBLE_IOCTL(RAID_VERSION) COMPATIBLE_IOCTL(GET_ARRAY_INFO) diff --git a/fs/dcache.c b/fs/dcache.c index 937df0fb0da5..07e2d4a44bda 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1180,7 +1180,7 @@ struct dentry *d_obtain_alias(struct inode *inode) iput(inode); return res; } -EXPORT_SYMBOL_GPL(d_obtain_alias); +EXPORT_SYMBOL(d_obtain_alias); /** * d_splice_alias - splice a disconnected dentry into the tree if one exists diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 5f3231b9633f..bff4052b05e7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -198,9 +198,6 @@ static int mknod_ptmx(struct super_block *sb) fsi->ptmx_dentry = dentry; rc = 0; - - printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n", - inode->i_ino); out: mutex_unlock(&root->d_inode->i_mutex); return rc; @@ -369,8 +366,6 @@ static int new_pts_mount(struct file_system_type *fs_type, int flags, struct pts_fs_info *fsi; struct pts_mount_opts *opts; - printk(KERN_NOTICE "devpts: newinstance mount\n"); - err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt); if (err) return err; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9a50b8052dcf..de9459b4cb94 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,7 +609,9 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) + if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || + (*retries)++ > 3 || + !EXT4_SB(sb)->s_journal) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4fb86a0061d0..627f8c3337a3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_group_desc *gdp; struct ext4_super_block *es; struct ext4_sb_info *sbi; - int fatal = 0, err, count; + int fatal = 0, err, count, cleared; ext4_group_t flex_group; if (atomic_read(&inode->i_count) > 1) { @@ -248,8 +248,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) goto error_return; /* Ok, now we can actually update the inode bitmaps.. */ - if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), - bit, bitmap_bh->b_data)) + spin_lock(sb_bgl_lock(sbi, block_group)); + cleared = ext4_clear_bit(bit, bitmap_bh->b_data); + spin_unlock(sb_bgl_lock(sbi, block_group)); + if (!cleared) ext4_error(sb, "ext4_free_inode", "bit already cleared for inode %lu", ino); else { @@ -715,6 +717,13 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) if (sbi->s_log_groups_per_flex) { ret2 = find_group_flex(sb, dir, &group); + if (ret2 == -1) { + ret2 = find_group_other(sb, dir, &group); + if (ret2 == 0 && printk_ratelimit()) + printk(KERN_NOTICE "ext4: find_group_flex " + "failed, fallback succeeded dir %lu\n", + dir->i_ino); + } goto got_group; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbd2ca99d113..c7fed5b18745 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1368,6 +1368,10 @@ retry: goto out; } + /* We cannot recurse into the filesystem as the transaction is already + * started */ + flags |= AOP_FLAG_NOFS; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { ext4_journal_stop(handle); @@ -1377,7 +1381,7 @@ retry: *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_get_block); + ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -2540,7 +2544,7 @@ retry: ext4_journal_stop(handle); - if (mpd.retval == -ENOSPC) { + if ((mpd.retval == -ENOSPC) && sbi->s_journal) { /* commit the transaction which would * free blocks released in the transaction * and try again @@ -2667,6 +2671,9 @@ retry: ret = PTR_ERR(handle); goto out; } + /* We cannot recurse into the filesystem as the transaction is already + * started */ + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a5732c58f676..39d1993cfa13 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3091,7 +3091,6 @@ static int ext4_freeze(struct super_block *sb) /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); if (error) goto out; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 6b74d09adbe5..de0004fe6e00 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -202,9 +202,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block) sector_t blocknr; /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ - mutex_lock(&mapping->host->i_mutex); + down_read(&mapping->host->i_alloc_sem); blocknr = generic_block_bmap(mapping, block, fat_get_block); - mutex_unlock(&mapping->host->i_mutex); + up_read(&mapping->host->i_alloc_sem); return blocknr; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e5eaa62fd17f..e3fe9918faaf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -274,6 +274,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) int ret; BUG_ON(inode->i_state & I_SYNC); + WARN_ON(inode->i_state & I_NEW); /* Set I_SYNC, reset I_DIRTY */ dirty = inode->i_state & I_DIRTY; @@ -298,6 +299,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } spin_lock(&inode_lock); + WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { if (!(inode->i_state & I_DIRTY) && @@ -470,6 +472,11 @@ void generic_sync_sb_inodes(struct super_block *sb, break; } + if (inode->i_state & I_NEW) { + requeue_io(inode); + continue; + } + if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; if (!sb_is_blkdev_sb(sb)) @@ -531,7 +538,7 @@ void generic_sync_sb_inodes(struct super_block *sb, list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct address_space *mapping; - if (inode->i_state & (I_FREEING|I_WILL_FREE)) + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) continue; mapping = inode->i_mapping; if (mapping->nrpages == 0) diff --git a/fs/inode.c b/fs/inode.c index 913ab2d9a5d1..826fb0b9d1c3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -359,6 +359,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) invalidate_inode_buffers(inode); if (!atomic_read(&inode->i_count)) { list_move(&inode->i_list, dispose); + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; count++; continue; @@ -460,6 +461,7 @@ static void prune_icache(int nr_to_scan) continue; } list_move(&inode->i_list, &freeable); + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; nr_pruned++; } @@ -656,6 +658,7 @@ void unlock_new_inode(struct inode *inode) * just created it (so there can be no old holders * that haven't tested I_LOCK). */ + WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); inode->i_state &= ~(I_LOCK|I_NEW); wake_up_inode(inode); } @@ -1145,6 +1148,7 @@ void generic_delete_inode(struct inode *inode) list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1186,16 +1190,19 @@ static void generic_forget_inode(struct inode *inode) spin_unlock(&inode_lock); return; } + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; spin_unlock(&inode_lock); write_inode_now(inode, 1); spin_lock(&inode_lock); + WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; inodes_stat.nr_unused--; hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 3cceef4ad2b7..e9580104b6ba 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -95,13 +95,17 @@ static int jffs2_garbage_collect_thread(void *_c) spin_unlock(&c->erase_completion_lock); - /* This thread is purely an optimisation. But if it runs when - other things could be running, it actually makes things a - lot worse. Use yield() and put it at the back of the runqueue - every time. Especially during boot, pulling an inode in - with read_inode() is much preferable to having the GC thread - get there first. */ - yield(); + /* Problem - immediately after bootup, the GCD spends a lot + * of time in places like jffs2_kill_fragtree(); so much so + * that userspace processes (like gdm and X) are starved + * despite plenty of cond_resched()s and renicing. Yield() + * doesn't help, either (presumably because userspace and GCD + * are generally competing for a higher latency resource - + * disk). + * This forces the GCD to slow the hell down. Pulling an + * inode in with read_inode() is much preferable to having + * the GC thread get there first. */ + schedule_timeout_interruptible(msecs_to_jiffies(50)); /* Put_super will send a SIGKILL and then wait on the sem. */ diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 6ca08ad887c0..1fc1e92356ee 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -220,7 +220,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) { uint32_t fn_end = tn->fn->ofs + tn->fn->size; - struct jffs2_tmp_dnode_info *this; + struct jffs2_tmp_dnode_info *this, *ptn; dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); @@ -251,11 +251,18 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, if (this) { /* If the node is coincident with another at a lower address, back up until the other node is found. It may be relevant */ - while (this->overlapped) - this = tn_prev(this); - - /* First node should never be marked overlapped */ - BUG_ON(!this); + while (this->overlapped) { + ptn = tn_prev(this); + if (!ptn) { + /* + * We killed a node which set the overlapped + * flags during the scan. Fix it up. + */ + this->overlapped = 0; + break; + } + this = ptn; + } dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole"); } @@ -360,7 +367,17 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, } if (!this->overlapped) break; - this = tn_prev(this); + + ptn = tn_prev(this); + if (!ptn) { + /* + * We killed a node which set the overlapped + * flags during the scan. Fix it up. + */ + this->overlapped = 0; + break; + } + this = ptn; } } @@ -456,8 +473,15 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, eat_last(&rii->tn_root, &last->rb); ver_insert(&ver_root, last); - if (unlikely(last->overlapped)) - continue; + if (unlikely(last->overlapped)) { + if (pen) + continue; + /* + * We killed a node which set the overlapped + * flags during the scan. Fix it up. + */ + last->overlapped = 0; + } /* Now we have a bunch of nodes in reverse version order, in the tree at ver_root. Most of the time, diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 60fe74035db5..3a9e5deed74d 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -4796,6 +4796,29 @@ out: return ret; } +static int ocfs2_replace_extent_rec(struct inode *inode, + handle_t *handle, + struct ocfs2_path *path, + struct ocfs2_extent_list *el, + int split_index, + struct ocfs2_extent_rec *split_rec) +{ + int ret; + + ret = ocfs2_path_bh_journal_access(handle, inode, path, + path_num_items(path) - 1); + if (ret) { + mlog_errno(ret); + goto out; + } + + el->l_recs[split_index] = *split_rec; + + ocfs2_journal_dirty(handle, path_leaf_bh(path)); +out: + return ret; +} + /* * Mark part or all of the extent record at split_index in the leaf * pointed to by path as written. This removes the unwritten @@ -4885,7 +4908,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode, if (ctxt.c_contig_type == CONTIG_NONE) { if (ctxt.c_split_covers_rec) - el->l_recs[split_index] = *split_rec; + ret = ocfs2_replace_extent_rec(inode, handle, + path, el, + split_index, split_rec); else ret = ocfs2_split_and_insert(inode, handle, path, et, &last_eb_bh, split_index, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 54e182a27caf..0a2813947853 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1849,12 +1849,12 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, if (!mle) { if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN && res->owner != assert->node_idx) { - mlog(ML_ERROR, "assert_master from " - "%u, but current owner is " - "%u! (%.*s)\n", - assert->node_idx, res->owner, - namelen, name); - goto kill; + mlog(ML_ERROR, "DIE! Mastery assert from %u, " + "but current owner is %u! (%.*s)\n", + assert->node_idx, res->owner, namelen, + name); + __dlm_print_one_lock_resource(res); + BUG(); } } else if (mle->type != DLM_MLE_MIGRATION) { if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d1295203029f..4060bb328bc8 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -181,8 +181,7 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); /* This ensures that clear refmap is sent after the set */ - __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG | - DLM_LOCK_RES_MIGRATING)); + __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); spin_unlock(&res->spinlock); /* clear our bit from the master's refmap, ignore errors */ diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 86ca085ef324..fcf879ed6930 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -117,11 +117,11 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, else BUG_ON(res->owner == dlm->node_num); - spin_lock(&dlm->spinlock); + spin_lock(&dlm->ast_lock); /* We want to be sure that we're not freeing a lock * that still has AST's pending... */ in_use = !list_empty(&lock->ast_list); - spin_unlock(&dlm->spinlock); + spin_unlock(&dlm->ast_lock); if (in_use) { mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " "while waiting for an ast!", res->lockname.len, diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 206a2370876a..7219a86d34cc 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -320,9 +320,14 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres); static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert); -#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ - mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ - _err, _func, _lockres->l_name); \ +#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ + if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ + mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ + _err, _func, _lockres->l_name); \ + else \ + mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \ + _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ + (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ } while (0) static int ocfs2_downconvert_thread(void *arg); static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 077384135f4e..946d3c34b90b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -341,6 +341,9 @@ struct ocfs2_super struct ocfs2_node_map osb_recovering_orphan_dirs; unsigned int *osb_orphan_wipes; wait_queue_head_t osb_wipe_event; + + /* used to protect metaecc calculation check of xattr. */ + spinlock_t osb_xattr_lock; }; #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index b1cb38fbe807..7ac83a81ee55 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1537,6 +1537,13 @@ static int ocfs2_get_sector(struct super_block *sb, unlock_buffer(*bh); ll_rw_block(READ, 1, bh); wait_on_buffer(*bh); + if (!buffer_uptodate(*bh)) { + mlog_errno(-EIO); + brelse(*bh); + *bh = NULL; + return -EIO; + } + return 0; } @@ -1747,6 +1754,7 @@ static int ocfs2_initialize_super(struct super_block *sb, INIT_LIST_HEAD(&osb->blocked_lock_list); osb->blocked_lock_count = 0; spin_lock_init(&osb->osb_lock); + spin_lock_init(&osb->osb_xattr_lock); ocfs2_init_inode_steal_slot(osb); atomic_set(&osb->alloc_stats.moves, 0); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 915039fffe6e..4ddd788add67 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -82,13 +82,14 @@ struct ocfs2_xattr_set_ctxt { #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) #define OCFS2_XATTR_INLINE_SIZE 80 +#define OCFS2_XATTR_HEADER_GAP 4 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ - sizeof(struct ocfs2_xattr_header) \ - - sizeof(__u32)) + - OCFS2_XATTR_HEADER_GAP) #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ - sizeof(struct ocfs2_xattr_block) \ - sizeof(struct ocfs2_xattr_header) \ - - sizeof(__u32)) + - OCFS2_XATTR_HEADER_GAP) static struct ocfs2_xattr_def_value_root def_xv = { .xv.xr_list.l_count = cpu_to_le16(1), @@ -274,10 +275,12 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, bucket->bu_blocks, bucket->bu_bhs, 0, NULL); if (!rc) { + spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, bucket->bu_bhs, bucket->bu_blocks, &bucket_xh(bucket)->xh_check); + spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); if (rc) mlog_errno(rc); } @@ -310,9 +313,11 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, { int i; + spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, bucket->bu_bhs, bucket->bu_blocks, &bucket_xh(bucket)->xh_check); + spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); for (i = 0; i < bucket->bu_blocks; i++) ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); @@ -1507,7 +1512,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, last += 1; } - free = min_offs - ((void *)last - xs->base) - sizeof(__u32); + free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; if (free < 0) return -EIO; @@ -2190,7 +2195,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode, last += 1; } - free = min_offs - ((void *)last - xs->base) - sizeof(__u32); + free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; if (free < 0) return 0; @@ -2592,8 +2597,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, if (!ret) { /* Update inode ctime. */ - ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(ctxt->handle, inode, + xis->inode_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -5060,8 +5066,8 @@ try_again: xh_free_start = le16_to_cpu(xh->xh_free_start); header_size = sizeof(struct ocfs2_xattr_header) + count * sizeof(struct ocfs2_xattr_entry); - max_free = OCFS2_XATTR_BUCKET_SIZE - - le16_to_cpu(xh->xh_name_value_len) - header_size; + max_free = OCFS2_XATTR_BUCKET_SIZE - header_size - + le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP; mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " "of %u which exceed block size\n", @@ -5094,7 +5100,7 @@ try_again: need = 0; } - free = xh_free_start - header_size; + free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP; /* * We need to make sure the new name/value pair * can exist in the same block. @@ -5127,7 +5133,8 @@ try_again: } xh_free_start = le16_to_cpu(xh->xh_free_start); - free = xh_free_start - header_size; + free = xh_free_start - header_size + - OCFS2_XATTR_HEADER_GAP; if (xh_free_start % blocksize < need) free -= xh_free_start % blocksize; diff --git a/fs/pipe.c b/fs/pipe.c index 3a48ba5179d5..14f502b89cf5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -699,12 +699,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) int retval; mutex_lock(&inode->i_mutex); - retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); - - if (retval >= 0) + if (retval >= 0) { retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); - + if (retval < 0) /* this can happen only if on == T */ + fasync_helper(-1, filp, 0, &pipe->fasync_readers); + } mutex_unlock(&inode->i_mutex); if (retval < 0) diff --git a/fs/proc/page.c b/fs/proc/page.c index 767d95a6d1b1..e9983837d08d 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -80,7 +80,7 @@ static const struct file_operations proc_kpagecount_operations = { #define KPF_RECLAIM 9 #define KPF_BUDDY 10 -#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) +#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos) static ssize_t kpageflags_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -107,7 +107,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, else kflags = ppage->flags; - uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | + uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) | kpf_copy_bit(kflags, KPF_ERROR, PG_error) | kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index c837dfc2b3c6..2a7960310349 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -80,7 +80,7 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with zlib). */ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, - int length, u64 *next_index, int srclength) + int length, u64 *next_index, int srclength, int pages) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; @@ -184,7 +184,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, offset = 0; } - if (msblk->stream.avail_out == 0) { + if (msblk->stream.avail_out == 0 && page < pages) { msblk->stream.next_out = buffer[page++]; msblk->stream.avail_out = PAGE_CACHE_SIZE; } @@ -201,25 +201,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, zlib_init = 1; } - zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH); + zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH); if (msblk->stream.avail_in == 0 && k < b) put_bh(bh[k++]); } while (zlib_err == Z_OK); if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate returned unexpected result" - " 0x%x, srclength %d, avail_in %d," - " avail_out %d\n", zlib_err, srclength, - msblk->stream.avail_in, - msblk->stream.avail_out); + ERROR("zlib_inflate error, data probably corrupt\n"); goto release_mutex; } zlib_err = zlib_inflateEnd(&msblk->stream); if (zlib_err != Z_OK) { - ERROR("zlib_inflateEnd returned unexpected result 0x%x," - " srclength %d\n", zlib_err, srclength); + ERROR("zlib_inflate error, data probably corrupt\n"); goto release_mutex; } length = msblk->stream.total_out; @@ -268,7 +263,8 @@ block_release: put_bh(bh[k]); read_failure: - ERROR("sb_bread failed reading block 0x%llx\n", cur_index); + ERROR("squashfs_read_data failed to read block 0x%llx\n", + (unsigned long long) index); kfree(bh); return -EIO; } diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index f29eda16d25e..1c4739e33af6 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -119,7 +119,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->length = squashfs_read_data(sb, entry->data, block, length, &entry->next_index, - cache->block_size); + cache->block_size, cache->pages); spin_lock(&cache->lock); @@ -406,7 +406,7 @@ int squashfs_read_table(struct super_block *sb, void *buffer, u64 block, for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; res = squashfs_read_data(sb, data, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length); + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); kfree(data); return res; } diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 7a63398bb855..9101dbde39ec 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -133,7 +133,8 @@ int squashfs_read_inode(struct inode *inode, long long ino) type = le16_to_cpu(sqshb_ino->inode_type); switch (type) { case SQUASHFS_REG_TYPE: { - unsigned int frag_offset, frag_size, frag; + unsigned int frag_offset, frag; + int frag_size; u64 frag_blk; struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg; @@ -175,7 +176,8 @@ int squashfs_read_inode(struct inode *inode, long long ino) break; } case SQUASHFS_LREG_TYPE: { - unsigned int frag_offset, frag_size, frag; + unsigned int frag_offset, frag; + int frag_size; u64 frag_blk; struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg; diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 6b2515d027d5..0e9feb6adf7e 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -34,7 +34,7 @@ static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) /* block.c */ extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, - int); + int, int); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 071df5b5b491..681ec0d83799 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -389,7 +389,7 @@ static int __init init_squashfs_fs(void) return err; } - printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) " + printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " "Phillip Lougher\n"); return 0; diff --git a/fs/super.c b/fs/super.c index 8349ed6b1412..6ce501447ada 100644 --- a/fs/super.c +++ b/fs/super.c @@ -371,8 +371,10 @@ retry: continue; if (!grab_super(old)) goto retry; - if (s) + if (s) { + up_write(&s->s_umount); destroy_super(s); + } return old; } } @@ -387,6 +389,7 @@ retry: err = set(s, data); if (err) { spin_unlock(&sb_lock); + up_write(&s->s_umount); destroy_super(s); return ERR_PTR(err); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cb329edc925b..aa1016bb9134 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -34,6 +34,12 @@ #include <linux/backing-dev.h> #include <linux/freezer.h> +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_ag.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" + static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); STATIC int xfsbufd_wakeup(int, gfp_t); @@ -1435,10 +1441,12 @@ xfs_unregister_buftarg( void xfs_free_buftarg( - xfs_buftarg_t *btp) + struct xfs_mount *mp, + struct xfs_buftarg *btp) { xfs_flush_buftarg(btp, 1); - xfs_blkdev_issue_flush(btp); + if (mp->m_flags & XFS_MOUNT_BARRIER) + xfs_blkdev_issue_flush(btp); xfs_free_bufhash(btp); iput(btp->bt_mapping->host); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 288ae7c4c800..9b4d666ad31f 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -413,7 +413,7 @@ static inline int XFS_bwrite(xfs_buf_t *bp) * Handling of buftargs. */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); -extern void xfs_free_buftarg(xfs_buftarg_t *); +extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c71e226da7f5..32ae5028e96b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -734,15 +734,15 @@ xfs_close_devices( { if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { struct block_device *logdev = mp->m_logdev_targp->bt_bdev; - xfs_free_buftarg(mp->m_logdev_targp); + xfs_free_buftarg(mp, mp->m_logdev_targp); xfs_blkdev_put(logdev); } if (mp->m_rtdev_targp) { struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; - xfs_free_buftarg(mp->m_rtdev_targp); + xfs_free_buftarg(mp, mp->m_rtdev_targp); xfs_blkdev_put(rtdev); } - xfs_free_buftarg(mp->m_ddev_targp); + xfs_free_buftarg(mp, mp->m_ddev_targp); } /* @@ -811,9 +811,9 @@ xfs_open_devices( out_free_rtdev_targ: if (mp->m_rtdev_targp) - xfs_free_buftarg(mp->m_rtdev_targp); + xfs_free_buftarg(mp, mp->m_rtdev_targp); out_free_ddev_targ: - xfs_free_buftarg(mp->m_ddev_targp); + xfs_free_buftarg(mp, mp->m_ddev_targp); out_close_rtdev: if (rtdev) xfs_blkdev_put(rtdev); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index e2fb6210d4c5..478e587087fe 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -246,9 +246,6 @@ xfs_iget_cache_miss( goto out_destroy; } - if (lock_flags) - xfs_ilock(ip, lock_flags); - /* * Preload the radix tree so we can insert safely under the * write spinlock. Note that we cannot sleep inside the preload @@ -256,7 +253,16 @@ xfs_iget_cache_miss( */ if (radix_tree_preload(GFP_KERNEL)) { error = EAGAIN; - goto out_unlock; + goto out_destroy; + } + + /* + * Because the inode hasn't been added to the radix-tree yet it can't + * be found by another thread, so we can do the non-sleeping lock here. + */ + if (lock_flags) { + if (!xfs_ilock_nowait(ip, lock_flags)) + BUG(); } mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); @@ -284,7 +290,6 @@ xfs_iget_cache_miss( out_preload_end: write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); -out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); out_destroy: diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b1047de2fffd..61af610d79b3 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1455,10 +1455,19 @@ xlog_recover_add_to_trans( item = item->ri_prev; if (item->ri_total == 0) { /* first region to be added */ - item->ri_total = in_f->ilf_size; - ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); - item->ri_buf = kmem_zalloc((item->ri_total * - sizeof(xfs_log_iovec_t)), KM_SLEEP); + if (in_f->ilf_size == 0 || + in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { + xlog_warn( + "XFS: bad number of regions (%d) in inode log format", + in_f->ilf_size); + ASSERT(0); + return XFS_ERROR(EIO); + } + + item->ri_total = in_f->ilf_size; + item->ri_buf = + kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), + KM_SLEEP); } ASSERT(item->ri_total > item->ri_cnt); /* Description region is ri_buf[0] */ |