diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 112 | ||||
-rw-r--r-- | fs/ext4/group.h | 27 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 146 | ||||
-rw-r--r-- | fs/ext4/resize.c | 21 | ||||
-rw-r--r-- | fs/ext4/super.c | 47 |
5 files changed, 322 insertions, 31 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b74bf4368441..5927687b3e79 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -20,6 +20,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include "group.h" /* * balloc.c contains the blocks allocation and deallocation routines */ @@ -42,6 +43,94 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, } +/* Initializes an uninitialized block bitmap if given, and returns the + * number of blocks free in the group. */ +unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, + int block_group, struct ext4_group_desc *gdp) +{ + unsigned long start; + int bit, bit_max; + unsigned free_blocks, group_blocks; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (bh) { + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks used to prevent allocation + * essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, + "Checksum bad for group %u\n", block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + memset(bh->b_data, 0, sb->s_blocksize); + } + + /* Check for superblock and gdt backups in this group */ + bit_max = ext4_bg_has_super(sb, block_group); + + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * + sbi->s_desc_per_block) { + if (bit_max) { + bit_max += ext4_bg_num_gdb(sb, block_group); + bit_max += + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + } + } else { /* For META_BG_BLOCK_GROUPS */ + int group_rel = (block_group - + le32_to_cpu(sbi->s_es->s_first_meta_bg)) % + EXT4_DESC_PER_BLOCK(sb); + if (group_rel == 0 || group_rel == 1 || + (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1)) + bit_max += 1; + } + + if (block_group == sbi->s_groups_count - 1) { + /* + * Even though mke2fs always initialize first and last group + * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need + * to make sure we calculate the right free blocks + */ + group_blocks = ext4_blocks_count(sbi->s_es) - + le32_to_cpu(sbi->s_es->s_first_data_block) - + (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); + } else { + group_blocks = EXT4_BLOCKS_PER_GROUP(sb); + } + + free_blocks = group_blocks - bit_max; + + if (bh) { + for (bit = 0; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + + start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(sbi->s_es->s_first_data_block); + + /* Set bits for block and inode bitmaps, and inode table */ + ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); + ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); + for (bit = le32_to_cpu(gdp->bg_inode_table) - start, + bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + + /* + * Also if the number of blocks within the group is + * less than the blocksize * 8 ( which is the size + * of bitmap ), set rest of the block bitmap to 1 + */ + mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); + } + + return free_blocks - sbi->s_itb_per_group - 2; +} + + /* * The free blocks are managed by bitmaps. A file system contains several * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap @@ -119,7 +208,7 @@ block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map) * * Return buffer_head on success or NULL in case of failure. */ -static struct buffer_head * +struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { int i; @@ -127,11 +216,24 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) struct buffer_head * bh = NULL; ext4_fsblk_t bitmap_blk; - desc = ext4_get_group_desc (sb, block_group, NULL); + desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) return NULL; bitmap_blk = ext4_block_bitmap(sb, desc); - bh = sb_bread(sb, bitmap_blk); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + bh = sb_getblk(sb, bitmap_blk); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_block_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, bitmap_blk); + } if (!bh) ext4_error (sb, __FUNCTION__, "Cannot read block bitmap - " @@ -627,6 +729,7 @@ do_more: desc->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + group_freed); + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); @@ -1685,8 +1788,11 @@ allocated: ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_sub(&sbi->s_freeblocks_counter, num); diff --git a/fs/ext4/group.h b/fs/ext4/group.h new file mode 100644 index 000000000000..1577910bb58b --- /dev/null +++ b/fs/ext4/group.h @@ -0,0 +1,27 @@ +/* + * linux/fs/ext4/group.h + * + * Copyright (C) 2007 Cluster File Systems, Inc + * + * Author: Andreas Dilger <adilger@clusterfs.com> + */ + +#ifndef _LINUX_EXT4_GROUP_H +#define _LINUX_EXT4_GROUP_H + +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +struct buffer_head *read_block_bitmap(struct super_block *sb, + unsigned int block_group); +extern unsigned ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext4_group_desc *desc); +#define ext4_free_blocks_after_init(sb, group, desc) \ + ext4_init_block_bitmap(sb, NULL, group, desc) +extern unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext4_group_desc *desc); +extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); +#endif /* _LINUX_EXT4_GROUP_H */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38e9a0a705df..c61f37fd3f05 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -28,6 +28,7 @@ #include "xattr.h" #include "acl.h" +#include "group.h" /* * ialloc.c contains the inodes allocation and deallocation routines @@ -43,6 +44,52 @@ * the free blocks count in the block. */ +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. + */ +void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext4_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* Initializes an uninitialized inode bitmap */ +unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int block_group, + struct ext4_group_desc *gdp) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks and inodes use to prevent + * allocation, essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", + block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + bh->b_data); + + return EXT4_INODES_PER_GROUP(sb); +} /* * Read the inode allocation bitmap for a given block_group, reading @@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; - - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_inode_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + } if (!bh) ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " @@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) if (is_directory) gdp->bg_used_dirs_count = cpu_to_le16( le16_to_cpu(gdp->bg_used_dirs_count) - 1); + gdp->bg_checksum = ext4_group_desc_csum(sbi, + block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) @@ -435,7 +496,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) struct ext4_sb_info *sbi; int err = 0; struct inode *ret; - int i; + int i, free = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -517,11 +578,13 @@ repeat_in_this_group: goto out; got: - ino += group * EXT4_INODES_PER_GROUP(sb) + 1; - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_new_inode", - "reserved inode or inode > inodes count - " - "block_group = %d, inode=%lu", group, ino); + ino++; + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || + ino > EXT4_INODES_PER_GROUP(sb)) { + ext4_error(sb, __FUNCTION__, + "reserved inode or inode > inodes count - " + "block_group = %d, inode=%lu", group, + ino + group * EXT4_INODES_PER_GROUP(sb)); err = -EIO; goto fail; } @@ -529,13 +592,78 @@ got: BUFFER_TRACE(bh2, "get_write_access"); err = ext4_journal_get_write_access(handle, bh2); if (err) goto fail; + + /* We may have to initialize the block bitmap if it isn't already */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + struct buffer_head *block_bh = read_block_bitmap(sb, group); + + BUFFER_TRACE(block_bh, "get block bitmap access"); + err = ext4_journal_get_write_access(handle, block_bh); + if (err) { + brelse(block_bh); + goto fail; + } + + free = 0; + spin_lock(sb_bgl_lock(sbi, group)); + /* recheck and clear flag under lock if we still need to */ + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + free = ext4_free_blocks_after_init(sb, group, gdp); + gdp->bg_free_blocks_count = cpu_to_le16(free); + } + spin_unlock(sb_bgl_lock(sbi, group)); + + /* Don't need to dirty bitmap block if we didn't change it */ + if (free) { + BUFFER_TRACE(block_bh, "dirty block bitmap"); + err = ext4_journal_dirty_metadata(handle, block_bh); + } + + brelse(block_bh); + if (err) + goto fail; + } + spin_lock(sb_bgl_lock(sbi, group)); + /* If we didn't allocate from within the initialized part of the inode + * table then we need to initialize up to this inode. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + + /* When marking the block group with + * ~EXT4_BG_INODE_UNINIT we don't want to depend + * on the value of bg_itable_unsed even though + * mke2fs could have initialized the same for us. + * Instead we calculated the value below + */ + + free = 0; + } else { + free = EXT4_INODES_PER_GROUP(sb) - + le16_to_cpu(gdp->bg_itable_unused); + } + + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + * + */ + if (ino > free) + gdp->bg_itable_unused = + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); + } + gdp->bg_free_inodes_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); if (S_ISDIR(mode)) { gdp->bg_used_dirs_count = cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); } + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); err = ext4_journal_dirty_metadata(handle, bh2); @@ -557,7 +685,7 @@ got: inode->i_gid = current->fsgid; inode->i_mode = mode; - inode->i_ino = ino; + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 472fc0d3e1c0..0a7e914c495a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -16,6 +16,7 @@ #include <linux/errno.h> #include <linux/slab.h> +#include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -140,25 +141,6 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, } /* - * To avoid calling the atomic setbit hundreds or thousands of times, we only - * need to use it within a single byte (to ensure we get endianness right). - * We can use memset for the rest of the bitmap as there are no other users. - */ -static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -{ - int i; - - if (start_bit >= end_bit) - return; - - ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); - for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) - ext4_set_bit(i, bitmap); - if (i < end_bit) - memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -} - -/* * Set up the block and inode bitmaps, and the inode table for the new group. * This doesn't need to be part of the main transaction, since we are only * changing blocks outside the actual filesystem. We still do journaling to @@ -842,6 +824,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* * Make the new blocks and inodes valid next. We do this before diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e2bdf93693a6..dd4ff9c87358 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -37,12 +37,14 @@ #include <linux/quotaops.h> #include <linux/seq_file.h> #include <linux/log2.h> +#include <linux/crc16.h> #include <asm/uaccess.h> #include "xattr.h" #include "acl.h" #include "namei.h" +#include "group.h" static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -1308,6 +1310,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, return res; } +__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) +{ + __u16 crc = 0; + + if (sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int offset = offsetof(struct ext4_group_desc, bg_checksum); + __le32 le_group = cpu_to_le32(block_group); + + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); + crc = crc16(crc, (__u8 *)gdp, offset); + offset += sizeof(gdp->bg_checksum); /* skip checksum */ + /* for checksum of struct ext4_group_desc do the rest...*/ + if ((sbi->s_es->s_feature_incompat & + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + offset < le16_to_cpu(sbi->s_es->s_desc_size)) + crc = crc16(crc, (__u8 *)gdp + offset, + le16_to_cpu(sbi->s_es->s_desc_size) - + offset); + } + + return cpu_to_le16(crc); +} + +int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) +{ + if ((sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && + (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) + return 0; + + return 1; +} + /* Called at mount-time, super-block is locked */ static int ext4_check_descriptors (struct super_block * sb) { @@ -1362,6 +1401,14 @@ static int ext4_check_descriptors (struct super_block * sb) i, inode_table); return 0; } + if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { + ext4_error(sb, __FUNCTION__, + "Checksum for group %d failed (%u!=%u)\n", i, + le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), + le16_to_cpu(gdp->bg_checksum)); + return 0; + } first_block += EXT4_BLOCKS_PER_GROUP(sb); gdp = (struct ext4_group_desc *) ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); |