From d2dcd9083f101584e029cbd4f0e1a4e573170d43 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Fri, 9 Mar 2012 06:27:12 +0530 Subject: logfs: destroy the reserved inodes while unmounting We were assuming that the evict_inode() would never be called on reserved inodes. However, (after the commit 8e22c1a4e logfs: get rid of magical inodes) while unmounting the file system, in put_super, we call iput() on all of the reserved inodes. The following simple test used to cause a kernel panic on LogFS: 1. Mount a LogFS file system on /mnt 2. Create a file $ touch /mnt/a 3. Try to unmount the FS $ umount /mnt The simple fix would be to drop the assumption and properly destroy the reserved inodes. Signed-off-by: Prasad Joshi --- fs/logfs/inode.c | 16 ++++++++++++++++ fs/logfs/readwrite.c | 1 - fs/logfs/segment.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index a422f42238b2..df093d9e4da1 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, logfs_i_callback); } +static void __logfs_destroy_meta_inode(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + BUG_ON(li->li_block); + call_rcu(&inode->i_rcu, logfs_i_callback); +} + static void logfs_destroy_inode(struct inode *inode) { struct logfs_inode *li = logfs_inode(inode); + if (inode->i_ino < LOGFS_RESERVED_INOS) { + /* + * The reserved inodes are never destroyed unless we are in + * unmount path. 
+ */ + __logfs_destroy_meta_inode(inode); + return; + } + BUG_ON(list_empty(&li->li_freeing_list)); spin_lock(&logfs_inode_lock); li->li_refcount--; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index e3ab5e5a904c..c8ea8664699c 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode) return; } - BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); page = inode_to_page(inode); BUG_ON(!page); /* FIXME: Use emergency page */ logfs_put_write_page(page); diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index e28d090c98d6..038da0991794 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb) static void map_invalidatepage(struct page *page, unsigned long l) { - BUG(); + return; } static int map_releasepage(struct page *page, gfp_t g) -- cgit v1.2.3 From cd8bfa9c8a13cf3facc5731da17e10188b3795d1 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 2 Apr 2012 09:23:04 +0530 Subject: logfs: initialize the number of iovecs in bio This fixes the following crash when a LogFS file system, created on an encrypted LVM volume, was mounted [ 526.548034] BUG: unable to handle kernel NULL pointer dereference at [ 526.550106] IP: [] memcpy+0xb/0x120 [ 526.551008] PGD bd60067 PUD 1778d067 PMD 0 [ 526.551783] Oops: 0000 [#1] SMP Pid: 2043, comm: mount RIP: 0010:[] [] memcpy+0xb/0x120 Call Trace: kcryptd_io_read+0xdb/0x100 crypt_map+0xfd/0x190 __map_bio+0x48/0x150 __split_and_process_bio+0x51b/0x630 dm_request+0x138/0x230 generic_make_request+0xca/0x100 submit_bio+0x87/0x110 sync_request+0xdd/0x120 [logfs] bdev_readpage+0x2e/0x70 [logfs] do_read_cache_page+0x82/0x180 logfs_mount+0x2ad/0x770 [logfs] mount_fs+0x47/0x1c0 vfs_kern_mount+0x72/0x110 do_kern_mount+0x54/0x110 do_mount+0x520/0x7f0 sys_mount+0x90/0xe0 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=42292 Reported-by: Witold Baryluk Signed-off-by: Prasad Joshi --- 
fs/logfs/dev_bdev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index df0de27c2733..ea29df36893d 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) struct completion complete; bio_init(&bio); + bio.bi_max_vecs = 1; bio.bi_io_vec = &bio_vec; bio_vec.bv_page = page; bio_vec.bv_len = PAGE_SIZE; -- cgit v1.2.3 From 28c0254ede13ab575d2df5c6585ed3d4817c3e6b Mon Sep 17 00:00:00 2001 From: Yan, Zheng Date: Mon, 28 May 2012 14:44:30 +0800 Subject: ceph: check PG_Private flag before accessing page->private I got lots of NULL pointer dereference Oops when compiling kernel on ceph. The bug is because the kernel page migration routine replaces some pages in the page cache with new pages, these new pages' private can be non-zero. Signed-off-by: Zheng Yan Signed-off-by: Sage Weil --- fs/ceph/addr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 173b1d22e59b..8b67304e4b80 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -54,7 +54,12 @@ (CONGESTION_ON_THRESH(congestion_kb) - \ (CONGESTION_ON_THRESH(congestion_kb) >> 2)) - +static inline struct ceph_snap_context *page_snap_context(struct page *page) +{ + if (PagePrivate(page)) + return (void *)page->private; + return NULL; +} /* * Dirty a page. 
Optimistically adjust accounting, on the assumption @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) { struct inode *inode; struct ceph_inode_info *ci; - struct ceph_snap_context *snapc = (void *)page->private; + struct ceph_snap_context *snapc = page_snap_context(page); BUG_ON(!PageLocked(page)); - BUG_ON(!page->private); BUG_ON(!PagePrivate(page)); BUG_ON(!page->mapping); @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g) struct inode *inode = page->mapping ? page->mapping->host : NULL; dout("%p releasepage %p idx %lu\n", inode, page, page->index); WARN_ON(PageDirty(page)); - WARN_ON(page->private); WARN_ON(PagePrivate(page)); return 0; } @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osdc = &fsc->client->osdc; /* verify this is a writeable snap context */ - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc == NULL) { dout("writepage %p page %p not dirty?\n", inode, page); goto out; @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) oldest = get_oldest_context(inode, &snap_size); if (snapc->seq > oldest->seq) { dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, (void *)page->private); + inode, page, snapc); /* we should only noop if called by kswapd */ WARN_ON((current->flags & PF_MEMALLOC) == 0); ceph_put_snap_context(oldest); @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); - ceph_put_snap_context((void *)page->private); + ceph_put_snap_context(page_snap_context(page)); page->private = 0; ClearPagePrivate(page); dout("unlocking %d %p\n", i, page); @@ -795,7 +798,7 @@ get_more_pages: } /* only if matching snap context */ - pgsnapc = (void *)page->private; + pgsnapc = page_snap_context(page); if (pgsnapc->seq > snapc->seq) { dout("page snapc %p %lld > 
oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); @@ -984,7 +987,7 @@ retry_locked: BUG_ON(!ci->i_snap_realm); down_read(&mdsc->snap_rwsem); BUG_ON(!ci->i_snap_realm->cached_context); - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* * this page is already dirty in another (older) snap -- cgit v1.2.3 From 15d9882c336db2db73ccf9871ae2398e452f694c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 May 2012 23:26:43 -0500 Subject: libceph: embed ceph messenger structure in ceph_client A ceph client has a pointer to a ceph messenger structure in it. There is always exactly one ceph messenger for a ceph client, so there is no need to allocate it separate from the ceph client structure. Switch the ceph_client structure to embed its ceph_messenger structure. Signed-off-by: Alex Elder Reviewed-by: Yehuda Sadeh Reviewed-by: Sage Weil --- fs/ceph/mds_client.c | 2 +- include/linux/ceph/libceph.h | 2 +- include/linux/ceph/messenger.h | 9 +++++---- net/ceph/ceph_common.c | 18 +++++------------- net/ceph/messenger.c | 30 +++++++++--------------------- net/ceph/mon_client.c | 6 +++--- net/ceph/osd_client.c | 4 ++-- 7 files changed, 26 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 200bc87eceb1..ad30261cd4c0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -394,7 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); + ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con); s->s_con.private = s; s->s_con.ops = &mds_con_ops; s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 92eef7c3d3c5..927361c4b0a8 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -131,7 +131,7 @@ struct ceph_client { u32 
supported_features; u32 required_features; - struct ceph_messenger *msgr; /* messenger instance */ + struct ceph_messenger msgr; /* messenger instance */ struct ceph_mon_client monc; struct ceph_osd_client osdc; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 74f6c9bd8074..3fbd4be804ed 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -211,10 +211,11 @@ extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); -extern struct ceph_messenger *ceph_messenger_create( - struct ceph_entity_addr *myaddr, - u32 features, u32 required); -extern void ceph_messenger_destroy(struct ceph_messenger *); +extern void ceph_messenger_init(struct ceph_messenger *msgr, + struct ceph_entity_addr *myaddr, + u32 supported_features, + u32 required_features, + bool nocrc); extern void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cc913193d992..2de3ea1bbd64 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -468,19 +468,15 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, /* msgr */ if (ceph_test_opt(client, MYIP)) myaddr = &client->options->my_addr; - client->msgr = ceph_messenger_create(myaddr, - client->supported_features, - client->required_features); - if (IS_ERR(client->msgr)) { - err = PTR_ERR(client->msgr); - goto fail; - } - client->msgr->nocrc = ceph_test_opt(client, NOCRC); + ceph_messenger_init(&client->msgr, myaddr, + client->supported_features, + client->required_features, + ceph_test_opt(client, NOCRC)); /* subsystems */ err = ceph_monc_init(&client->monc, client); if (err < 0) - goto fail_msgr; + goto fail; err = ceph_osdc_init(&client->osdc, client); if (err < 0) goto fail_monc; @@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); 
-fail_msgr: - ceph_messenger_destroy(client->msgr); fail: kfree(client); return ERR_PTR(err); @@ -515,8 +509,6 @@ void ceph_destroy_client(struct ceph_client *client) ceph_debugfs_client_cleanup(client); - ceph_messenger_destroy(client->msgr); - ceph_destroy_options(client->options); kfree(client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 2ca491fc50e2..d8423a3f6698 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2245,18 +2245,14 @@ out: /* - * create a new messenger instance + * initialize a new messenger instance */ -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, - u32 supported_features, - u32 required_features) +void ceph_messenger_init(struct ceph_messenger *msgr, + struct ceph_entity_addr *myaddr, + u32 supported_features, + u32 required_features, + bool nocrc) { - struct ceph_messenger *msgr; - - msgr = kzalloc(sizeof(*msgr), GFP_KERNEL); - if (msgr == NULL) - return ERR_PTR(-ENOMEM); - msgr->supported_features = supported_features; msgr->required_features = required_features; @@ -2269,19 +2265,11 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, msgr->inst.addr.type = 0; get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); encode_my_addr(msgr); + msgr->nocrc = nocrc; - dout("messenger_create %p\n", msgr); - return msgr; -} -EXPORT_SYMBOL(ceph_messenger_create); - -void ceph_messenger_destroy(struct ceph_messenger *msgr) -{ - dout("destroy %p\n", msgr); - kfree(msgr); - dout("destroyed messenger %p\n", msgr); + dout("%s %p\n", __func__, msgr); } -EXPORT_SYMBOL(ceph_messenger_destroy); +EXPORT_SYMBOL(ceph_messenger_init); static void clear_standby(struct ceph_connection *con) { diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 1845cde26227..704dc95dc620 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -763,7 +763,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->con = 
kmalloc(sizeof(*monc->con), GFP_KERNEL); if (!monc->con) goto out_monmap; - ceph_con_init(monc->client->msgr, monc->con); + ceph_con_init(&monc->client->msgr, monc->con); monc->con->private = monc; monc->con->ops = &mon_con_ops; @@ -880,8 +880,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc, } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); - monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; - monc->client->msgr->inst.name.num = + monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; + monc->client->msgr.inst.name.num = cpu_to_le64(monc->auth->global_id); __send_subscribe(monc); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b098e7b591f0..cca4c7f1c780 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -639,7 +639,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; - ceph_con_init(osdc->client->msgr, &osd->o_con); + ceph_con_init(&osdc->client->msgr, &osd->o_con); osd->o_con.private = osd; osd->o_con.ops = &osd_con_ops; osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; @@ -1391,7 +1391,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) epoch, maplen); newmap = osdmap_apply_incremental(&p, next, osdc->osdmap, - osdc->client->msgr); + &osdc->client->msgr); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad; -- cgit v1.2.3 From 0a305e496059a113f93bdd3ad27a5aaa917fe34d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 6 Jun 2012 11:17:59 +0100 Subject: GFS2: Extend the life of the reservations This patch lengthens the lifespan of the reservations structure for inodes. Before, they were allocated and deallocated for every write operation. With this patch, they are allocated when the first write occurs, and deallocated when the last process closes the file. 
It's more efficient to do it this way because it saves GFS2 a lot of unnecessary allocates and frees. It also gives us more flexibility for the future: (1) we can now fold the qadata structure back into the structure and save those alloc/frees, (2) we can use this for multi-block reservations. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 2 +- fs/gfs2/file.c | 23 ++++++++++++++++--- fs/gfs2/inode.c | 23 ++++++++++++++++++- fs/gfs2/quota.c | 13 +++++++++-- fs/gfs2/rgrp.c | 68 +++++++++++++++++++++++++++++++-------------------------- fs/gfs2/rgrp.h | 10 +++++++++ fs/gfs2/super.c | 2 ++ fs/gfs2/trans.h | 2 +- 8 files changed, 104 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e80a464850c8..aba77b5720bc 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (ip->i_res) + if (gfs2_mb_reserved(ip)) gfs2_inplace_release(ip); if (qa) { gfs2_quota_unlock(ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 31b199f6efc1..37906174d417 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_file *fp; + struct gfs2_inode *ip = GFS2_I(inode); fp = file->private_data; file->private_data = NULL; + if ((file->f_mode & FMODE_WRITE) && ip->i_res && + (atomic_read(&inode->i_writecount) == 1)) + gfs2_rs_delete(ip); + if (gfs2_assert_warn(sdp, fp)) return -EIO; @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, 
const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct dentry *dentry = file->f_dentry; + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + int ret; + + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; if (file->f_flags & O_APPEND) { - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder gh; - int ret; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9ba2444e077..2a1b4b5a648c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; + error = gfs2_rs_alloc(dip); + if (error) + return error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + /* the new inode needs a reservation so it can allocate xattrs. */ + error = gfs2_rs_alloc(GFS2_I(inode)); + if (error) + goto fail_gunlock2; + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_trans_end(sdp); /* Check if we reserved space in the rgrp. Function link_dinode may not, depending on whether alloc is required. 
*/ - if (dip->i_res) + if (gfs2_mb_reserved(dip)) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_qadata_put(dip); @@ -819,6 +828,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; + error = gfs2_rs_alloc(dip); + if (error) + return error; + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); @@ -1234,6 +1247,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; + error = gfs2_rs_alloc(ndip); + if (error) + return error; + if (odip != ndip) { error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, &r_gh); @@ -1644,6 +1661,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; + error = gfs2_rs_alloc(ip); + if (error) + return error; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) return error; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b97178e7d397..197cc2dade7f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -764,6 +764,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); @@ -1549,10 +1553,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out_put; + mutex_lock(&ip->i_inode.i_mutex); error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); if (error) - goto out_put; + goto out_unlockput; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) goto out_q; @@ -1609,8 +1617,9 @@ out_i: gfs2_glock_dq_uninit(&i_gh); out_q: gfs2_glock_dq_uninit(&q_gh); -out_put: +out_unlockput: mutex_unlock(&ip->i_inode.i_mutex); +out_put: qd_put(qd); return error; } diff 
--git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9bd1973..e944fefbc9a8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + + down_write(&ip->i_rw_mutex); + if (!ip->i_res) { + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) + error = -ENOMEM; + } + up_write(&ip->i_rw_mutex); + return error; +} + +/** + * gfs2_rs_delete - delete a reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -992,22 +1025,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) return ip->i_qadata; } -/** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -static int gfs2_blkrsv_get(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; -} - /** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data @@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) return -ENOSPC; } -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = 
GFS2_SB(&ip->i_inode); struct gfs2_blkreserv *rs; - int error; + int error = 0; u64 last_unlinked = NO_BLOCK; int tries = 0; - error = gfs2_blkrsv_get(ip); - if (error) - return error; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { @@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; rgd = ip->i_rgd; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4b10f4de25f..d9eda5f9ef2a 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); +extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); +/* This is how to tell if a reservation is "inplace" reserved: */ +static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +{ + if (ip->i_res && ip->i_res->rs_requested) + return 1; + return 0; +} + #endif /* 
__RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 713e621c240b..65578df29446 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1554,6 +1554,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); + gfs2_rs_delete(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; @@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_flags = 0; ip->i_gl = NULL; ip->i_rgd = NULL; + ip->i_res = NULL; } return &ip->i_inode; } diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 125d4572e1c0..41f42cdccbb8 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -31,7 +31,7 @@ struct gfs2_glock; static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) { const struct gfs2_blkreserv *rs = ip->i_res; - if (rs->rs_requested < ip->i_rgd->rd_length) + if (rs && rs->rs_requested < ip->i_rgd->rd_length) return rs->rs_requested + 1; return ip->i_rgd->rd_length; } -- cgit v1.2.3 From 5407e24229408d7586ee451a384fc13e4a2332be Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 18 May 2012 09:28:23 -0400 Subject: GFS2: Fold quota data into the reservations struct This patch moves the ancillary quota data structures into the block reservations structure. This saves GFS2 some time and effort in allocating and deallocating the qadata structure. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 16 ++------------- fs/gfs2/bmap.c | 18 +++++------------ fs/gfs2/dir.c | 9 +-------- fs/gfs2/file.c | 22 ++++++--------------- fs/gfs2/incore.h | 13 +++++-------- fs/gfs2/inode.c | 44 ++++-------------------------------------- fs/gfs2/main.c | 1 - fs/gfs2/quota.c | 59 ++++++++++++++++++++++++++++---------------------------- fs/gfs2/rgrp.c | 19 ------------------ fs/gfs2/rgrp.h | 8 -------- fs/gfs2/super.c | 9 +-------- fs/gfs2/xattr.c | 24 ++--------------------- 12 files changed, 56 insertions(+), 186 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index aba77b5720bc..d6526347d386 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, unsigned int data_blocks = 0, ind_blocks = 0, rblocks; int alloc_required; int error = 0; - struct gfs2_qadata *qa = NULL; pgoff_t index = pos >> PAGE_CACHE_SHIFT; unsigned from = pos & (PAGE_CACHE_SIZE - 1); struct page *page; @@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { - qa = gfs2_qadata_get(ip); - if (!qa) { - error = -ENOMEM; - goto out_unlock; - } - error = gfs2_quota_lock_check(ip); if (error) - goto out_alloc_put; + goto out_unlock; error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (error) @@ -708,8 +701,6 @@ out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_qadata_put(ip); } out_unlock: if (&ip->i_inode == sdp->sd_rindex) { @@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct buffer_head *dibh; - struct gfs2_qadata *qa = ip->i_qadata; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 
unsigned int to = from + len; int ret; @@ -880,10 +870,8 @@ failed: gfs2_trans_end(sdp); if (gfs2_mb_reserved(ip)) gfs2_inplace_release(ip); - if (qa) { + if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); - gfs2_qadata_put(ip); - } if (inode == sdp->sd_rindex) { gfs2_glock_dq(&m_ip->i_gh); gfs2_holder_uninit(&m_ip->i_gh); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index dab54099dd98..6d957a86482b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1045,12 +1045,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; find_metapath(sdp, lblock, &mp, ip->i_height); - if (!gfs2_qadata_get(ip)) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out; + return error; while (height--) { struct strip_mine sm; @@ -1064,8 +1065,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) gfs2_quota_unhold(ip); -out: - gfs2_qadata_put(ip); return error; } @@ -1167,19 +1166,14 @@ static int do_grow(struct inode *inode, u64 size) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; - struct gfs2_qadata *qa = NULL; int error; int unstuff = 0; if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - qa = gfs2_qadata_get(ip); - if (qa == NULL) - return -ENOMEM; - error = gfs2_quota_lock_check(ip); if (error) - goto do_grow_alloc_put; + return error; error = gfs2_inplace_reserve(ip, 1); if (error) @@ -1214,8 +1208,6 @@ do_grow_release: gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); -do_grow_alloc_put: - gfs2_qadata_put(ip); } return error; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8aaeb07a07b5..259b088cfc4c 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (!ht) return -ENOMEM; - if (!gfs2_qadata_get(dip)) { 
- error = -ENOMEM; - goto out; - } - error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out_put; + goto out; /* Count the number of leaves */ bh = leaf_bh; @@ -1942,8 +1937,6 @@ out_rg_gunlock: out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); -out_put: - gfs2_qadata_put(dip); out: kfree(ht); return error; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 37906174d417..26e2905070ed 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -366,7 +366,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; - struct gfs2_qadata *qa; loff_t size; int ret; @@ -397,14 +396,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out_unlock; } - ret = -ENOMEM; - qa = gfs2_qadata_get(ip); - if (qa == NULL) + ret = gfs2_rindex_update(sdp); + if (ret) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) - goto out_alloc_put; + goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (ret) @@ -451,8 +449,6 @@ out_trans_fail: gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&gh); out: @@ -764,7 +760,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, struct gfs2_inode *ip = GFS2_I(inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; - struct gfs2_qadata *qa; int error; const loff_t pos = offset; const loff_t count = len; @@ -804,15 +799,13 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, offset += bytes; continue; } - qa = gfs2_qadata_get(ip); - if (!qa) { - error = -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) goto out_unlock; - } error = gfs2_quota_lock_check(ip); if (error) - goto out_alloc_put; 
+ goto out_unlock; retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); @@ -852,7 +845,6 @@ retry: offset += max_bytes; gfs2_inplace_release(ip); gfs2_quota_unlock(ip); - gfs2_qadata_put(ip); } if (error == 0) @@ -863,8 +855,6 @@ out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&ip->i_gh); out_uninit: diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 67fd6beffece..5cda51a3e3bd 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -289,16 +289,14 @@ struct gfs2_glock { #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ -struct gfs2_qadata { /* quota allocation data */ - /* Quota stuff */ - struct gfs2_quota_data *qa_qd[2*MAXQUOTAS]; - struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS]; - unsigned int qa_qd_num; -}; - struct gfs2_blkreserv { u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */ + + /* ancillary quota stuff */ + struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; + struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; + unsigned int rs_qa_qd_num; }; enum { @@ -319,7 +317,6 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? 
*/ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ - struct gfs2_qadata *i_qadata; /* quota allocation data */ struct gfs2_blkreserv *i_res; /* resource group block reservation */ struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2a1b4b5a648c..2b035e0959b2 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -521,12 +521,10 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, int error; munge_mode_uid_gid(dip, &mode, &uid, &gid); - if (!gfs2_qadata_get(dip)) - return -ENOMEM; error = gfs2_quota_lock(dip, uid, gid); if (error) - goto out; + return error; error = gfs2_quota_check(dip, uid, gid); if (error) @@ -542,8 +540,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, out_quota: gfs2_quota_unlock(dip); -out: - gfs2_qadata_put(dip); return error; } @@ -551,15 +547,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_qadata *qa; int alloc_required; struct buffer_head *dibh; int error; - qa = gfs2_qadata_get(dip); - if (!qa) - return -ENOMEM; - error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) goto fail; @@ -611,7 +602,6 @@ fail_quota_locks: gfs2_quota_unlock(dip); fail: - gfs2_qadata_put(dip); return error; } @@ -734,7 +724,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (gfs2_mb_reserved(dip)) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); - gfs2_qadata_put(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); d_instantiate(dentry, inode); @@ -883,16 +872,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, error = 0; if (alloc_required) { - struct gfs2_qadata *qa = gfs2_qadata_get(dip); - - if (!qa) { - error = -ENOMEM; - goto out_gunlock; - } - error = gfs2_quota_lock_check(dip); if (error) - goto out_alloc; 
+ goto out_gunlock; error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); if (error) @@ -935,9 +917,6 @@ out_ipres: out_gunlock_q: if (alloc_required) gfs2_quota_unlock(dip); -out_alloc: - if (alloc_required) - gfs2_qadata_put(dip); out_gunlock: gfs2_glock_dq(ghs + 1); out_child: @@ -1374,16 +1353,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; if (alloc_required) { - struct gfs2_qadata *qa = gfs2_qadata_get(ndip); - - if (!qa) { - error = -ENOMEM; - goto out_gunlock; - } - error = gfs2_quota_lock_check(ndip); if (error) - goto out_alloc; + goto out_gunlock; error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); if (error) @@ -1444,9 +1416,6 @@ out_ipreserv: out_gunlock_q: if (alloc_required) gfs2_quota_unlock(ndip); -out_alloc: - if (alloc_required) - gfs2_qadata_put(ndip); out_gunlock: while (x--) { gfs2_glock_dq(ghs + x); @@ -1607,12 +1576,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) ogid = ngid = NO_QUOTA_CHANGE; - if (!gfs2_qadata_get(ip)) - return -ENOMEM; - error = gfs2_quota_lock(ip, nuid, ngid); if (error) - goto out_alloc; + return error; if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { error = gfs2_quota_check(ip, nuid, ngid); @@ -1638,8 +1604,6 @@ out_end_trans: gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); -out_alloc: - gfs2_qadata_put(ip); return error; } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 6cdb0f2a1b09..e04d0e09ee7b 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo) inode_init_once(&ip->i_inode); init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); - ip->i_qadata = NULL; ip->i_res = NULL; ip->i_hash_cache = NULL; } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 197cc2dade7f..7d1ede7b66fc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd) int 
gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; - struct gfs2_quota_data **qd = qa->qa_qd; + struct gfs2_quota_data **qd; int error; - if (gfs2_assert_warn(sdp, !qa->qa_qd_num) || + if (ip->i_res == NULL) + gfs2_rs_alloc(ip); + + qd = ip->i_res->rs_qa_qd; + + if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) || gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) return -EIO; @@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { error = qdsb_get(sdp, QUOTA_USER, uid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; } @@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; } @@ -542,16 +546,17 @@ out: void gfs2_quota_unhold(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; unsigned int x; + if (ip->i_res == NULL) + return; gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); - for (x = 0; x < qa->qa_qd_num; x++) { - qdsb_put(qa->qa_qd[x]); - qa->qa_qd[x] = NULL; + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + qdsb_put(ip->i_res->rs_qa_qd[x]); + ip->i_res->rs_qa_qd[x] = NULL; } - qa->qa_qd_num = 0; + ip->i_res->rs_qa_qd_num = 0; } static int sort_qd(const void *a, const void *b) @@ -919,7 +924,6 @@ fail: int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; struct 
gfs2_quota_data *qd; unsigned int x; int error = 0; @@ -932,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *), - sort_qd, NULL); + sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num, + sizeof(struct gfs2_quota_data *), sort_qd, NULL); - for (x = 0; x < qa->qa_qd_num; x++) { + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { int force = NO_FORCE; - qd = qa->qa_qd[x]; + qd = ip->i_res->rs_qa_qd[x]; if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) force = FORCE; - error = do_glock(qd, force, &qa->qa_qd_ghs[x]); + error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]); if (error) break; } @@ -949,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) set_bit(GIF_QD_LOCKED, &ip->i_flags); else { while (x--) - gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); + gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); gfs2_quota_unhold(ip); } @@ -994,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; @@ -1002,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; - for (x = 0; x < qa->qa_qd_num; x++) { + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { struct gfs2_quota_data *qd; int sync; - qd = qa->qa_qd[x]; + qd = ip->i_res->rs_qa_qd[x]; sync = need_sync(qd); - gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); + gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); if (sync && qd_trylock(qd)) qda[count++] = qd; @@ -1042,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type) int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; s64 value; unsigned int x; @@ 
-1054,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - for (x = 0; x < qa->qa_qd_num; x++) { - qd = qa->qa_qd[x]; + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + qd = ip->i_res->rs_qa_qd[x]; if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) @@ -1093,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid) { - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; unsigned int x; @@ -1102,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - for (x = 0; x < qa->qa_qd_num; x++) { - qd = qa->qa_qd[x]; + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + qd = ip->i_res->rs_qa_qd[x]; if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e944fefbc9a8..9eca6a9cff8f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1006,25 +1006,6 @@ out: return ret; } -/** - * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - int error; - BUG_ON(ip->i_qadata != NULL); - ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); - error = gfs2_rindex_update(sdp); - if (error) - fs_warn(sdp, "rindex update returns %d\n", error); - return ip->i_qadata; -} - /** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index d9eda5f9ef2a..5d8314dbc899 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -29,14 +29,6 @@ extern void gfs2_free_clones(struct 
gfs2_rgrpd *rgd); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); -extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip); -static inline void gfs2_qadata_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_qadata == NULL); - kfree(ip->i_qadata); - ip->i_qadata = NULL; -} - extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); extern void gfs2_inplace_release(struct gfs2_inode *ip); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 65578df29446..81fc76264ed4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1399,7 +1399,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) static int gfs2_dinode_dealloc(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa; struct gfs2_rgrpd *rgd; struct gfs2_holder gh; int error; @@ -1409,13 +1408,9 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) return -EIO; } - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out; + return error; rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); if (!rgd) { @@ -1443,8 +1438,6 @@ out_rg_gunlock: gfs2_glock_dq_uninit(&gh); out_qs: gfs2_quota_unhold(ip); -out: - gfs2_qadata_put(ip); return error; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 927f4df874ae..523c0de0d805 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -325,13 +325,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, int leave) { - struct gfs2_qadata *qa; int error; - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) goto out_alloc; @@ -340,7 +335,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, gfs2_quota_unhold(ip); out_alloc: - gfs2_qadata_put(ip); return error; } @@ -713,17 +707,12 @@ 
static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { - struct gfs2_qadata *qa; struct buffer_head *dibh; int error; - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; - error = gfs2_quota_lock_check(ip); if (error) - goto out; + return error; error = gfs2_inplace_reserve(ip, blks); if (error) @@ -753,8 +742,6 @@ out_ipres: gfs2_inplace_release(ip); out_gunlock_q: gfs2_quota_unlock(ip); -out: - gfs2_qadata_put(ip); return error; } @@ -1494,16 +1481,11 @@ out_gunlock: int gfs2_ea_dealloc(struct gfs2_inode *ip) { - struct gfs2_qadata *qa; int error; - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out_alloc; + return error; error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) @@ -1519,8 +1501,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) out_quota: gfs2_quota_unhold(ip); -out_alloc: - gfs2_qadata_put(ip); return error; } -- cgit v1.2.3 From 23d0bb834e264f38335f19fe601564b8422431e7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 May 2012 15:26:56 +0100 Subject: GFS2: Add "top dir" flag support This patch adds support for the "top dir" flag. Currently this is unused but a subsequent patch is planned which will add support for the Orlov allocation policy when allocating subdirectories in a parent with this flag set. In order to ensure backward compatible behaviour, mkfs.gfs2 does not currently tag the root directory with this flag, it must always be set manually. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 4 ++++ include/linux/gfs2_ondisk.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 26e2905070ed..6fbf3cbd974d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = { [7] = GFS2_DIF_NOATIME, [12] = GFS2_DIF_EXHASH, [14] = GFS2_DIF_INHERIT_JDATA, + [17] = GFS2_DIF_TOPDIR, }; static const u32 gfs2_to_fsflags[32] = { @@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = { [gfs2fl_AppendOnly] = FS_APPEND_FL, [gfs2fl_NoAtime] = FS_NOATIME_FL, [gfs2fl_ExHash] = FS_INDEX_FL, + [gfs2fl_TopLevel] = FS_TOPDIR_FL, [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, }; @@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode) GFS2_DIF_NOATIME| \ GFS2_DIF_SYNC| \ GFS2_DIF_SYSTEM| \ + GFS2_DIF_TOPDIR| \ GFS2_DIF_INHERIT_JDATA) /** @@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); if (!S_ISDIR(inode->i_mode)) { + gfsflags &= ~GFS2_DIF_TOPDIR; if (gfsflags & GFS2_DIF_INHERIT_JDATA) gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); return do_gfs2_set_flags(filp, gfsflags, ~0); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index fa98bdb073b9..e8ccf6ff3b4d 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -214,6 +214,7 @@ enum { gfs2fl_NoAtime = 7, gfs2fl_Sync = 8, gfs2fl_System = 9, + gfs2fl_TopLevel = 10, gfs2fl_TruncInProg = 29, gfs2fl_InheritDirectio = 30, gfs2fl_InheritJdata = 31, @@ -230,8 +231,9 @@ enum { #define GFS2_DIF_NOATIME 0x00000080 #define GFS2_DIF_SYNC 0x00000100 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */ +#define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */ #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */ -#define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 +#define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */ 
#define GFS2_DIF_INHERIT_JDATA 0x80000000 struct gfs2_dinode { -- cgit v1.2.3 From 1b8ba31a88c5115687095ca2a01bfcaecb489b5a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 May 2012 10:47:51 +0100 Subject: GFS2: Fix error handling when reading an invalid block from the journal When we read an invalid block from the journal, we should not call withdraw, but simply print a message and return an error. It is up to the caller to then handle that error. In the case of mount that means a failed mount, rather than a withdraw (requiring a reboot). In the case of recovering another node's journal, we return an error via the uevent. Signed-off-by: Steven Whitehouse --- fs/gfs2/lops.c | 9 +++++++-- fs/gfs2/util.h | 18 +++++++----------- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 852c1be1dd3b..8ff95a2d54ee 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) goto out; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); - gfs2_meta_check(sdp, bd->bd_bh); - gfs2_pin(sdp, bd->bd_bh); mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { + printk(KERN_ERR + "Attempting to add uninitialised block to journal (inplace block=%lld)\n", + (unsigned long long)bd->bd_bh->b_blocknr); + BUG(); + } + gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 3586b0dd6aa7..80535739ac7b 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *type, const char *function, char *file, unsigned int line); -static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, - struct buffer_head *bh, - const char *function, - char 
*file, unsigned int line) +static inline int gfs2_meta_check(struct gfs2_sbd *sdp, + struct buffer_head *bh) { struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; u32 magic = be32_to_cpu(mh->mh_magic); - if (unlikely(magic != GFS2_MAGIC)) - return gfs2_meta_check_ii(sdp, bh, "magic number", function, - file, line); + if (unlikely(magic != GFS2_MAGIC)) { + printk(KERN_ERR "GFS2: Magic number missing at %llu\n", + (unsigned long long)bh->b_blocknr); + return -EIO; + } return 0; } -#define gfs2_meta_check(sdp, bh) \ -gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__) - - int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, u16 type, u16 t, const char *function, -- cgit v1.2.3 From 1bfd89f4e6e1adc6a782d94aa5d4c53be1e404d7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 May 2012 23:26:43 -0500 Subject: libceph: fully initialize connection in con_init() Move the initialization of a ceph connection's private pointer, operations vector pointer, and peer name information into ceph_con_init(). Rearrange the arguments so the connection pointer is first. 
Hide the byte-swapping of the peer entity number inside ceph_con_init() Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/mds_client.c | 7 ++----- include/linux/ceph/messenger.h | 6 ++++-- net/ceph/messenger.c | 9 ++++++++- net/ceph/mon_client.c | 8 +++----- net/ceph/osd_client.c | 7 ++----- 5 files changed, 19 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ad30261cd4c0..ecd7f15741c1 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -394,11 +394,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con); - s->s_con.private = s; - s->s_con.ops = &mds_con_ops; - s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; - s->s_con.peer_name.num = cpu_to_le64(mds); + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, + CEPH_ENTITY_TYPE_MDS, mds); spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 5e852f444f68..dd27837f79ac 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -227,8 +227,10 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u32 required_features, bool nocrc); -extern void ceph_con_init(struct ceph_messenger *msgr, - struct ceph_connection *con); +extern void ceph_con_init(struct ceph_connection *con, void *private, + const struct ceph_connection_operations *ops, + struct ceph_messenger *msgr, __u8 entity_type, + __u64 entity_num); extern void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr); extern bool ceph_con_opened(struct ceph_connection *con); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 36b440a00cc2..3b65f6e6911b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -521,15 +521,22 @@ void ceph_con_put(struct ceph_connection *con) /* * initialize a new 
connection. */ -void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) +void ceph_con_init(struct ceph_connection *con, void *private, + const struct ceph_connection_operations *ops, + struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num) { dout("con_init %p\n", con); memset(con, 0, sizeof(*con)); + con->private = private; + con->ops = ops; atomic_set(&con->nref, 1); con->msgr = msgr; con_sock_state_init(con); + con->peer_name.type = (__u8) entity_type; + con->peer_name.num = cpu_to_le64(entity_num); + mutex_init(&con->mutex); INIT_LIST_HEAD(&con->out_queue); INIT_LIST_HEAD(&con->out_sent); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 6adbea78b168..ab6b24a5169e 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -142,11 +142,9 @@ static int __open_session(struct ceph_mon_client *monc) monc->sub_renew_after = jiffies; /* i.e., expired */ monc->want_next_osdmap = !!monc->want_next_osdmap; - ceph_con_init(&monc->client->msgr, &monc->con); - monc->con.private = monc; - monc->con.ops = &mon_con_ops; - monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON; - monc->con.peer_name.num = cpu_to_le64(monc->cur_mon); + ceph_con_init(&monc->con, monc, &mon_con_ops, + &monc->client->msgr, + CEPH_ENTITY_TYPE_MON, monc->cur_mon); dout("open_session mon%d opening\n", monc->cur_mon); ceph_con_open(&monc->con, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5b41a6929cd9..448c9da8beff 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -640,11 +640,8 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; - ceph_con_init(&osdc->client->msgr, &osd->o_con); - osd->o_con.private = osd; - osd->o_con.ops = &osd_con_ops; - osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; - osd->o_con.peer_name.num = cpu_to_le64(onum); + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr, + CEPH_ENTITY_TYPE_OSD, onum); 
INIT_LIST_HEAD(&osd->o_keepalive_item); return osd; -- cgit v1.2.3 From df5d2f5560a9c33129391a136ed9f0ac26abe69b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 7 Jun 2012 13:30:16 +0100 Subject: GFS2: Increase buffer size for glocks and glstats debugfs files As per Al Viro's suggestion, this increases the buffer size used for these two files. This provides a speed up of slightly less than 8x (i.e. proportional to the buffer size) for cases when we have large numbers of glocks. Cc: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index dab2526071cc..1c4cddf42a66 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1972,6 +1972,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; + seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = 8*PAGE_SIZE; } return ret; } @@ -1984,6 +1987,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; + seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = 8*PAGE_SIZE; } return ret; } -- cgit v1.2.3 From ba1ddcb6ca0c46edd275790d1e4e2cfd6219ce19 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 8 Jun 2012 11:16:22 +0100 Subject: GFS2: Cache last hash bucket for glock seq_files For the glocks and glstats seq_files, which are exposed via debugfs we should cache the most recent hash bucket, along with the offset into that bucket. This allows us to restart from that point, rather than having to begin at the beginning each time. 
This is an idea from Eric Dumazet, however I've slightly extended it so that if the position from which we are due to start is at any point beyond the last cached point, we start from the last cached point, plus whatever is the appropriate offset. I don't really expect people to be lseeking around these files, but if they did so with only positive offsets, then we'd still get some of the benefit of using a cached offset. With my simple test of around 200k entries in the file, I'm seeing an approx 10x speed up. Cc: Eric Dumazet Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1c4cddf42a66..3ad8cb3eeb88 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -46,10 +46,12 @@ #include "trace_gfs2.h" struct gfs2_glock_iter { - int hash; /* hash bucket index */ - struct gfs2_sbd *sdp; /* incore superblock */ - struct gfs2_glock *gl; /* current glock struct */ - char string[512]; /* scratch space */ + int hash; /* hash bucket index */ + unsigned nhash; /* Index within current bucket */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + loff_t last_pos; /* last position */ + char string[512]; /* scratch space */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); @@ -950,7 +952,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 
if (seq) { struct gfs2_glock_iter *gi = seq->private; vsprintf(gi->string, fmt, args); - seq_printf(seq, gi->string); + seq_puts(seq, gi->string); } else { vaf.fmt = fmt; vaf.va = &args; @@ -1854,8 +1856,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gl = gi->gl; if (gl) { gi->gl = glock_hash_next(gl); + gi->nhash++; } else { + if (gi->hash >= GFS2_GL_HASH_SIZE) { + rcu_read_unlock(); + return 1; + } gi->gl = glock_hash_chain(gi->hash); + gi->nhash = 0; } while (gi->gl == NULL) { gi->hash++; @@ -1864,6 +1872,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) return 1; } gi->gl = glock_hash_chain(gi->hash); + gi->nhash = 0; } /* Skip entries for other sb and dead entries */ } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); @@ -1876,7 +1885,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; - gi->hash = 0; + if (gi->last_pos <= *pos) + n = gi->nhash + (*pos - gi->last_pos); + else + gi->hash = 0; + + gi->nhash = 0; rcu_read_lock(); do { @@ -1884,6 +1898,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) return NULL; } while (n--); + gi->last_pos = *pos; return gi->gl; } @@ -1893,7 +1908,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, struct gfs2_glock_iter *gi = seq->private; (*pos)++; - + gi->last_pos = *pos; if (gfs2_glock_iter_next(gi)) return NULL; -- cgit v1.2.3 From 90306c41dc3d8e5f12ecd0193dae99e0e7f6e896 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 29 May 2012 23:01:09 -0500 Subject: GFS2: Use lvbs for storing rgrp information with mount option Instead of reading in the resource groups when gfs2 is checking for free space to allocate from, gfs2 can store the necessary information in the resource group's lvb. 
Also, instead of searching for unlinked inodes in every resource group that's checked for free space, gfs2 can store the number of unlinked inodes in the lvb, and only check for unlinked inodes if it will find some. The first time a resource group is locked, the lvb must be initialized. Since this involves counting the unlinked inodes in the resource group, this takes a little extra time. But after that, if the resource group is locked with GL_SKIP, the buffer head won't be read in unless it's actually needed. Enabling the resource group lvbs is done via the rgrplvb mount option. If this option isn't set, the lvbs will still be set and updated, but they won't be verified or used by the filesystem. To safely turn on this option, all of the nodes mounting the filesystem must be running code with this patch, and the filesystem must have been completely unmounted since they were updated. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 1 + fs/gfs2/incore.h | 2 + fs/gfs2/rgrp.c | 147 +++++++++++++++++++++++++++++++++++++++++--- fs/gfs2/super.c | 12 ++++ include/linux/gfs2_ondisk.h | 10 +++ 5 files changed, 163 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3ad8cb3eeb88..10ae1645d9a5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); + memset(gl->gl_lvb, 0, 32 * sizeof(char)); gl->gl_lksb.sb_lvbptr = gl->gl_lvb; gl->gl_tchange = jiffies; gl->gl_object = NULL; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5cda51a3e3bd..dc730700b3b4 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -89,6 +89,7 @@ struct gfs2_rgrpd { u64 rd_igeneration; struct gfs2_bitmap *rd_bits; struct gfs2_sbd *rd_sbd; + struct gfs2_rgrp_lvb *rd_rgl; u32 rd_last_alloc; u32 rd_flags; #define 
GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ @@ -470,6 +471,7 @@ struct gfs2_args { unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ + unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ int ar_commit; /* Commit interval */ int ar_statfs_quantum; /* The fast statfs interval */ int ar_quota_quantum; /* The quota interval */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9eca6a9cff8f..3c6f7ed16a3b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -660,6 +660,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; @@ -769,9 +770,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); } +static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + + if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || + rgl->rl_dinodes != str->rg_dinodes || + rgl->rl_igeneration != str->rg_igeneration) + return 0; + return 1; +} + +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); + rgl->rl_flags = str->rg_flags; + rgl->rl_free = str->rg_free; + rgl->rl_dinodes = str->rg_dinodes; + rgl->rl_igeneration = str->rg_igeneration; + rgl->__pad = 0UL; +} + +static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; + rgl->rl_unlinked = cpu_to_be32(unlinked); +} + +static u32 count_unlinked(struct gfs2_rgrpd *rgd) +{ + struct 
gfs2_bitmap *bi; + const u32 length = rgd->rd_length; + const u8 *buffer = NULL; + u32 i, goal, count = 0; + + for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { + goal = 0; + buffer = bi->bi_bh->b_data + bi->bi_offset; + WARN_ON(!buffer_uptodate(bi->bi_bh)); + while (goal < bi->bi_len * GFS2_NBBY) { + goal = gfs2_bitfit(buffer, bi->bi_len, goal, + GFS2_BLKST_UNLINKED); + if (goal == BFITNOENT) + break; + count++; + goal++; + } + } + + return count; +} + + /** - * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps - * @gh: The glock holder for the resource group + * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps + * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. @@ -779,9 +836,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) * Returns: errno */ -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; @@ -789,6 +845,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) unsigned int x, y; int error; + if (rgd->rd_bits[0].bi_bh != NULL) + return 0; + for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); @@ -815,7 +874,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, + rgd->rd_bits[0].bi_bh->b_data); + } + else if (sdp->sd_args.ar_rgrplvb) { + if (!gfs2_rgrp_lvb_valid(rgd)){ + gfs2_consist_rgrpd(rgd); + error = -EIO; + goto fail; + } + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= 
~GFS2_RDF_CHECK; + } return 0; fail: @@ -829,6 +901,39 @@ fail: return error; } +int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +{ + u32 rl_flags; + + if (rgd->rd_flags & GFS2_RDF_UPTODATE) + return 0; + + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + return gfs2_rgrp_bh_get(rgd); + + rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); + rl_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgd->rd_free_clone = rgd->rd_free; + rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); + rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); + return 0; +} + +int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +{ + struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; + struct gfs2_sbd *sdp = rgd->rd_sbd; + + if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) + return 0; + return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); +} + /** * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @gh: The glock holder for the resource group @@ -842,8 +947,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) for (x = 0; x < length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; - brelse(bi->bi_bh); - bi->bi_bh = NULL; + if (bi->bi_bh) { + brelse(bi->bi_bh); + bi->bi_bh = NULL; + } } } @@ -987,6 +1094,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, bh, 1); gfs2_rgrp_out(rgd, bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); } } @@ -1116,6 +1224,9 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) int error, rg_locked, flags = LM_FLAG_TRY; int loops = 0; + if (sdp->sd_args.ar_rgrplvb) + flags |= GL_SKIP; + if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) rgd = begin = ip->i_rgd; else @@ -1133,22 
+1244,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) } else { error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); + if (!error && sdp->sd_args.ar_rgrplvb) { + error = update_rgrp_lvb(rgd); + if (error) { + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + return error; + } + } } switch (error) { case 0: if (try_rgrp_fit(rgd, ip)) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rgd); ip->i_rgd = rgd; return 0; } - if (rgd->rd_flags & GFS2_RDF_CHECK) + if (rgd->rd_flags & GFS2_RDF_CHECK) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rgd); try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); + } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); if (rgd == begin) { - flags = 0; + flags &= ~LM_FLAG_TRY; loops++; } break; @@ -1529,6 +1652,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 
1 : 0); if (dinode) @@ -1575,6 +1699,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -1611,6 +1736,8 @@ void gfs2_unlink_di(struct inode *inode) trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, 1); } static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) @@ -1630,6 +1757,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, -1); gfs2_statfs_change(sdp, 0, +1, -1); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 81fc76264ed4..788068758f3a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -78,6 +78,8 @@ enum { Opt_quota_quantum, Opt_barrier, Opt_nobarrier, + Opt_rgrplvb, + Opt_norgrplvb, Opt_error, }; @@ -115,6 +117,8 @@ static const match_table_t tokens = { {Opt_quota_quantum, "quota_quantum=%d"}, {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, + {Opt_rgrplvb, "rgrplvb"}, + {Opt_norgrplvb, "norgrplvb"}, {Opt_error, NULL} }; @@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) case Opt_nobarrier: args->ar_nobarrier = 1; break; + case Opt_rgrplvb: + args->ar_rgrplvb = 1; + break; + case Opt_norgrplvb: + args->ar_rgrplvb = 0; + break; case Opt_error: default: printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); @@ -1379,6 +1389,8 
@@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",nobarrier"); if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) seq_printf(s, ",demote_interface_used"); + if (args->ar_rgrplvb) + seq_printf(s, ",rgrplvb"); return 0; } diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index e8ccf6ff3b4d..b2de1f9a88d6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -170,6 +170,16 @@ struct gfs2_rindex { #define GFS2_RGF_NOALLOC 0x00000008 #define GFS2_RGF_TRIMMED 0x00000010 +struct gfs2_rgrp_lvb { + __be32 rl_magic; + __be32 rl_flags; + __be32 rl_free; + __be32 rl_dinodes; + __be64 rl_igeneration; + __be32 rl_unlinked; + __be32 __pad; +}; + struct gfs2_rgrp { struct gfs2_meta_header rg_header; -- cgit v1.2.3 From 331cbdeedeb2f4ef01ccb761513708af0fe77098 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sat, 9 Jun 2012 11:10:55 +0800 Subject: writeback: Fix some comment errors Signed-off-by: Wanpeng Li Signed-off-by: Fengguang Wu --- fs/fs-writeback.c | 4 ++-- fs/super.c | 2 +- fs/sync.c | 2 +- mm/page-writeback.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 41a3ccff18d8..0b2c87e08e90 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -628,8 +628,8 @@ static long writeback_sb_inodes(struct super_block *sb, } /* - * Don't bother with new inodes or inodes beeing freed, first - * kind does not need peridic writeout yet, and for the latter + * Don't bother with new inodes or inodes being freed, first + * kind does not need periodic writeout yet, and for the latter * kind writeout is handled by the freer. 
*/ spin_lock(&inode->i_lock); diff --git a/fs/super.c b/fs/super.c index cf001775617f..3d65443aea8c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -318,7 +318,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock) /* * grab_super_passive - acquire a passive reference - * @s: reference we are trying to grab + * @sb: reference we are trying to grab * * Tries to acquire a passive reference. This is used in places where we * cannot take an active reference but we need to ensure that the diff --git a/fs/sync.c b/fs/sync.c index 11e3d1c44901..1830704df071 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -92,7 +92,7 @@ static void sync_filesystems(int wait) } /* - * sync everything. Start out by waking pdflush, because that writes back + * sync everything. Start out by waking flusher, because that writes back * all queues in parallel. */ SYSCALL_DEFINE0(sync) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ec14419e53b5..e5363f34e025 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -946,7 +946,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi, * bdi->dirty_ratelimit = balanced_dirty_ratelimit; * * However to get a more stable dirty_ratelimit, the below elaborated - * code makes use of task_ratelimit to filter out sigular points and + * code makes use of task_ratelimit to filter out singular points and * limit the step size. * * The below code essentially only uses the relative value of @@ -969,7 +969,7 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi, * feel and care are stable dirty rate and small position error. * * |task_ratelimit - dirty_ratelimit| is used to limit the step size - * and filter out the sigular points of balanced_dirty_ratelimit. Which + * and filter out the singular points of balanced_dirty_ratelimit. 
Which * keeps jumping around randomly and can even leap far away at times * due to the small 200ms estimation period of dirty_rate (we want to * keep that period small to reduce time lags). -- cgit v1.2.3 From a4808147dcf1ecf2f76212a78fd9692b3c112f47 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 11 Jun 2012 13:16:35 +0100 Subject: seq_file: Add seq_vprintf function and export it The existing seq_printf function is rewritten in terms of the new seq_vprintf which is also exported to modules. This allows GFS2 (and potentially other seq_file users) to have a vprintf based interface and to avoid an extra copy into a temporary buffer in some cases. Signed-off-by: Steven Whitehouse Reported-by: Eric Dumazet Acked-by: Al Viro --- fs/seq_file.c | 18 ++++++++++++++---- include/linux/seq_file.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/seq_file.c b/fs/seq_file.c index 0cbd0494b79e..14cf9de1dbe1 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc) } EXPORT_SYMBOL(seq_escape); -int seq_printf(struct seq_file *m, const char *f, ...) +int seq_vprintf(struct seq_file *m, const char *f, va_list args) { - va_list args; int len; if (m->count < m->size) { - va_start(args, f); len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); - va_end(args); if (m->count + len < m->size) { m->count += len; return 0; @@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...) seq_set_overflow(m); return -1; } +EXPORT_SYMBOL(seq_vprintf); + +int seq_printf(struct seq_file *m, const char *f, ...) 
+{ + int ret; + va_list args; + + va_start(args, f); + ret = seq_vprintf(m, f, args); + va_end(args); + + return ret; +} EXPORT_SYMBOL(seq_printf); /** diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index fc61854f6224..83c44eefe698 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -86,6 +86,7 @@ int seq_puts(struct seq_file *m, const char *s); int seq_write(struct seq_file *seq, const void *data, size_t len); __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); +__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); int seq_path(struct seq_file *, const struct path *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); -- cgit v1.2.3 From 1bb49303b7a82eb9bce0595087523343683abdf0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 11 Jun 2012 13:26:50 +0100 Subject: GFS2: Use seq_vprintf for glocks debugfs file Make use of the newly added seq_vprintf() function. Signed-off-by: Steven Whitehouse Reported-by: Eric Dumazet Acked-by: Al Viro --- fs/gfs2/glock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 10ae1645d9a5..4d5d63d9d2c0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -51,7 +51,6 @@ struct gfs2_glock_iter { struct gfs2_sbd *sdp; /* incore superblock */ struct gfs2_glock *gl; /* current glock struct */ loff_t last_pos; /* last position */ - char string[512]; /* scratch space */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); @@ -951,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 
va_start(args, fmt); if (seq) { - struct gfs2_glock_iter *gi = seq->private; - vsprintf(gi->string, fmt, args); - seq_puts(seq, gi->string); + seq_vprintf(seq, fmt, args); } else { vaf.fmt = fmt; vaf.va = &args; -- cgit v1.2.3 From 0fe2f1e929ecabf834f4af2ffd300fe70700f4b3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 11 Jun 2012 13:49:47 +0100 Subject: GFS2: Size seq_file buffer more carefully This places a limit on the buffer size for archs with larger PAGE_SIZE. Signed-off-by: Steven Whitehouse Reported-by: Eric Dumazet --- fs/gfs2/glock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4d5d63d9d2c0..1ed81f40da0d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1977,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = { .show = gfs2_sbstats_seq_show, }; +#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) + static int gfs2_glocks_open(struct inode *inode, struct file *file) { int ret = seq_open_private(file, &gfs2_glock_seq_ops, @@ -1985,9 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; - seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN); + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) - seq->size = 8*PAGE_SIZE; + seq->size = GFS2_SEQ_GOODSIZE; } return ret; } @@ -2000,9 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; - seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN); + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) - seq->size = 8*PAGE_SIZE; + seq->size = GFS2_SEQ_GOODSIZE; } return ret; } -- cgit v1.2.3 From 
0d515210b6969ecfc161f71a4515831d9a6e58f4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Jun 2012 10:27:41 -0400 Subject: GFS2: Add kobject release method This patch adds a kobject release function that properly maintains the kobject use count, so that accesses to the sysfs files do not cause an access to freed kernel memory after an unmount. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 36 ++++++++++++++++++++++++------------ fs/gfs2/sys.c | 21 ++++++++++++++++++--- 2 files changed, 42 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b8c250fc4922..9b2389756acd 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent } error = init_names(sdp, silent); - if (error) - goto fail; + if (error) { + /* In this case, we haven't initialized sysfs, so we have to + manually free the sdp. */ + free_percpu(sdp->sd_lkstats); + kfree(sdp); + sb->s_fs_info = NULL; + return error; + } snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); - gfs2_create_debugfs_file(sdp); - error = gfs2_sys_fs_add(sdp); + /* + * If we hit an error here, gfs2_sys_fs_add will have called function + * kobject_put which causes the sysfs usage count to go to zero, which + * causes sysfs to call function gfs2_sbd_release, which frees sdp. + * Subsequent error paths here will call gfs2_sys_fs_del, which also + * kobject_put to free sdp. 
+ */ if (error) - goto fail; + return error; + + gfs2_create_debugfs_file(sdp); error = gfs2_lm_mount(sdp, silent); if (error) - goto fail_sys; + goto fail_debug; error = init_locking(sdp, &mount_gh, DO); if (error) @@ -1215,12 +1228,12 @@ fail_locking: fail_lm: gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); -fail_sys: - gfs2_sys_fs_del(sdp); -fail: +fail_debug: gfs2_delete_debugfs_file(sdp); free_percpu(sdp->sd_lkstats); - kfree(sdp); + /* gfs2_sys_fs_del must be the last thing we do, since it causes + * sysfs to call function gfs2_sbd_release, which frees sdp. */ + gfs2_sys_fs_del(sdp); sb->s_fs_info = NULL; return error; } @@ -1390,10 +1403,9 @@ static void gfs2_kill_sb(struct super_block *sb) sdp->sd_root_dir = NULL; sdp->sd_master_dir = NULL; shrink_dcache_sb(sb); - kill_block_super(sb); gfs2_delete_debugfs_file(sdp); free_percpu(sdp->sd_lkstats); - kfree(sdp); + kill_block_super(sb); } struct file_system_type gfs2_fs_type = { diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9c2592b1d5ff..e4bee4bebbf6 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = { NULL, }; +static void gfs2_sbd_release(struct kobject *kobj) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + + kfree(sdp); +} + static struct kobj_type gfs2_ktype = { + .release = gfs2_sbd_release, .default_attrs = gfs2_attrs, .sysfs_ops = &gfs2_attr_ops, }; @@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) char ro[20]; char spectator[20]; char *envp[] = { ro, spectator, NULL }; + int sysfs_frees_sdp = 0; sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 
1 : 0); @@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, "%s", sdp->sd_table_name); if (error) - goto fail; + goto fail_reg; + sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling + function gfs2_sbd_release. */ error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) goto fail_reg; @@ -615,9 +626,13 @@ fail_lock_module: fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); fail_reg: - kobject_put(&sdp->sd_kobj); -fail: + free_percpu(sdp->sd_lkstats); fs_err(sdp, "error %d adding sysfs files", error); + if (sysfs_frees_sdp) + kobject_put(&sdp->sd_kobj); + else + kfree(sdp); + sb->s_fs_info = NULL; return error; } -- cgit v1.2.3 From a59d6293e5372d7c35212932e083e2a541151eff Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 23 May 2012 15:13:07 +0200 Subject: debugfs: change parameter check in debugfs_remove() functions The dentry parameter in debugfs_remove() and debugfs_remove_recursive() is checked for being a NULL pointer. To make cleanup by callers easier this check is extended using the IS_ERR_OR_NULL macro instead because the debugfs_create_... functions can return an ERR_PTR() value. 
Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b80bc846a15a..0de5e26870c3 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -498,7 +498,7 @@ void debugfs_remove(struct dentry *dentry) struct dentry *parent; int ret; - if (!dentry) + if (IS_ERR_OR_NULL(dentry)) return; parent = dentry->d_parent; @@ -530,7 +530,7 @@ void debugfs_remove_recursive(struct dentry *dentry) struct dentry *child; struct dentry *parent; - if (!dentry) + if (IS_ERR_OR_NULL(dentry)) return; parent = dentry->d_parent; -- cgit v1.2.3 From f29e5956aebafe63f81e80f972c44c4a666e5c7f Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:19 -0700 Subject: pstore: Add console log messages support Pstore doesn't support logging kernel messages in run-time, it only dumps dmesg when kernel oopses/panics. This makes pstore useless for debugging hangs caused by HW issues or improper use of HW (e.g. weird device inserted -> driver tried to write reserved bits -> SoC hung). In that case we don't get any messages in the pstore. Therefore, let's add runtime logging support: PSTORE_TYPE_CONSOLE. Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Acked-by: Colin Cross Signed-off-by: Greg Kroah-Hartman --- fs/pstore/Kconfig | 7 +++++++ fs/pstore/inode.c | 3 +++ fs/pstore/platform.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/pstore.h | 1 + 4 files changed, 48 insertions(+) (limited to 'fs') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 23ade2680a4a..d044de6ee308 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -12,6 +12,13 @@ config PSTORE If you don't have a platform persistent store driver, say N. 
+config PSTORE_CONSOLE + bool "Log kernel console messages" + depends on PSTORE + help + When the option is enabled, pstore will log all kernel + messages, even if no oops or panic happened. + config PSTORE_RAM tristate "Log panic/oops to a RAM buffer" depends on PSTORE diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 11a2aa2a56c4..45bff5441b04 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -212,6 +212,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, case PSTORE_TYPE_DMESG: sprintf(name, "dmesg-%s-%lld", psname, id); break; + case PSTORE_TYPE_CONSOLE: + sprintf(name, "console-%s", psname); + break; case PSTORE_TYPE_MCE: sprintf(name, "mce-%s-%lld", psname, id); break; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 82c585f715e3..61461ed9b6c8 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -1,6 +1,7 @@ /* * Persistent Storage - platform driver interface parts. * + * Copyright (C) 2007-2008 Google, Inc. * Copyright (C) 2010 Intel Corporation * * This program is free software; you can redistribute it and/or modify @@ -22,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +158,40 @@ static struct kmsg_dumper pstore_dumper = { .dump = pstore_dump, }; +#ifdef CONFIG_PSTORE_CONSOLE +static void pstore_console_write(struct console *con, const char *s, unsigned c) +{ + const char *e = s + c; + + while (s < e) { + unsigned long flags; + + if (c > psinfo->bufsize) + c = psinfo->bufsize; + spin_lock_irqsave(&psinfo->buf_lock, flags); + memcpy(psinfo->buf, s, c); + psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); + s += c; + c = e - s; + } +} + +static struct console pstore_console = { + .name = "pstore", + .write = pstore_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME, + .index = -1, +}; + +static void pstore_register_console(void) +{ + register_console(&pstore_console); +} +#else +static 
void pstore_register_console(void) {} +#endif + /* * platform specific persistent storage driver registers with * us here. If pstore is already mounted, call the platform @@ -193,6 +229,7 @@ int pstore_register(struct pstore_info *psi) pstore_get_records(0); kmsg_dump_register(&pstore_dumper); + pstore_register_console(); pstore_timer.expires = jiffies + PSTORE_INTERVAL; add_timer(&pstore_timer); diff --git a/include/linux/pstore.h b/include/linux/pstore.h index e1461e143be2..1bd014b8e432 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -29,6 +29,7 @@ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, PSTORE_TYPE_MCE = 1, + PSTORE_TYPE_CONSOLE = 2, PSTORE_TYPE_UNKNOWN = 255 }; -- cgit v1.2.3 From cac2eb7b580c95e3871a71276c99e2bd751a1624 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:20 -0700 Subject: pstore/ram: Give proper names to dump-related variables We're about to add support for other message types, so let's rename some variables to not be confused later. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 453030f9c5bc..9b274b98bf3b 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -68,9 +68,9 @@ struct ramoops_context { size_t record_size; int dump_oops; bool ecc; - unsigned int count; - unsigned int max_count; - unsigned int read_count; + unsigned int max_dump_cnt; + unsigned int dump_write_cnt; + unsigned int dump_read_cnt; struct pstore_info pstore; }; @@ -81,7 +81,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; - cxt->read_count = 0; + cxt->dump_read_cnt = 0; return 0; } @@ -94,10 +94,10 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz; - if (cxt->read_count >= cxt->max_count) + if (cxt->dump_read_cnt >= cxt->max_dump_cnt) return -EINVAL; - *id = cxt->read_count++; + *id = cxt->dump_read_cnt++; prz = cxt->przs[*id]; /* Only supports dmesg output so far. */ @@ -141,7 +141,7 @@ static int ramoops_pstore_write(enum pstore_type_id type, size_t size, struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; - struct persistent_ram_zone *prz = cxt->przs[cxt->count]; + struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; size_t hlen; /* Currently ramoops is designed to only store dmesg dumps. 
*/ @@ -172,7 +172,7 @@ static int ramoops_pstore_write(enum pstore_type_id type, size = prz->buffer_size - hlen; persistent_ram_write(prz, cxt->pstore.buf, size); - cxt->count = (cxt->count + 1) % cxt->max_count; + cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt; return 0; } @@ -182,7 +182,7 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, { struct ramoops_context *cxt = psi->data; - if (id >= cxt->max_count) + if (id >= cxt->max_dump_cnt) return -EINVAL; persistent_ram_free_old(cxt->przs[id]); @@ -213,7 +213,7 @@ static int __init ramoops_probe(struct platform_device *pdev) /* Only a single ramoops area allowed at a time, so fail extra * probes. */ - if (cxt->max_count) + if (cxt->max_dump_cnt) goto fail_out; if (!pdata->mem_size || !pdata->record_size) { @@ -239,22 +239,22 @@ static int __init ramoops_probe(struct platform_device *pdev) goto fail_out; } - cxt->max_count = pdata->mem_size / pdata->record_size; - cxt->count = 0; + cxt->max_dump_cnt = pdata->mem_size / pdata->record_size; + cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; cxt->ecc = pdata->ecc; - cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_count, GFP_KERNEL); + cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt, GFP_KERNEL); if (!cxt->przs) { err = -ENOMEM; dev_err(dev, "failed to initialize a prz array\n"); goto fail_out; } - for (i = 0; i < cxt->max_count; i++) { + for (i = 0; i < cxt->max_dump_cnt; i++) { size_t sz = cxt->record_size; phys_addr_t start = cxt->phys_addr + sz * i; @@ -293,7 +293,7 @@ static int __init ramoops_probe(struct platform_device *pdev) pr_info("attached 0x%lx@0x%llx (%ux0x%zx), ecc: %s\n", cxt->size, (unsigned long long)cxt->phys_addr, - cxt->max_count, cxt->record_size, + cxt->max_dump_cnt, cxt->record_size, ramoops_ecc ? 
"on" : "off"); return 0; @@ -302,7 +302,7 @@ fail_buf: kfree(cxt->pstore.buf); fail_clear: cxt->pstore.bufsize = 0; - cxt->max_count = 0; + cxt->max_dump_cnt = 0; fail_przs: for (i = 0; cxt->przs[i]; i++) persistent_ram_free(cxt->przs[i]); @@ -321,7 +321,7 @@ static int __exit ramoops_remove(struct platform_device *pdev) iounmap(cxt->virt_addr); release_mem_region(cxt->phys_addr, cxt->size); - cxt->max_count = 0; + cxt->max_dump_cnt = 0; /* TODO(kees): When pstore supports unregistering, call it here. */ kfree(cxt->pstore.buf); -- cgit v1.2.3 From f4c5d2423c64266ba0daa9cc803d1d5ba469fe36 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:21 -0700 Subject: pstore/ram: Factor dmesg przs initialization out of probe() This will help make code clearer when we'll add support for other message types. This also makes probe() much shorter and understandable, plus makes mem/record size checking a bit easier. Implementation detail: we now use a paddr pointer, this will be used for allocating persistent ram zones for other message types. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 99 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 9b274b98bf3b..6b7676738493 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -202,13 +202,65 @@ static struct ramoops_context oops_cxt = { }, }; +static void ramoops_free_przs(struct ramoops_context *cxt) +{ + int i; + + if (!cxt->przs) + return; + + for (i = 0; cxt->przs[i]; i++) + persistent_ram_free(cxt->przs[i]); + kfree(cxt->przs); +} + +static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, + phys_addr_t *paddr, size_t dump_mem_sz) +{ + int err = -ENOMEM; + int i; + + if (!cxt->record_size) + return 0; + + cxt->max_dump_cnt = dump_mem_sz / cxt->record_size; + if (!cxt->max_dump_cnt) + return -ENOMEM; + + cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt, + GFP_KERNEL); + if (!cxt->przs) { + dev_err(dev, "failed to initialize a prz array for dumps\n"); + return -ENOMEM; + } + + for (i = 0; i < cxt->max_dump_cnt; i++) { + size_t sz = cxt->record_size; + + cxt->przs[i] = persistent_ram_new(*paddr, sz, cxt->ecc); + if (IS_ERR(cxt->przs[i])) { + err = PTR_ERR(cxt->przs[i]); + dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", + sz, (unsigned long long)*paddr, err); + goto fail_prz; + } + *paddr += sz; + } + + return 0; +fail_prz: + ramoops_free_przs(cxt); + return err; +} + static int __init ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = pdev->dev.platform_data; struct ramoops_context *cxt = &oops_cxt; + size_t dump_mem_sz; + phys_addr_t paddr; int err = -EINVAL; - int i; /* Only a single ramoops area allowed at a time, so fail extra * probes. 
@@ -225,21 +277,6 @@ static int __init ramoops_probe(struct platform_device *pdev) pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); pdata->record_size = rounddown_pow_of_two(pdata->record_size); - /* Check for the minimum memory size */ - if (pdata->mem_size < MIN_MEM_SIZE && - pdata->record_size < MIN_MEM_SIZE) { - pr_err("memory size too small, minimum is %lu\n", - MIN_MEM_SIZE); - goto fail_out; - } - - if (pdata->mem_size < pdata->record_size) { - pr_err("The memory size must be larger than the " - "records size\n"); - goto fail_out; - } - - cxt->max_dump_cnt = pdata->mem_size / pdata->record_size; cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; @@ -247,24 +284,14 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->dump_oops = pdata->dump_oops; cxt->ecc = pdata->ecc; - cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt, GFP_KERNEL); - if (!cxt->przs) { - err = -ENOMEM; - dev_err(dev, "failed to initialize a prz array\n"); - goto fail_out; - } - - for (i = 0; i < cxt->max_dump_cnt; i++) { - size_t sz = cxt->record_size; - phys_addr_t start = cxt->phys_addr + sz * i; + paddr = cxt->phys_addr; - cxt->przs[i] = persistent_ram_new(start, sz, cxt->ecc); - if (IS_ERR(cxt->przs[i])) { - err = PTR_ERR(cxt->przs[i]); - dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", - sz, (unsigned long long)start, err); - goto fail_przs; - } + dump_mem_sz = cxt->size; + err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); + if (err) { + pr_err("memory size too small, minimum is %lu\n", + cxt->record_size); + goto fail_count; } cxt->pstore.data = cxt; @@ -303,10 +330,8 @@ fail_buf: fail_clear: cxt->pstore.bufsize = 0; cxt->max_dump_cnt = 0; -fail_przs: - for (i = 0; cxt->przs[i]; i++) - persistent_ram_free(cxt->przs[i]); - kfree(cxt->przs); +fail_count: + ramoops_free_przs(cxt); fail_out: return err; } -- cgit v1.2.3 From 755d66b48fe5a1f2a07802fcc8704e8b9e775e7d Mon Sep 17 00:00:00 2001 From: 
Anton Vorontsov Date: Sat, 26 May 2012 06:20:22 -0700 Subject: pstore/ram: Factor ramoops_get_next_prz() out of ramoops_pstore_read() This will help make code clearer when we'll add support for other message types. The patch also changes return value from -EINVAL to 0 in case of end-of-records. The exact value doesn't matter for pstore (it should be just <= 0), but 0 feels more correct. Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6b7676738493..d770d7266e96 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -85,6 +85,33 @@ static int ramoops_pstore_open(struct pstore_info *psi) return 0; } +static struct persistent_ram_zone * +ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, + u64 *id, + enum pstore_type_id *typep, enum pstore_type_id type, + bool update) +{ + struct persistent_ram_zone *prz; + int i = (*c)++; + + if (i >= max) + return NULL; + + prz = przs[i]; + + if (update) { + /* Update old/shadowed buffer. */ + persistent_ram_save_old(prz); + if (!persistent_ram_old_size(prz)) + return NULL; + } + + *typep = type; + *id = i; + + return prz; +} + static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, struct timespec *time, char **buf, @@ -94,20 +121,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz; - if (cxt->dump_read_cnt >= cxt->max_dump_cnt) - return -EINVAL; - - *id = cxt->dump_read_cnt++; - prz = cxt->przs[*id]; + prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, + cxt->max_dump_cnt, id, type, + PSTORE_TYPE_DMESG, 1); + if (!prz) + return 0; - /* Only supports dmesg output so far. */ - *type = PSTORE_TYPE_DMESG; /* TODO(kees): Bogus time for the moment. 
*/ time->tv_sec = 0; time->tv_nsec = 0; - /* Update old/shadowed buffer. */ - persistent_ram_save_old(prz); size = persistent_ram_old_size(prz); *buf = kmalloc(size, GFP_KERNEL); if (*buf == NULL) -- cgit v1.2.3 From b5d38e9bf1b0c4db19e336b59b38dfb5d28bf1bf Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:23 -0700 Subject: pstore/ram: Add console messages handling The console log size is configurable via ramoops.console_size module option, and the log itself is available via /console-ramoops file. Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 100 ++++++++++++++++++++++++++++++++++++++------- include/linux/pstore_ram.h | 1 + 2 files changed, 87 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index d770d7266e96..c7acf94ff475 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -41,6 +41,10 @@ module_param(record_size, ulong, 0400); MODULE_PARM_DESC(record_size, "size of each dump done on oops/panic"); +static ulong ramoops_console_size = MIN_MEM_SIZE; +module_param_named(console_size, ramoops_console_size, ulong, 0400); +MODULE_PARM_DESC(console_size, "size of kernel console log"); + static ulong mem_address; module_param(mem_address, ulong, 0400); MODULE_PARM_DESC(mem_address, @@ -63,14 +67,17 @@ MODULE_PARM_DESC(ramoops_ecc, struct ramoops_context { struct persistent_ram_zone **przs; + struct persistent_ram_zone *cprz; phys_addr_t phys_addr; unsigned long size; size_t record_size; + size_t console_size; int dump_oops; bool ecc; unsigned int max_dump_cnt; unsigned int dump_write_cnt; unsigned int dump_read_cnt; + unsigned int console_read_cnt; struct pstore_info pstore; }; @@ -82,6 +89,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) struct ramoops_context *cxt = psi->data; cxt->dump_read_cnt = 0; + cxt->console_read_cnt = 0; return 0; } @@ -124,6 +132,9 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id 
*type, prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, cxt->max_dump_cnt, id, type, PSTORE_TYPE_DMESG, 1); + if (!prz) + prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, + 1, id, type, PSTORE_TYPE_CONSOLE, 0); if (!prz) return 0; @@ -167,7 +178,13 @@ static int ramoops_pstore_write(enum pstore_type_id type, struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; size_t hlen; - /* Currently ramoops is designed to only store dmesg dumps. */ + if (type == PSTORE_TYPE_CONSOLE) { + if (!cxt->cprz) + return -ENOMEM; + persistent_ram_write(cxt->cprz, cxt->pstore.buf, size); + return 0; + } + if (type != PSTORE_TYPE_DMESG) return -EINVAL; @@ -204,12 +221,23 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; + struct persistent_ram_zone *prz; - if (id >= cxt->max_dump_cnt) + switch (type) { + case PSTORE_TYPE_DMESG: + if (id >= cxt->max_dump_cnt) + return -EINVAL; + prz = cxt->przs[id]; + break; + case PSTORE_TYPE_CONSOLE: + prz = cxt->cprz; + break; + default: return -EINVAL; + } - persistent_ram_free_old(cxt->przs[id]); - persistent_ram_zap(cxt->przs[id]); + persistent_ram_free_old(prz); + persistent_ram_zap(prz); return 0; } @@ -276,6 +304,32 @@ fail_prz: return err; } +static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone **prz, + phys_addr_t *paddr, size_t sz) +{ + if (!sz) + return 0; + + if (*paddr + sz > *paddr + cxt->size) + return -ENOMEM; + + *prz = persistent_ram_new(*paddr, sz, cxt->ecc); + if (IS_ERR(*prz)) { + int err = PTR_ERR(*prz); + + dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", + sz, (unsigned long long)*paddr, err); + return err; + } + + persistent_ram_zap(*prz); + + *paddr += sz; + + return 0; +} + static int __init ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -291,34 +345,50 @@ static int __init ramoops_probe(struct 
platform_device *pdev) if (cxt->max_dump_cnt) goto fail_out; - if (!pdata->mem_size || !pdata->record_size) { - pr_err("The memory size and the record size must be " + if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size)) { + pr_err("The memory size and the record/console size must be " "non-zero\n"); goto fail_out; } pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); pdata->record_size = rounddown_pow_of_two(pdata->record_size); + pdata->console_size = rounddown_pow_of_two(pdata->console_size); cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; + cxt->console_size = pdata->console_size; cxt->dump_oops = pdata->dump_oops; cxt->ecc = pdata->ecc; paddr = cxt->phys_addr; - dump_mem_sz = cxt->size; + dump_mem_sz = cxt->size - cxt->console_size; err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); - if (err) { + if (err) + goto fail_out; + + err = ramoops_init_prz(dev, cxt, &cxt->cprz, &paddr, cxt->console_size); + if (err) + goto fail_init_cprz; + + if (!cxt->przs && !cxt->cprz) { pr_err("memory size too small, minimum is %lu\n", - cxt->record_size); - goto fail_count; + cxt->console_size + cxt->record_size); + goto fail_cnt; } cxt->pstore.data = cxt; - cxt->pstore.bufsize = cxt->przs[0]->buffer_size; + /* + * Console can handle any buffer size, so prefer dumps buffer + * size since usually it is smaller. 
+ */ + if (cxt->przs) + cxt->pstore.bufsize = cxt->przs[0]->buffer_size; + else + cxt->pstore.bufsize = cxt->cprz->buffer_size; cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); spin_lock_init(&cxt->pstore.buf_lock); if (!cxt->pstore.buf) { @@ -341,9 +411,8 @@ static int __init ramoops_probe(struct platform_device *pdev) record_size = pdata->record_size; dump_oops = pdata->dump_oops; - pr_info("attached 0x%lx@0x%llx (%ux0x%zx), ecc: %s\n", + pr_info("attached 0x%lx@0x%llx, ecc: %s\n", cxt->size, (unsigned long long)cxt->phys_addr, - cxt->max_dump_cnt, cxt->record_size, ramoops_ecc ? "on" : "off"); return 0; @@ -353,7 +422,9 @@ fail_buf: fail_clear: cxt->pstore.bufsize = 0; cxt->max_dump_cnt = 0; -fail_count: +fail_cnt: + kfree(cxt->cprz); +fail_init_cprz: ramoops_free_przs(cxt); fail_out: return err; @@ -405,6 +476,7 @@ static int __init ramoops_init(void) dummy_data->mem_size = mem_size; dummy_data->mem_address = mem_address; dummy_data->record_size = record_size; + dummy_data->console_size = ramoops_console_size; dummy_data->dump_oops = dump_oops; dummy_data->ecc = ramoops_ecc; dummy = platform_create_bundle(&ramoops_driver, ramoops_probe, diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 3b823d49a85a..9385d41cb1c3 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -93,6 +93,7 @@ struct ramoops_platform_data { unsigned long mem_size; unsigned long mem_address; unsigned long record_size; + unsigned long console_size; int dump_oops; bool ecc; }; -- cgit v1.2.3 From 602b5be4f14cabd5b751c340919958549475ab62 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:24 -0700 Subject: pstore/ram_core: Silence some printks Since we use multiple regions, the messages are somewhat annoying. We do print total mapped memory already, so no need to print the information for each region in the library routines. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Acked-by: Colin Cross Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index c5fbdbbf81ac..78f6d4b2addb 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -409,14 +409,14 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool " size %zu, start %zu\n", buffer_size(prz), buffer_start(prz)); else { - pr_info("persistent_ram: found existing buffer," + pr_debug("persistent_ram: found existing buffer," " size %zu, start %zu\n", buffer_size(prz), buffer_start(prz)); persistent_ram_save_old(prz); return 0; } } else { - pr_info("persistent_ram: no valid data in buffer" + pr_debug("persistent_ram: no valid data in buffer" " (sig = 0x%08x)\n", prz->buffer->sig); } -- cgit v1.2.3 From b8587daa756141da776e3d4c3a5a315f5af78708 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:27 -0700 Subject: pstore/ram_core: Remove now unused code The code tried to maintain the global list of persistent ram zones, which isn't a great idea overall, plus since Android's ram_console is no longer there, we can remove some unused functions. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Acked-by: Colin Cross Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 77 ---------------------------------------------- include/linux/pstore_ram.h | 19 ------------ 2 files changed, 96 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 78f6d4b2addb..0fd81611525c 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -35,8 +35,6 @@ struct persistent_ram_buffer { #define PERSISTENT_RAM_SIG (0x43474244) /* DBGC */ -static __initdata LIST_HEAD(persistent_ram_list); - static inline size_t buffer_size(struct persistent_ram_zone *prz) { return atomic_read(&prz->buffer->size); @@ -462,78 +460,3 @@ err: kfree(prz); return ERR_PTR(ret); } - -#ifndef MODULE -static int __init persistent_ram_buffer_init(const char *name, - struct persistent_ram_zone *prz) -{ - int i; - struct persistent_ram *ram; - struct persistent_ram_descriptor *desc; - phys_addr_t start; - - list_for_each_entry(ram, &persistent_ram_list, node) { - start = ram->start; - for (i = 0; i < ram->num_descs; i++) { - desc = &ram->descs[i]; - if (!strcmp(desc->name, name)) - return persistent_ram_buffer_map(start, - desc->size, prz); - start += desc->size; - } - } - - return -EINVAL; -} - -static __init -struct persistent_ram_zone *__persistent_ram_init(struct device *dev, bool ecc) -{ - struct persistent_ram_zone *prz; - int ret = -ENOMEM; - - prz = kzalloc(sizeof(struct persistent_ram_zone), GFP_KERNEL); - if (!prz) { - pr_err("persistent_ram: failed to allocate persistent ram zone\n"); - goto err; - } - - ret = persistent_ram_buffer_init(dev_name(dev), prz); - if (ret) { - pr_err("persistent_ram: failed to initialize buffer\n"); - goto err; - } - - persistent_ram_post_init(prz, ecc); - - return prz; -err: - kfree(prz); - return ERR_PTR(ret); -} - -struct persistent_ram_zone * __init -persistent_ram_init_ringbuffer(struct device *dev, bool ecc) -{ - return __persistent_ram_init(dev, ecc); -} - -int 
__init persistent_ram_early_init(struct persistent_ram *ram) -{ - int ret; - - ret = memblock_reserve(ram->start, ram->size); - if (ret) { - pr_err("Failed to reserve persistent memory from %08lx-%08lx\n", - (long)ram->start, (long)(ram->start + ram->size - 1)); - return ret; - } - - list_add_tail(&ram->node, &persistent_ram_list); - - pr_info("Initialized persistent memory from %08lx-%08lx\n", - (long)ram->start, (long)(ram->start + ram->size - 1)); - - return 0; -} -#endif diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9385d41cb1c3..2470bb591434 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -25,21 +25,6 @@ struct persistent_ram_buffer; -struct persistent_ram_descriptor { - const char *name; - phys_addr_t size; -}; - -struct persistent_ram { - phys_addr_t start; - phys_addr_t size; - - int num_descs; - struct persistent_ram_descriptor *descs; - - struct list_head node; -}; - struct persistent_ram_zone { phys_addr_t paddr; size_t size; @@ -63,15 +48,11 @@ struct persistent_ram_zone { size_t old_log_size; }; -int persistent_ram_early_init(struct persistent_ram *ram); - struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, size_t size, bool ecc); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, - bool ecc); int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, unsigned int count); -- cgit v1.2.3 From a3f5f075c2e2c52c9c656c54ea77ceff5b2e1e25 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:28 -0700 Subject: pstore/platform: Make automatic updates interval configurable There is no behavioural change, the default value is still 60 seconds. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/platform.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 61461ed9b6c8..34ca3141eb0a 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "internal.h" @@ -40,7 +41,10 @@ * whether the system is actually still running well enough * to let someone see the entry */ -#define PSTORE_INTERVAL (60 * HZ) +static int pstore_update_ms = 60000; +module_param_named(update_ms, pstore_update_ms, int, 0600); +MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content " + "(default is 60000; -1 means runtime updates are disabled)"); static int pstore_new_entry; @@ -231,8 +235,11 @@ int pstore_register(struct pstore_info *psi) kmsg_dump_register(&pstore_dumper); pstore_register_console(); - pstore_timer.expires = jiffies + PSTORE_INTERVAL; - add_timer(&pstore_timer); + if (pstore_update_ms >= 0) { + pstore_timer.expires = jiffies + + msecs_to_jiffies(pstore_update_ms); + add_timer(&pstore_timer); + } return 0; } @@ -291,7 +298,7 @@ static void pstore_timefunc(unsigned long dummy) schedule_work(&pstore_work); } - mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL); + mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); } module_param(backend, charp, 0444); -- cgit v1.2.3 From 521f7288a8126a8ec28e3ab623aacf0590684b80 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:20:29 -0700 Subject: pstore/platform: Disable automatic updates by default Having automatic updates seems pointless for production system, and even dangerous and thus counter-productive: 1. If we can mount pstore, or read files, we can as well read /proc/kmsg. So, there's little point in duplicating the functionality and present the same information but via another userland ABI; 2. 
Expecting the kernel to behave sanely after oops/panic is naive. It might work, but you'd rather not try it. Screwed up kernel can do rather bad things, like recursive faults[1]; and pstore rather provoking bad things to happen. It uses: 1. Timers (assumes sane interrupts state); 2. Workqueues and mutexes (assumes scheduler in a sane state); 3. kzalloc (a working slab allocator); That's too much for a dead kernel, so the debugging facility itself might just make debugging harder, which is not what we want. Maybe for non-oops message types it would make sense to re-enable automatic updates, but so far I don't see any use case for this. Even for tracing, it has its own run-time/normal ABI, so we're only interested in pstore upon next boot, to retrieve what has gone wrong with HW or SW. So, let's disable the updates by default. [1] BUG: unable to handle kernel paging request at fffffffffffffff8 IP: [] kthread_data+0xb/0x20 [...] Process kworker/0:1 (pid: 14, threadinfo ffff8800072c0000, task ffff88000725b100) [... Call Trace: [] wq_worker_sleeping+0x10/0xa0 [] __schedule+0x568/0x7d0 [] ? trace_hardirqs_on+0xd/0x10 [] ? call_rcu_sched+0x12/0x20 [] ? release_task+0x156/0x2d0 [] ? release_task+0x1e/0x2d0 [] ? trace_hardirqs_on+0xd/0x10 [] schedule+0x24/0x70 [] do_exit+0x1f8/0x370 [] oops_end+0x77/0xb0 [] no_context+0x1a6/0x1b5 [] __bad_area_nosemaphore+0x1ce/0x1ed [] ? ttwu_queue+0xc6/0xe0 [] bad_area_nosemaphore+0xe/0x10 [] do_page_fault+0x2c7/0x450 [] ? __lock_release+0x6b/0xe0 [] ? mark_held_locks+0x61/0x140 [] ? __wake_up+0x4e/0x70 [] ? trace_hardirqs_off_thunk+0x3a/0x3c [] ? pstore_register+0x120/0x120 [] page_fault+0x1f/0x30 [] ? pstore_register+0x120/0x120 [] ? memcpy+0x68/0x110 [] ? pstore_get_records+0x3a/0x130 [] ? persistent_ram_copy_old+0x64/0x90 [] ramoops_pstore_read+0x84/0x130 [] pstore_get_records+0x79/0x130 [] ? process_one_work+0x116/0x450 [] ? pstore_register+0x120/0x120 [] pstore_dowork+0xe/0x10 [] process_one_work+0x174/0x450 [] ? 
process_one_work+0x116/0x450 [] worker_thread+0x123/0x2d0 [] ? manage_workers.isra.28+0x120/0x120 [] kthread+0x8e/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x70/0x70 [] ? gs_change+0xb/0xb Code: be e2 00 00 00 48 c7 c7 d1 2a 4e 81 e8 bf fb fd ff 48 8b 5d f0 4c 8b 65 f8 c9 c3 0f 1f 44 00 00 48 8b 87 08 02 00 00 55 48 89 e5 <48> 8b 40 f8 5d c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 RIP [] kthread_data+0xb/0x20 RSP CR2: fffffffffffffff8 ---[ end trace 996a332dc399111d ]--- Fixing recursive fault but reboot is needed! Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- fs/pstore/platform.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 34ca3141eb0a..be4614f24a2f 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -41,10 +41,12 @@ * whether the system is actually still running well enough * to let someone see the entry */ -static int pstore_update_ms = 60000; +static int pstore_update_ms = -1; module_param_named(update_ms, pstore_update_ms, int, 0600); MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content " - "(default is 60000; -1 means runtime updates are disabled)"); + "(default is -1, which means runtime updates are disabled; " + "enabling this option is not safe, it may lead to further " + "corruption on Oopses)"); static int pstore_new_entry; -- cgit v1.2.3 From 666d1d8ad201803862514317c17695925e61316b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Jun 2012 23:03:56 -0400 Subject: GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve This function combines rgrp functions get_local_rgrp and gfs2_inplace_reserve so that the double retry loop is gone. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 82 +++++++++++++++++++++------------------------------------- 1 file changed, 29 insertions(+), 53 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3c6f7ed16a3b..e53d0a1c234f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1207,25 +1207,30 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip } /** - * get_local_rgrp - Choose and lock a rgrp for allocation + * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @last_unlinked: the last unlinked block - * - * Try to acquire rgrp in way which avoids contending with others. + * @requested: the number of blocks to be reserved * * Returns: errno */ -static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; - int error, rg_locked, flags = LM_FLAG_TRY; + int error = 0, rg_locked, flags = LM_FLAG_TRY; + u64 last_unlinked = NO_BLOCK; int loops = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; + rs = ip->i_res; + rs->rs_requested = requested; + if (gfs2_assert_warn(sdp, requested)) { + error = -EINVAL; + goto out; + } if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) rgd = begin = ip->i_rgd; @@ -1263,63 +1268,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) if (rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) gfs2_rgrp_bh_get(rgd); - try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); + try_rgrp_unlink(rgd, &last_unlinked, + ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); - if (rgd == begin) { - flags &= ~LM_FLAG_TRY; - loops++; - } + if (rgd != begin) /* If we didn't wrap */ + break; + + 
flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && + !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + goto out; + } else if (loops == 2) + /* Flushing the log may release space */ + gfs2_log_flush(sdp, NULL); break; default: - return error; + goto out; } } - - return -ENOSPC; -} - -/** - * gfs2_inplace_reserve - Reserve space in the filesystem - * @ip: the inode to reserve space for - * @requested: the number of blocks to be reserved - * - * Returns: errno - */ - -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_blkreserv *rs; - int error = 0; - u64 last_unlinked = NO_BLOCK; - int tries = 0; - - rs = ip->i_res; - rs->rs_requested = requested; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } - - do { - error = get_local_rgrp(ip, &last_unlinked); - if (error != -ENOSPC) - break; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - break; - continue; - } - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - } while (tries++ < 3); + error = -ENOSPC; out: if (error) -- cgit v1.2.3 From 7d0fa3ecba2f12ceef93fffe615e5dd9b50bb794 Mon Sep 17 00:00:00 2001 From: Alain Renaud Date: Fri, 8 Jun 2012 15:34:46 -0400 Subject: xfs: xfs_vm_writepage clear iomap_valid when !buffer_uptodate (REV2) On filesystems with a block size smaller than PAGE_SIZE we currently have a problem with unwritten extents. If we have a multi-block page for which an unwritten extent has been allocated, and only some of the buffers have been written to, and they are not contiguous, we can expose stale data from disk in the blocks between the writes after extent conversion. Example of a page with unwritten and real data.
buffer content 0 empty b_state = 0 1 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 2 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 3 empty b_state = 0 4 empty b_state = 0 5 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 6 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 7 empty b_state = 0 Buffers 1, 2, 5, and 6 have been written to, leaving 0, 3, 4, and 7 empty. Currently buffers 1, 2, 5, and 6 are added to a single ioend, and when IO has completed, extent conversion creates a real extent from block 1 through block 6, leaving 0 and 7 unwritten. However buffers 3 and 4 were not written to disk, so stale data is exposed from those blocks on a subsequent read. Fix this by setting iomap_valid = 0 when we find a buffer that is not Uptodate. This ensures that buffers 5 and 6 are not added to the same ioend as buffers 1 and 2. Later these blocks will be converted into two separate real extents, leaving the blocks in between unwritten. Signed-off-by: Alain Renaud Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index ae31c313a79e..8dad722c0041 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -981,10 +981,15 @@ xfs_vm_writepage( imap_valid = 0; } } else { - if (PageUptodate(page)) { + if (PageUptodate(page)) ASSERT(buffer_mapped(bh)); - imap_valid = 0; - } + /* + * This buffer is not uptodate and will not be + * written to disk. Ensure that we will put any + * subsequent writeable buffers into a new + * ioend. 
+ */ + imap_valid = 0; continue; } -- cgit v1.2.3 From 0f2cf9d3d917b269645902506adaa4ff92b5e506 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Thu, 7 Jun 2012 15:44:32 +0800 Subject: xfs: fix debug_object WARN at xfs_alloc_vextent() Fengguang reports: [ 780.529603] XFS (vdd): Ending clean mount [ 781.454590] ODEBUG: object is on stack, but not annotated [ 781.455433] ------------[ cut here ]------------ [ 781.455433] WARNING: at /c/kernel-tests/sound/lib/debugobjects.c:301 __debug_object_init+0x173/0x1f1() [ 781.455433] Hardware name: Bochs [ 781.455433] Modules linked in: [ 781.455433] Pid: 26910, comm: kworker/0:2 Not tainted 3.4.0+ #51 [ 781.455433] Call Trace: [ 781.455433] [] warn_slowpath_common+0x83/0x9b [ 781.455433] [] warn_slowpath_null+0x1a/0x1c [ 781.455433] [] __debug_object_init+0x173/0x1f1 [ 781.455433] [] debug_object_init+0x14/0x16 [ 781.455433] [] __init_work+0x20/0x22 [ 781.455433] [] xfs_alloc_vextent+0x6c/0xd5 Use INIT_WORK_ONSTACK in xfs_alloc_vextent instead of INIT_WORK. Reported-by: Wu Fengguang Signed-off-by: Jie Liu Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 229641fb8e67..a996e398692b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2441,7 +2441,7 @@ xfs_alloc_vextent( DECLARE_COMPLETION_ONSTACK(done); args->done = &done; - INIT_WORK(&args->work, xfs_alloc_vextent_worker); + INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); queue_work(xfs_alloc_wq, &args->work); wait_for_completion(&done); return args->result; -- cgit v1.2.3 From d2c2819117176e139dc761873c664aaa770c79c9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jun 2012 15:44:53 +1000 Subject: xfs: m_maxioffset is redundant The m_maxioffset field in the struct xfs_mount contains the same value as the superblock s_maxbytes field. There is no need to carry two copies of this limit around, so use the VFS superblock version. 
Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 12 ++++++------ fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_mount.c | 2 -- fs/xfs/xfs_mount.h | 3 +-- 4 files changed, 9 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8dad722c0041..84e372596d56 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -323,10 +323,10 @@ xfs_map_blocks( ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || (ip->i_df.if_flags & XFS_IFEXTENTS)); - ASSERT(offset <= mp->m_maxioffset); + ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_maxioffset) - count = mp->m_maxioffset - offset; + if (offset + count > mp->m_super->s_maxbytes) + count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, @@ -1162,9 +1162,9 @@ __xfs_get_blocks( lockmode = xfs_ilock_map_shared(ip); } - ASSERT(offset <= mp->m_maxioffset); - if (offset + size > mp->m_maxioffset) - size = mp->m_maxioffset - offset; + ASSERT(offset <= mp->m_super->s_maxbytes); + if (offset + size > mp->m_super->s_maxbytes) + size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index aadfce6681ee..4590cd1da432 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -416,8 +416,8 @@ retry: * Make sure preallocation does not create extents beyond the range we * actually support in this filesystem. 
*/ - if (last_fsb > XFS_B_TO_FSB(mp, mp->m_maxioffset)) - last_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset); + if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)) + last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); ASSERT(last_fsb > offset_fsb); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 536021fb3d4e..9536fd190191 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1200,8 +1200,6 @@ xfs_mountfs( xfs_set_maxicount(mp); - mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); - error = xfs_uuid_mount(mp); if (error) goto out; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8b89c5ac72d9..47c6b3b3eb9c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -176,7 +176,6 @@ typedef struct xfs_mount { uint m_qflags; /* quota status flags */ xfs_trans_reservations_t m_reservations;/* precomputed res values */ __uint64_t m_maxicount; /* maximum inode count */ - __uint64_t m_maxioffset; /* maximum inode offset */ __uint64_t m_resblks; /* total reserved blocks */ __uint64_t m_resblks_avail;/* available reserved blocks */ __uint64_t m_resblks_save; /* reserved blks @ remount,ro */ @@ -297,7 +296,7 @@ xfs_preferred_iosize(xfs_mount_t *mp) PAGE_CACHE_SIZE)); } -#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) +#define XFS_MAXIOFFSET(mp) ((mp)->m_super->s_maxbytes) #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) -- cgit v1.2.3 From 32972383ca46223aa2b129826b3789721ec147aa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jun 2012 15:44:54 +1000 Subject: xfs: make largest supported offset less shouty XFS_MAXIOFFSET() is just a simple macro that resolves to mp->m_maxioffset. It doesn't need to exist, and it just makes the code unnecessarily loud and shouty. Make it quiet and easy to read. 
Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap.c | 2 +- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_mount.h | 2 -- fs/xfs/xfs_qm.c | 2 +- fs/xfs/xfs_vnodeops.c | 10 +++++----- 7 files changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 58b815ec8c91..848ffa77707b 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5517,7 +5517,7 @@ xfs_getbmap( if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ prealloced = 1; - fixlen = XFS_MAXIOFFSET(mp); + fixlen = mp->m_super->s_maxbytes; } else { prealloced = 0; fixlen = XFS_ISIZE(ip); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 9f7ec15a6522..59e22c989cd4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -273,7 +273,7 @@ xfs_file_aio_read( } } - n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; + n = mp->m_super->s_maxbytes - iocb->ki_pos; if (n <= 0 || size == 0) return 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a59eea09930a..257f3c463e0e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1226,7 +1226,7 @@ xfs_itruncate_extents( * then there is nothing to do. */ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); if (first_unmap_block == last_block) return 0; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 4590cd1da432..915edf6639f0 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -285,7 +285,7 @@ xfs_iomap_eof_want_preallocate( * do any speculative allocation. 
*/ start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1))); - count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); while (count_fsb > 0) { imaps = nimaps; firstblock = NULLFSBLOCK; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 47c6b3b3eb9c..90a45305407d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -296,8 +296,6 @@ xfs_preferred_iosize(xfs_mount_t *mp) PAGE_CACHE_SIZE)); } -#define XFS_MAXIOFFSET(mp) ((mp)->m_super->s_maxbytes) - #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 249db1987764..2e86fa0cfc0d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -940,7 +940,7 @@ xfs_qm_dqiterate( map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); lblkno = 0; - maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); do { nmaps = XFS_DQITER_MAP_SIZE; /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b6a82d817a82..c22f4e0ecac1 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -174,7 +174,7 @@ xfs_free_eofblocks( * of the file. If not, then there is nothing to do. */ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); - last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); if (last_fsb <= end_fsb) return 0; map_len = last_fsb - end_fsb; @@ -2262,10 +2262,10 @@ xfs_change_file_space( llen = bf->l_len > 0 ? 
bf->l_len - 1 : bf->l_len; - if ( (bf->l_start < 0) - || (bf->l_start > XFS_MAXIOFFSET(mp)) - || (bf->l_start + llen < 0) - || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) + if (bf->l_start < 0 || + bf->l_start > mp->m_super->s_maxbytes || + bf->l_start + llen < 0 || + bf->l_start + llen > mp->m_super->s_maxbytes) return XFS_ERROR(EINVAL); bf->l_whence = 0; -- cgit v1.2.3 From 5276432997feb2366ac1e77949e94fe86a394813 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jun 2012 15:45:44 +1000 Subject: xfs: kill copy and paste segment checks in xfs_file_aio_read The generic segment check code now returns a count of the number of bytes in the iovec, so we don't need to roll our own anymore. Signed-off-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_file.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 59e22c989cd4..c4559c6e6f2c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -236,7 +236,6 @@ xfs_file_aio_read( ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; - unsigned long seg; XFS_STATS_INC(xs_read_calls); @@ -247,19 +246,9 @@ xfs_file_aio_read( if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - /* START copy & waste from filemap.c */ - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iovp[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - size += iv->iov_len; - if (unlikely((ssize_t)(size|iv->iov_len) < 0)) - return XFS_ERROR(-EINVAL); - } - /* END copy & waste from filemap.c */ + ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); + if (ret < 0) + return ret; if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = -- cgit v1.2.3 From 51c84223af604ce2d00d0416c30a38c50aed00bd Mon Sep 17 00:00:00 2001 From: Chen Baozi Date: Sat, 26 May 2012 00:48:47 +0800 Subject: xfs: fix typo in comment of xfs_dinode_t. 
There should be "XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR" instead of "XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR". Signed-off-by: Chen Baozi Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_dinode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index a3721633abc8..1d9643b3dce6 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -33,7 +33,7 @@ typedef struct xfs_timestamp { * variable size the leftover area split into a data and an attribute fork. * The format of the data and attribute fork depends on the format of the * inode as indicated by di_format and di_aformat. To access the data and - * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros + * attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros * below. * * There is a very similar struct icdinode in xfs_inode which matches the -- cgit v1.2.3 From 2411967305dbfb8930b9b9c11f55f6c1ef7361e1 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 24 May 2012 15:42:17 -0400 Subject: nfsd: probe the back channel on new connections Initiate a CB probe when a new connection with the correct direction is added to a session (IFF backchannel is marked as down). Without this a BIND_CONN_TO_SESSION has no effect on the internal backchannel state, which causes the server to reply to every SEQUENCE op with the SEQ4_STATUS_CB_PATH_DOWN flag set until DESTROY_SESSION. Signed-off-by: Weston Andros Adamson Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 94effd5bc4a1..8b80a10d4fc5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -862,6 +862,11 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&conn->cn_xpt_user); + if (ses->se_client->cl_cb_state == NFSD4_CB_DOWN && + dir & NFS4_CDFC4_BACK) { + /* callback channel may be back up */ + nfsd4_probe_callback(ses->se_client); + } return nfs_ok; } -- cgit v1.2.3 From 7df302f75ee28a6a87436e93b625ef60d37d098e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 May 2012 13:56:37 -0400 Subject: NFSD: TEST_STATEID should not return NFS4ERR_STALE_STATEID According to RFC 5661, the TEST_STATEID operation is not allowed to return NFS4ERR_STALE_STATEID. In addition, RFC 5661 says: 15.1.16.5. NFS4ERR_STALE_STATEID (Error Code 10023) A stateid generated by an earlier server instance was used. This error is moot in NFSv4.1 because all operations that take a stateid MUST be preceded by the SEQUENCE operation, and the earlier server instance is detected by the session infrastructure that supports SEQUENCE. I triggered NFS4ERR_STALE_STATEID while testing the Linux client's NOGRACE recovery. Bruce suggested an additional test that could be useful to client developers. Lastly, RFC 5661, section 18.48.3 has this: o Special stateids are always considered invalid (they result in the error code NFS4ERR_BAD_STATEID). An explicit check is made for those state IDs to avoid printk noise. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 22 ++++++++++++++++------ fs/nfsd/state.h | 1 - 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8b80a10d4fc5..59b9efc9d69b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include "xdr4.h" @@ -3338,18 +3339,26 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s return nfserr_old_stateid; } -__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) +static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; struct nfs4_ol_stateid *ols; __be32 status; - if (STALE_STATEID(stateid)) - return nfserr_stale_stateid; - + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return nfserr_bad_stateid; + /* Client debugging aid. */ + if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str, + sizeof(addr_str)); + pr_warn_ratelimited("NFSD: client %s testing state ID " + "with incorrect client ID\n", addr_str); + return nfserr_bad_stateid; + } s = find_stateid(cl, stateid); if (!s) - return nfserr_stale_stateid; + return nfserr_bad_stateid; status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) return status; @@ -3468,7 +3477,8 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) - stateid->ts_id_status = nfs4_validate_stateid(cl, &stateid->ts_id_stateid); + stateid->ts_id_status = + nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); nfs4_unlock_state(); return nfs_ok; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 849091e16ea6..495df4e3aa67 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -475,7 +475,6 @@ extern __be32 
nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); extern int nfs4_client_to_reclaim(const char *name); extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void release_session_client(struct nfsd4_session *); -extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); /* nfs4recover operations */ -- cgit v1.2.3 From 9068bed1a35da413df8751b8b1b845a04f62b9fd Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Tue, 5 Jun 2012 16:29:06 -0400 Subject: nfsd4: remove unnecessary comment For the most part readers of cl_cb_state only need a value that is "eventually" right. And the value is set only either 1) in response to some change of state, in which case it's set to UNKNOWN and then a callback rpc is sent to probe the real state, or 2) in the handling of a response to such a callback. UNKNOWN is therefore always a "temporary" state, and for the other states we're happy to accept last writer wins. So I think we're OK here. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a5fd6b982f27..cbaf4f8bb7b7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -756,7 +756,6 @@ static void do_probe_callback(struct nfs4_client *clp) */ void nfsd4_probe_callback(struct nfs4_client *clp) { - /* XXX: atomicity? Also, should we be using cl_flags? */ clp->cl_cb_state = NFSD4_CB_UNKNOWN; set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); do_probe_callback(clp); -- cgit v1.2.3 From e1aaa8916f19d23d39d88e985bc6933a74159e91 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Wed, 6 Jun 2012 16:01:37 -0400 Subject: nfsd4: nfsd4_lock() cleanup Share a little common logic. And note the comments here are a little out of date (e.g. we don't always create new state in the "new" case any more.) Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4state.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59b9efc9d69b..de8f7e45102d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4059,11 +4059,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); if (lock->lk_is_new) { - /* - * Client indicates that this is a new lockowner. - * Use open owner and open stateid to create lock owner and - * lock stateid. - */ struct nfs4_ol_stateid *open_stp = NULL; if (nfsd4_has_session(cstate)) @@ -4090,17 +4085,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new_state); - if (status) - goto out; - } else { - /* lock (lock owner + lock stateid) already exists */ + } else status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, NFS4_LOCK_STID, &lock_stp); - if (status) - goto out; - } + if (status) + goto out; lock_sop = lockowner(lock_stp->st_stateowner); lkflg = setlkflg(lock->lk_type); -- cgit v1.2.3 From 4af825041b06c2ef9b5933288267a11e029eb360 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Thu, 7 Jun 2012 17:30:45 -0400 Subject: nfsd4: process_open2 cleanup Note we can simplify the error handling a little by doing the truncate earlier. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index de8f7e45102d..9efa4055b5a8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3011,16 +3011,14 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } else { status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); + if (status) + goto out; + status = nfsd4_truncate(rqstp, current_fh, open); if (status) goto out; stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); - status = nfsd4_truncate(rqstp, current_fh, open); - if (status) { - release_open_stateid(stp); - goto out; - } } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); -- cgit v1.2.3 From 924d37118f9e18825294b2012a10c6245d6c25e1 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jun 2012 19:15:50 -0700 Subject: pstore/ram: Probe as early as possible Registering the platform driver before module_init allows us to log oopses that happen during device probing. This requires changing module_init to postcore_initcall, and switching from platform_driver_probe to platform_driver_register because the platform device is not registered when the platform driver is registered; and because we use driver_register, now can't use create_bundle() (since it will try to register the same driver once again), so we have to switch to platform_device_register_data(). Also, some __init -> __devinit changes were needed. Overall, the registration logic is now much clearer, since we have only one driver registration point, and just an optional dummy device, which is created from the module parameters. 
Suggested-by: Colin Cross Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 63 +++++++++++++++++++++++----------------------- fs/pstore/ram_core.c | 9 ++++--- include/linux/pstore_ram.h | 6 ++--- 3 files changed, 40 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index c7acf94ff475..0b36e91978e6 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -330,7 +330,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return 0; } -static int __init ramoops_probe(struct platform_device *pdev) +static int __devinit ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = pdev->dev.platform_data; @@ -452,6 +452,7 @@ static int __exit ramoops_remove(struct platform_device *pdev) } static struct platform_driver ramoops_driver = { + .probe = ramoops_probe, .remove = __exit_p(ramoops_remove), .driver = { .name = "ramoops", @@ -459,46 +460,46 @@ static struct platform_driver ramoops_driver = { }, }; -static int __init ramoops_init(void) +static void ramoops_register_dummy(void) { - int ret; - ret = platform_driver_probe(&ramoops_driver, ramoops_probe); - if (ret == -ENODEV) { - /* - * If we didn't find a platform device, we use module parameters - * building platform data on the fly. 
- */ - pr_info("platform device not found, using module parameters\n"); - dummy_data = kzalloc(sizeof(struct ramoops_platform_data), - GFP_KERNEL); - if (!dummy_data) - return -ENOMEM; - dummy_data->mem_size = mem_size; - dummy_data->mem_address = mem_address; - dummy_data->record_size = record_size; - dummy_data->console_size = ramoops_console_size; - dummy_data->dump_oops = dump_oops; - dummy_data->ecc = ramoops_ecc; - dummy = platform_create_bundle(&ramoops_driver, ramoops_probe, - NULL, 0, dummy_data, - sizeof(struct ramoops_platform_data)); - - if (IS_ERR(dummy)) - ret = PTR_ERR(dummy); - else - ret = 0; + if (!mem_size) + return; + + pr_info("using module parameters\n"); + + dummy_data = kzalloc(sizeof(*dummy_data), GFP_KERNEL); + if (!dummy_data) { + pr_info("could not allocate pdata\n"); + return; } - return ret; + dummy_data->mem_size = mem_size; + dummy_data->mem_address = mem_address; + dummy_data->record_size = record_size; + dummy_data->console_size = ramoops_console_size; + dummy_data->dump_oops = dump_oops; + dummy_data->ecc = ramoops_ecc; + + dummy = platform_device_register_data(NULL, "ramoops", -1, + dummy_data, sizeof(struct ramoops_platform_data)); + if (IS_ERR(dummy)) { + pr_info("could not create platform device: %ld\n", + PTR_ERR(dummy)); + } +} + +static int __init ramoops_init(void) +{ + ramoops_register_dummy(); + return platform_driver_register(&ramoops_driver); } +postcore_initcall(ramoops_init); static void __exit ramoops_exit(void) { platform_driver_unregister(&ramoops_driver); kfree(dummy_data); } - -module_init(ramoops_init); module_exit(ramoops_exit); MODULE_LICENSE("GPL"); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 0fd81611525c..26531856daf8 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -390,7 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, return 0; } -static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool ecc) +static int __devinit 
persistent_ram_post_init(struct persistent_ram_zone *prz, + bool ecc) { int ret; @@ -436,9 +437,9 @@ void persistent_ram_free(struct persistent_ram_zone *prz) kfree(prz); } -struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, - size_t size, - bool ecc) +struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, + size_t size, + bool ecc) { struct persistent_ram_zone *prz; int ret = -ENOMEM; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 2470bb591434..e681af92c04b 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -48,9 +48,9 @@ struct persistent_ram_zone { size_t old_log_size; }; -struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, - size_t size, - bool ecc); +struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, + size_t size, + bool ecc); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.3 From 90b58d96907e0a45555429c0d3a79c85cea4b9fc Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jun 2012 19:15:51 -0700 Subject: pstore/ram: Fix error handling during przs allocation persistent_ram_new() returns ERR_PTR() value on errors, so during freeing of the przs we should check for both NULL and IS_ERR() entries, otherwise bad things will happen. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 0b36e91978e6..58b93fbd117e 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -260,7 +260,7 @@ static void ramoops_free_przs(struct ramoops_context *cxt) if (!cxt->przs) return; - for (i = 0; cxt->przs[i]; i++) + for (i = 0; !IS_ERR_OR_NULL(cxt->przs[i]); i++) persistent_ram_free(cxt->przs[i]); kfree(cxt->przs); } -- cgit v1.2.3 From beeb94321a7a6d493b4a06ff0cd771f09f41c35e Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jun 2012 19:15:52 -0700 Subject: pstore/ram_core: Proper checking for post_init errors (e.g. improper ECC size) We will implement variable-sized ECC buffers soon, so the post_init routine will be much more likely to fail, so we'd better check for its errors. To make error handling simple, modify persistent_ram_free() to be safe at all times.
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 26531856daf8..f62ebf2dfed7 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -427,11 +427,17 @@ static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, void persistent_ram_free(struct persistent_ram_zone *prz) { - if (pfn_valid(prz->paddr >> PAGE_SHIFT)) { - vunmap(prz->vaddr); - } else { - iounmap(prz->vaddr); - release_mem_region(prz->paddr, prz->size); + if (!prz) + return; + + if (prz->vaddr) { + if (pfn_valid(prz->paddr >> PAGE_SHIFT)) { + vunmap(prz->vaddr); + } else { + iounmap(prz->vaddr); + release_mem_region(prz->paddr, prz->size); + } + prz->vaddr = NULL; } persistent_ram_free_old(prz); kfree(prz); @@ -454,10 +460,12 @@ struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, if (ret) goto err; - persistent_ram_post_init(prz, ecc); + ret = persistent_ram_post_init(prz, ecc); + if (ret) + goto err; return prz; err: - kfree(prz); + persistent_ram_free(prz); return ERR_PTR(ret); } -- cgit v1.2.3 From 1e6a9e56252399ae8c143f2327b4bb8cd289c3d5 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jun 2012 19:15:53 -0700 Subject: pstore/ram_core: Better ECC size checking - Instead of exploiting unsigned overflows (which doesn't work for all sizes), use straightforward checking for ECC total size not exceeding initial buffer size; - Printing overflowed buffer_size is not informative. Instead, print ecc_size and buffer_size; - No need for buffer_size argument in persistent_ram_init_ecc(), we can address prz->buffer_size directly. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index f62ebf2dfed7..a5a7b13d358c 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -171,12 +171,12 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) } } -static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, - size_t buffer_size) +static int persistent_ram_init_ecc(struct persistent_ram_zone *prz) { int numerr; struct persistent_ram_buffer *buffer = prz->buffer; int ecc_blocks; + size_t ecc_total; if (!prz->ecc) return 0; @@ -187,14 +187,14 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, prz->ecc_poly = 0x11d; ecc_blocks = DIV_ROUND_UP(prz->buffer_size, prz->ecc_block_size); - prz->buffer_size -= (ecc_blocks + 1) * prz->ecc_size; - - if (prz->buffer_size > buffer_size) { - pr_err("persistent_ram: invalid size %zu, non-ecc datasize %zu\n", - buffer_size, prz->buffer_size); + ecc_total = (ecc_blocks + 1) * prz->ecc_size; + if (ecc_total >= prz->buffer_size) { + pr_err("%s: invalid ecc_size %u (total %zu, buffer size %zu)\n", + __func__, prz->ecc_size, ecc_total, prz->buffer_size); return -EINVAL; } + prz->buffer_size -= ecc_total; prz->par_buffer = buffer->data + prz->buffer_size; prz->par_header = prz->par_buffer + ecc_blocks * prz->ecc_size; @@ -397,7 +397,7 @@ static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, prz->ecc = ecc; - ret = persistent_ram_init_ecc(prz, prz->buffer_size); + ret = persistent_ram_init_ecc(prz); if (ret) return ret; -- cgit v1.2.3 From 76e8f1386673b864cfca3c24c4d5814740e76465 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 11 Jun 2012 10:39:43 -0400 Subject: xfs: check for stale inode before acquiring iflock on push An inode in the AIL can be flush locked and marked stale if a cluster 
free transaction occurs at the right time. The inode item is then marked as flushing, which causes xfsaild to spin and leaves the filesystem stalled. This is reproduced by running xfstests 273 in a loop for an extended period of time. Check for stale inodes before the flush lock. This marks the inode as pinned, leads to a log flush and allows the filesystem to proceed. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_inode_item.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6cdbf90c6f7b..d041d47d9d86 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -504,6 +504,14 @@ xfs_inode_item_push( goto out_unlock; } + /* + * Stale inode items should force out the iclog. + */ + if (ip->i_flags & XFS_ISTALE) { + rval = XFS_ITEM_PINNED; + goto out_unlock; + } + /* * Someone else is already flushing the inode. Nothing we can do * here but wait for the flush to finish and remove the item from @@ -514,15 +522,6 @@ xfs_inode_item_push( goto out_unlock; } - /* - * Stale inode items should force out the iclog. - */ - if (ip->i_flags & XFS_ISTALE) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return XFS_ITEM_PINNED; - } - ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); -- cgit v1.2.3 From 079da28c64ebeca357adae77aea3ae7160e45d98 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jun 2012 14:20:26 +1000 Subject: xfs: fix allocbt cursor leak in xfs_alloc_ag_vextent_near When we fail to find a matching extent near the requested extent specification during a left-right distance search in xfs_alloc_ag_vextent_near, we fail to free the original cursor that we used to look up the XFS_BTNUM_CNT tree and hence leak it.
Reported-by: Chris J Arges Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a996e398692b..9d1aeb7e2734 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1080,6 +1080,7 @@ restart: goto restart; } + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; -- cgit v1.2.3 From bcf62ab64d1ba257dd9d4283a077a7219a05073a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Jun 2012 00:32:26 +0200 Subject: xfs: Fix overallocation in xfs_buf_allocate_memory() Commit de1cbee which removed b_file_offset in favor of b_bn introduced a bug causing xfs_buf_allocate_memory() to overestimate the number of necessary pages. The problem is that xfs_buf_alloc() sets b_bn to -1 and thus effectively every buffer is straddling a page boundary which causes xfs_buf_allocate_memory() to allocate two pages and use vmalloc() for access which is unnecessary. Dave says xfs_buf_alloc() doesn't need to set b_bn to -1 anymore since the buffer is inserted into the cache only after being fully initialized now. So just make xfs_buf_alloc() fill in proper block number from the beginning. CC: David Chinner Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 172d3cc8f8cb..a4beb421018a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -201,14 +201,7 @@ xfs_buf_alloc( bp->b_length = numblks; bp->b_io_length = numblks; bp->b_flags = flags; - - /* - * We do not set the block number here in the buffer because we have not - * finished initialising the buffer. 
We insert the buffer into the cache - * in this state, so this ensures that we are unable to do IO on a - * buffer that hasn't been fully initialised. - */ - bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_bn = blkno; atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); @@ -567,11 +560,6 @@ xfs_buf_get( if (bp != new_bp) xfs_buf_free(new_bp); - /* - * Now we have a workable buffer, fill in the block number so - * that we can do IO on it. - */ - bp->b_bn = blkno; bp->b_io_length = bp->b_length; found: @@ -772,7 +760,7 @@ xfs_buf_get_uncached( int error, i; xfs_buf_t *bp; - bp = xfs_buf_alloc(target, 0, numblks, 0); + bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0); if (unlikely(bp == NULL)) goto fail; -- cgit v1.2.3 From 11159a0500c1eb7a8a2de37b7dceb53373c75350 Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Fri, 25 May 2012 15:45:36 -0500 Subject: xfs: shutdown xfs_sync_worker before the log Revert commit 1307bbd, which uses the s_umount semaphore to provide exclusion between xfs_sync_worker and unmount, in favor of shutting down the sync worker before freeing the log in xfs_log_unmount. This is a cleaner way of resolving the race between xfs_sync_worker and unmount than using s_umount. 
Signed-off-by: Ben Myers Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner --- fs/xfs/xfs_log.c | 1 + fs/xfs/xfs_sync.c | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f30d9807dc48..0e1a64f0439c 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -810,6 +810,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) void xfs_log_unmount(xfs_mount_t *mp) { + cancel_delayed_work_sync(&mp->m_sync_work); xfs_trans_ail_destroy(mp); xlog_dealloc_log(mp->m_log); } diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index c9d3409c5ca3..1e9ee064dbb2 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -386,23 +386,23 @@ xfs_sync_worker( * We shouldn't write/force the log if we are in the mount/unmount * process or on a read only filesystem. The workqueue still needs to be * active in both cases, however, because it is used for inode reclaim - * during these times. Use the s_umount semaphore to provide exclusion - * with unmount. + * during these times. Use the MS_ACTIVE flag to avoid doing anything + * during mount. Doing work during unmount is avoided by calling + * cancel_delayed_work_sync on this work queue before tearing down + * the ail and the log in xfs_log_unmount. 
*/ - if (down_read_trylock(&mp->m_super->s_umount)) { - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - /* dgc: errors ignored here */ - if (mp->m_super->s_frozen == SB_UNFROZEN && - xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp); - else - xfs_log_force(mp, 0); - - /* start pushing all the metadata that is currently - * dirty */ - xfs_ail_push_all(mp->m_ail); - } - up_read(&mp->m_super->s_umount); + if (!(mp->m_super->s_flags & MS_ACTIVE) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + /* dgc: errors ignored here */ + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + + /* start pushing all the metadata that is currently + * dirty */ + xfs_ail_push_all(mp->m_ail); } /* queue us up again */ -- cgit v1.2.3 From ad223e6030be017470e46f153de27a43979759e0 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Thu, 14 Jun 2012 09:22:15 -0500 Subject: xfs: rename log structure to xlog Rename the XFS log structure to xlog to help crash distinguish it from the other logs in Linux.
Signed-off-by: Mark Tinguely Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_log.c | 76 ++++++++++++++++++++++++++++-------------------- fs/xfs/xfs_log_cil.c | 22 +++++++------- fs/xfs/xfs_log_priv.h | 46 +++++++++++++++++++---------- fs/xfs/xfs_log_recover.c | 38 ++++++++++++------------ fs/xfs/xfs_mount.h | 4 +-- fs/xfs/xfs_trace.h | 18 ++++++------ 6 files changed, 116 insertions(+), 88 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0e1a64f0439c..d90d4a388609 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -38,13 +38,21 @@ kmem_zone_t *xfs_log_ticket_zone; /* Local miscellaneous function prototypes */ -STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket, - xlog_in_core_t **, xfs_lsn_t *); +STATIC int +xlog_commit_record( + struct xlog *log, + struct xlog_ticket *ticket, + struct xlog_in_core **iclog, + xfs_lsn_t *commitlsnp); + STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, xfs_buftarg_t *log_target, xfs_daddr_t blk_offset, int num_bblks); -STATIC int xlog_space_left(struct log *log, atomic64_t *head); +STATIC int +xlog_space_left( + struct xlog *log, + atomic64_t *head); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); STATIC void xlog_dealloc_log(xlog_t *log); @@ -64,8 +72,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log, int eventual_size); STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); -STATIC void xlog_grant_push_ail(struct log *log, - int need_bytes); +STATIC void +xlog_grant_push_ail( + struct xlog *log, + int need_bytes); STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket); STATIC void xlog_ungrant_log_space(xlog_t *log, @@ -73,7 +83,9 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); -STATIC void xlog_verify_grant_tail(struct log *log); +STATIC void +xlog_verify_grant_tail( + struct xlog *log); STATIC void 
xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, int count, boolean_t syncing); STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, @@ -89,9 +101,9 @@ STATIC int xlog_iclogs_empty(xlog_t *log); static void xlog_grant_sub_space( - struct log *log, - atomic64_t *head, - int bytes) + struct xlog *log, + atomic64_t *head, + int bytes) { int64_t head_val = atomic64_read(head); int64_t new, old; @@ -115,9 +127,9 @@ xlog_grant_sub_space( static void xlog_grant_add_space( - struct log *log, - atomic64_t *head, - int bytes) + struct xlog *log, + atomic64_t *head, + int bytes) { int64_t head_val = atomic64_read(head); int64_t new, old; @@ -165,7 +177,7 @@ xlog_grant_head_wake_all( static inline int xlog_ticket_reservation( - struct log *log, + struct xlog *log, struct xlog_grant_head *head, struct xlog_ticket *tic) { @@ -182,7 +194,7 @@ xlog_ticket_reservation( STATIC bool xlog_grant_head_wake( - struct log *log, + struct xlog *log, struct xlog_grant_head *head, int *free_bytes) { @@ -204,7 +216,7 @@ xlog_grant_head_wake( STATIC int xlog_grant_head_wait( - struct log *log, + struct xlog *log, struct xlog_grant_head *head, struct xlog_ticket *tic, int need_bytes) @@ -256,7 +268,7 @@ shutdown: */ STATIC int xlog_grant_head_check( - struct log *log, + struct xlog *log, struct xlog_grant_head *head, struct xlog_ticket *tic, int *need_bytes) @@ -323,7 +335,7 @@ xfs_log_regrant( struct xfs_mount *mp, struct xlog_ticket *tic) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; int need_bytes; int error = 0; @@ -389,7 +401,7 @@ xfs_log_reserve( bool permanent, uint t_type) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; struct xlog_ticket *tic; int need_bytes; int error = 0; @@ -465,7 +477,7 @@ xfs_log_done( struct xlog_in_core **iclog, uint flags) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; xfs_lsn_t lsn = 0; if (XLOG_FORCED_SHUTDOWN(log) || @@ -839,7 +851,7 @@ void xfs_log_space_wake( struct xfs_mount *mp) { 
- struct log *log = mp->m_log; + struct xlog *log = mp->m_log; int free_bytes; if (XLOG_FORCED_SHUTDOWN(log)) @@ -917,7 +929,7 @@ xfs_lsn_t xlog_assign_tail_lsn_locked( struct xfs_mount *mp) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; struct xfs_log_item *lip; xfs_lsn_t tail_lsn; @@ -966,7 +978,7 @@ xlog_assign_tail_lsn( */ STATIC int xlog_space_left( - struct log *log, + struct xlog *log, atomic64_t *head) { int free_bytes; @@ -1278,7 +1290,7 @@ out: */ STATIC int xlog_commit_record( - struct log *log, + struct xlog *log, struct xlog_ticket *ticket, struct xlog_in_core **iclog, xfs_lsn_t *commitlsnp) @@ -1312,7 +1324,7 @@ xlog_commit_record( */ STATIC void xlog_grant_push_ail( - struct log *log, + struct xlog *log, int need_bytes) { xfs_lsn_t threshold_lsn = 0; @@ -1791,7 +1803,7 @@ xlog_write_start_rec( static xlog_op_header_t * xlog_write_setup_ophdr( - struct log *log, + struct xlog *log, struct xlog_op_header *ophdr, struct xlog_ticket *ticket, uint flags) @@ -1874,7 +1886,7 @@ xlog_write_setup_copy( static int xlog_write_copy_finish( - struct log *log, + struct xlog *log, struct xlog_in_core *iclog, uint flags, int *record_cnt, @@ -1959,7 +1971,7 @@ xlog_write_copy_finish( */ int xlog_write( - struct log *log, + struct xlog *log, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, xfs_lsn_t *start_lsn, @@ -2822,7 +2834,7 @@ _xfs_log_force( uint flags, int *log_flushed) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; struct xlog_in_core *iclog; xfs_lsn_t lsn; @@ -2970,7 +2982,7 @@ _xfs_log_force_lsn( uint flags, int *log_flushed) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; struct xlog_in_core *iclog; int already_slept = 0; @@ -3148,7 +3160,7 @@ xfs_log_ticket_get( */ xlog_ticket_t * xlog_ticket_alloc( - struct log *log, + struct xlog *log, int unit_bytes, int cnt, char client, @@ -3279,7 +3291,7 @@ xlog_ticket_alloc( */ void xlog_verify_dest_ptr( - struct log *log, + struct xlog *log, char 
*ptr) { int i; @@ -3308,7 +3320,7 @@ xlog_verify_dest_ptr( */ STATIC void xlog_verify_grant_tail( - struct log *log) + struct xlog *log) { int tail_cycle, tail_blocks; int cycle, space; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7d6197c58493..ddc4529d07d3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -44,7 +44,7 @@ */ static struct xlog_ticket * xlog_cil_ticket_alloc( - struct log *log) + struct xlog *log) { struct xlog_ticket *tic; @@ -72,7 +72,7 @@ xlog_cil_ticket_alloc( */ void xlog_cil_init_post_recovery( - struct log *log) + struct xlog *log) { log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); log->l_cilp->xc_ctx->sequence = 1; @@ -182,7 +182,7 @@ xlog_cil_prepare_log_vecs( */ STATIC void xfs_cil_prepare_item( - struct log *log, + struct xlog *log, struct xfs_log_vec *lv, int *len, int *diff_iovecs) @@ -231,7 +231,7 @@ xfs_cil_prepare_item( */ static void xlog_cil_insert_items( - struct log *log, + struct xlog *log, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket) { @@ -373,7 +373,7 @@ xlog_cil_committed( */ STATIC int xlog_cil_push( - struct log *log) + struct xlog *log) { struct xfs_cil *cil = log->l_cilp; struct xfs_log_vec *lv; @@ -601,7 +601,7 @@ xlog_cil_push_work( */ static void xlog_cil_push_background( - struct log *log) + struct xlog *log) { struct xfs_cil *cil = log->l_cilp; @@ -629,7 +629,7 @@ xlog_cil_push_background( static void xlog_cil_push_foreground( - struct log *log, + struct xlog *log, xfs_lsn_t push_seq) { struct xfs_cil *cil = log->l_cilp; @@ -683,7 +683,7 @@ xfs_log_commit_cil( xfs_lsn_t *commit_lsn, int flags) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; int log_flags = 0; struct xfs_log_vec *log_vector; @@ -754,7 +754,7 @@ xfs_log_commit_cil( */ xfs_lsn_t xlog_cil_force_lsn( - struct log *log, + struct xlog *log, xfs_lsn_t sequence) { struct xfs_cil *cil = log->l_cilp; @@ -833,7 +833,7 @@ xfs_log_item_in_current_chkpt( */ int xlog_cil_init( - struct log *log) 
+ struct xlog *log) { struct xfs_cil *cil; struct xfs_cil_ctx *ctx; @@ -869,7 +869,7 @@ xlog_cil_init( void xlog_cil_destroy( - struct log *log) + struct xlog *log) { if (log->l_cilp->xc_ctx) { if (log->l_cilp->xc_ctx->ticket) diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 5bc33261f5be..72eba2201b14 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -19,7 +19,7 @@ #define __XFS_LOG_PRIV_H__ struct xfs_buf; -struct log; +struct xlog; struct xlog_ticket; struct xfs_mount; @@ -352,7 +352,7 @@ typedef struct xlog_in_core { struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; struct xfs_buf *ic_bp; - struct log *ic_log; + struct xlog *ic_log; int ic_size; int ic_offset; int ic_bwritecnt; @@ -409,7 +409,7 @@ struct xfs_cil_ctx { * operations almost as efficient as the old logging methods. */ struct xfs_cil { - struct log *xc_log; + struct xlog *xc_log; struct list_head xc_cil; spinlock_t xc_cil_lock; struct xfs_cil_ctx *xc_ctx; @@ -487,7 +487,7 @@ struct xlog_grant_head { * overflow 31 bits worth of byte offset, so using a byte number will mean * that round off problems won't occur when releasing partial reservations. 
*/ -typedef struct log { +typedef struct xlog { /* The following fields don't need locking */ struct xfs_mount *l_mp; /* mount point */ struct xfs_ail *l_ailp; /* AIL log is working with */ @@ -553,9 +553,14 @@ extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern kmem_zone_t *xfs_log_ticket_zone; -struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, - int count, char client, bool permanent, - xfs_km_flags_t alloc_flags); +struct xlog_ticket * +xlog_ticket_alloc( + struct xlog *log, + int unit_bytes, + int count, + char client, + bool permanent, + xfs_km_flags_t alloc_flags); static inline void @@ -567,9 +572,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) } void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); -int xlog_write(struct log *log, struct xfs_log_vec *log_vector, - struct xlog_ticket *tic, xfs_lsn_t *start_lsn, - xlog_in_core_t **commit_iclog, uint flags); +int +xlog_write( + struct xlog *log, + struct xfs_log_vec *log_vector, + struct xlog_ticket *tic, + xfs_lsn_t *start_lsn, + struct xlog_in_core **commit_iclog, + uint flags); /* * When we crack an atomic LSN, we sample it first so that the value will not @@ -629,17 +639,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space) /* * Committed Item List interfaces */ -int xlog_cil_init(struct log *log); -void xlog_cil_init_post_recovery(struct log *log); -void xlog_cil_destroy(struct log *log); +int +xlog_cil_init(struct xlog *log); +void +xlog_cil_init_post_recovery(struct xlog *log); +void +xlog_cil_destroy(struct xlog *log); /* * CIL force routines */ -xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); +xfs_lsn_t +xlog_cil_force_lsn( + struct xlog *log, + xfs_lsn_t sequence); static inline void -xlog_cil_force(struct log *log) +xlog_cil_force(struct xlog *log) { xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); } diff --git 
a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ca386909131a..a7be98abd6a9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1471,8 +1471,8 @@ xlog_recover_add_item( STATIC int xlog_recover_add_to_cont_trans( - struct log *log, - xlog_recover_t *trans, + struct xlog *log, + struct xlog_recover *trans, xfs_caddr_t dp, int len) { @@ -1517,8 +1517,8 @@ xlog_recover_add_to_cont_trans( */ STATIC int xlog_recover_add_to_trans( - struct log *log, - xlog_recover_t *trans, + struct xlog *log, + struct xlog_recover *trans, xfs_caddr_t dp, int len) { @@ -1588,8 +1588,8 @@ xlog_recover_add_to_trans( */ STATIC int xlog_recover_reorder_trans( - struct log *log, - xlog_recover_t *trans, + struct xlog *log, + struct xlog_recover *trans, int pass) { xlog_recover_item_t *item, *n; @@ -1642,8 +1642,8 @@ xlog_recover_reorder_trans( */ STATIC int xlog_recover_buffer_pass1( - struct log *log, - xlog_recover_item_t *item) + struct xlog *log, + struct xlog_recover_item *item) { xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; struct list_head *bucket; @@ -1696,7 +1696,7 @@ xlog_recover_buffer_pass1( */ STATIC int xlog_check_buffer_cancelled( - struct log *log, + struct xlog *log, xfs_daddr_t blkno, uint len, ushort flags) @@ -2689,9 +2689,9 @@ xlog_recover_free_trans( STATIC int xlog_recover_commit_pass1( - struct log *log, - struct xlog_recover *trans, - xlog_recover_item_t *item) + struct xlog *log, + struct xlog_recover *trans, + struct xlog_recover_item *item) { trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); @@ -2716,10 +2716,10 @@ xlog_recover_commit_pass1( STATIC int xlog_recover_commit_pass2( - struct log *log, - struct xlog_recover *trans, - struct list_head *buffer_list, - xlog_recover_item_t *item) + struct xlog *log, + struct xlog_recover *trans, + struct list_head *buffer_list, + struct xlog_recover_item *item) { trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); @@ -2753,7 +2753,7 @@ 
xlog_recover_commit_pass2( */ STATIC int xlog_recover_commit_trans( - struct log *log, + struct xlog *log, struct xlog_recover *trans, int pass) { @@ -2793,8 +2793,8 @@ out: STATIC int xlog_recover_unmount_trans( - struct log *log, - xlog_recover_t *trans) + struct xlog *log, + struct xlog_recover *trans) { /* Do nothing now */ xfs_warn(log->l_mp, "%s: Unmount LR", __func__); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 90a45305407d..8724336a9a08 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -53,7 +53,7 @@ typedef struct xfs_trans_reservations { #include "xfs_sync.h" -struct log; +struct xlog; struct xfs_mount_args; struct xfs_inode; struct xfs_bmbt_irec; @@ -133,7 +133,7 @@ typedef struct xfs_mount { uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ uint m_writeio_blocks; /* min write size blocks */ - struct log *m_log; /* log specific stuff */ + struct xlog *m_log; /* log specific stuff */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; /* rt summary levels */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7cf9d3529e51..caf5dabfd553 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -32,7 +32,7 @@ struct xfs_da_node_entry; struct xfs_dquot; struct xfs_log_item; struct xlog_ticket; -struct log; +struct xlog; struct xlog_recover; struct xlog_recover_item; struct xfs_buf_log_format; @@ -762,7 +762,7 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_force); DEFINE_DQUOT_EVENT(xfs_dqflush_done); DECLARE_EVENT_CLASS(xfs_loggrant_class, - TP_PROTO(struct log *log, struct xlog_ticket *tic), + TP_PROTO(struct xlog *log, struct xlog_ticket *tic), TP_ARGS(log, tic), TP_STRUCT__entry( __field(dev_t, dev) @@ -830,7 +830,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, #define DEFINE_LOGGRANT_EVENT(name) \ DEFINE_EVENT(xfs_loggrant_class, name, \ - TP_PROTO(struct log *log, struct xlog_ticket *tic), \ + TP_PROTO(struct xlog *log, struct 
xlog_ticket *tic), \ TP_ARGS(log, tic)) DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); @@ -1664,7 +1664,7 @@ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); DECLARE_EVENT_CLASS(xfs_log_recover_item_class, - TP_PROTO(struct log *log, struct xlog_recover *trans, + TP_PROTO(struct xlog *log, struct xlog_recover *trans, struct xlog_recover_item *item, int pass), TP_ARGS(log, trans, item, pass), TP_STRUCT__entry( @@ -1698,7 +1698,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class, #define DEFINE_LOG_RECOVER_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_item_class, name, \ - TP_PROTO(struct log *log, struct xlog_recover *trans, \ + TP_PROTO(struct xlog *log, struct xlog_recover *trans, \ struct xlog_recover_item *item, int pass), \ TP_ARGS(log, trans, item, pass)) @@ -1709,7 +1709,7 @@ DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, - TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), + TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), TP_ARGS(log, buf_f), TP_STRUCT__entry( __field(dev_t, dev) @@ -1739,7 +1739,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, #define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ - TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ + TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), \ TP_ARGS(log, buf_f)) DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); @@ -1752,7 +1752,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, - TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), + TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), TP_ARGS(log, in_f), TP_STRUCT__entry( __field(dev_t, dev) @@ -1790,7 
+1790,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, ) #define DEFINE_LOG_RECOVER_INO_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ - TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ + TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), \ TP_ARGS(log, in_f)) DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); -- cgit v1.2.3 From 9a8d2fdbb47aaa1eaa136b89da5e5e6b60015c78 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Thu, 14 Jun 2012 09:22:16 -0500 Subject: xfs: remove xlog_t typedef Remove the xlog_t type definitions. Signed-off-by: Mark Tinguely Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_log.c | 223 +++++++++++++++++++++++++++++------------------ fs/xfs/xfs_log_priv.h | 18 ++-- fs/xfs/xfs_log_recover.c | 140 +++++++++++++++-------------- 3 files changed, 224 insertions(+), 157 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index d90d4a388609..7f4f9370d0e7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -45,51 +45,85 @@ xlog_commit_record( struct xlog_in_core **iclog, xfs_lsn_t *commitlsnp); -STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks); +STATIC struct xlog * +xlog_alloc_log( + struct xfs_mount *mp, + struct xfs_buftarg *log_target, + xfs_daddr_t blk_offset, + int num_bblks); STATIC int xlog_space_left( struct xlog *log, atomic64_t *head); -STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); -STATIC void xlog_dealloc_log(xlog_t *log); +STATIC int +xlog_sync( + struct xlog *log, + struct xlog_in_core *iclog); +STATIC void +xlog_dealloc_log( + struct xlog *log); /* local state machine functions */ STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); -STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog); -STATIC int xlog_state_get_iclog_space(xlog_t *log, - int len, - xlog_in_core_t **iclog, - xlog_ticket_t 
*ticket, - int *continued_write, - int *logoffsetp); -STATIC int xlog_state_release_iclog(xlog_t *log, - xlog_in_core_t *iclog); -STATIC void xlog_state_switch_iclogs(xlog_t *log, - xlog_in_core_t *iclog, - int eventual_size); -STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); +STATIC void +xlog_state_do_callback( + struct xlog *log, + int aborted, + struct xlog_in_core *iclog); +STATIC int +xlog_state_get_iclog_space( + struct xlog *log, + int len, + struct xlog_in_core **iclog, + struct xlog_ticket *ticket, + int *continued_write, + int *logoffsetp); +STATIC int +xlog_state_release_iclog( + struct xlog *log, + struct xlog_in_core *iclog); +STATIC void +xlog_state_switch_iclogs( + struct xlog *log, + struct xlog_in_core *iclog, + int eventual_size); +STATIC void +xlog_state_want_sync( + struct xlog *log, + struct xlog_in_core *iclog); STATIC void xlog_grant_push_ail( - struct xlog *log, - int need_bytes); -STATIC void xlog_regrant_reserve_log_space(xlog_t *log, - xlog_ticket_t *ticket); -STATIC void xlog_ungrant_log_space(xlog_t *log, - xlog_ticket_t *ticket); + struct xlog *log, + int need_bytes); +STATIC void +xlog_regrant_reserve_log_space( + struct xlog *log, + struct xlog_ticket *ticket); +STATIC void +xlog_ungrant_log_space( + struct xlog *log, + struct xlog_ticket *ticket); #if defined(DEBUG) -STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); +STATIC void +xlog_verify_dest_ptr( + struct xlog *log, + char *ptr); STATIC void xlog_verify_grant_tail( - struct xlog *log); -STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, - int count, boolean_t syncing); -STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, - xfs_lsn_t tail_lsn); + struct xlog *log); +STATIC void +xlog_verify_iclog( + struct xlog *log, + struct xlog_in_core *iclog, + int count, + boolean_t syncing); +STATIC void +xlog_verify_tail_lsn( + struct xlog *log, + struct xlog_in_core *iclog, + xfs_lsn_t tail_lsn); #else #define 
xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) @@ -97,7 +131,9 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, #define xlog_verify_tail_lsn(a,b,c) #endif -STATIC int xlog_iclogs_empty(xlog_t *log); +STATIC int +xlog_iclogs_empty( + struct xlog *log); static void xlog_grant_sub_space( @@ -684,7 +720,7 @@ xfs_log_mount_finish(xfs_mount_t *mp) int xfs_log_unmount_write(xfs_mount_t *mp) { - xlog_t *log = mp->m_log; + struct xlog *log = mp->m_log; xlog_in_core_t *iclog; #ifdef DEBUG xlog_in_core_t *first_iclog; @@ -893,7 +929,7 @@ int xfs_log_need_covered(xfs_mount_t *mp) { int needed = 0; - xlog_t *log = mp->m_log; + struct xlog *log = mp->m_log; if (!xfs_fs_writable(mp)) return 0; @@ -1024,9 +1060,9 @@ xlog_space_left( void xlog_iodone(xfs_buf_t *bp) { - xlog_in_core_t *iclog = bp->b_fspriv; - xlog_t *l = iclog->ic_log; - int aborted = 0; + struct xlog_in_core *iclog = bp->b_fspriv; + struct xlog *l = iclog->ic_log; + int aborted = 0; /* * Race to shutdown the filesystem if we see an error. @@ -1067,8 +1103,9 @@ xlog_iodone(xfs_buf_t *bp) */ STATIC void -xlog_get_iclog_buffer_size(xfs_mount_t *mp, - xlog_t *log) +xlog_get_iclog_buffer_size( + struct xfs_mount *mp, + struct xlog *log) { int size; int xhdrs; @@ -1129,13 +1166,14 @@ done: * Its primary purpose is to fill in enough, so recovery can occur. However, * some other stuff may be filled in too. 
*/ -STATIC xlog_t * -xlog_alloc_log(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks) +STATIC struct xlog * +xlog_alloc_log( + struct xfs_mount *mp, + struct xfs_buftarg *log_target, + xfs_daddr_t blk_offset, + int num_bblks) { - xlog_t *log; + struct xlog *log; xlog_rec_header_t *head; xlog_in_core_t **iclogp; xlog_in_core_t *iclog, *prev_iclog=NULL; @@ -1144,7 +1182,7 @@ xlog_alloc_log(xfs_mount_t *mp, int error = ENOMEM; uint log2_size = 0; - log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL); if (!log) { xfs_warn(mp, "Log allocation failed: No memory!"); goto out; @@ -1434,8 +1472,9 @@ xlog_bdstrat( */ STATIC int -xlog_sync(xlog_t *log, - xlog_in_core_t *iclog) +xlog_sync( + struct xlog *log, + struct xlog_in_core *iclog) { xfs_caddr_t dptr; /* pointer to byte sized element */ xfs_buf_t *bp; @@ -1584,7 +1623,8 @@ xlog_sync(xlog_t *log, * Deallocate a log structure */ STATIC void -xlog_dealloc_log(xlog_t *log) +xlog_dealloc_log( + struct xlog *log) { xlog_in_core_t *iclog, *next_iclog; int i; @@ -1616,10 +1656,11 @@ xlog_dealloc_log(xlog_t *log) */ /* ARGSUSED */ static inline void -xlog_state_finish_copy(xlog_t *log, - xlog_in_core_t *iclog, - int record_cnt, - int copy_bytes) +xlog_state_finish_copy( + struct xlog *log, + struct xlog_in_core *iclog, + int record_cnt, + int copy_bytes) { spin_lock(&log->l_icloglock); @@ -2142,7 +2183,8 @@ xlog_write( * State Change: DIRTY -> ACTIVE */ STATIC void -xlog_state_clean_log(xlog_t *log) +xlog_state_clean_log( + struct xlog *log) { xlog_in_core_t *iclog; int changed = 0; @@ -2222,7 +2264,7 @@ xlog_state_clean_log(xlog_t *log) STATIC xfs_lsn_t xlog_get_lowest_lsn( - xlog_t *log) + struct xlog *log) { xlog_in_core_t *lsn_log; xfs_lsn_t lowest_lsn, lsn; @@ -2245,9 +2287,9 @@ xlog_get_lowest_lsn( STATIC void xlog_state_do_callback( - xlog_t *log, - int aborted, - xlog_in_core_t *ciclog) + struct xlog *log, + int aborted, + struct 
xlog_in_core *ciclog) { xlog_in_core_t *iclog; xlog_in_core_t *first_iclog; /* used to know when we've @@ -2467,7 +2509,7 @@ xlog_state_done_syncing( xlog_in_core_t *iclog, int aborted) { - xlog_t *log = iclog->ic_log; + struct xlog *log = iclog->ic_log; spin_lock(&log->l_icloglock); @@ -2521,12 +2563,13 @@ xlog_state_done_syncing( * is copied. */ STATIC int -xlog_state_get_iclog_space(xlog_t *log, - int len, - xlog_in_core_t **iclogp, - xlog_ticket_t *ticket, - int *continued_write, - int *logoffsetp) +xlog_state_get_iclog_space( + struct xlog *log, + int len, + struct xlog_in_core **iclogp, + struct xlog_ticket *ticket, + int *continued_write, + int *logoffsetp) { int log_offset; xlog_rec_header_t *head; @@ -2631,8 +2674,9 @@ restart: * move grant reservation head forward. */ STATIC void -xlog_regrant_reserve_log_space(xlog_t *log, - xlog_ticket_t *ticket) +xlog_regrant_reserve_log_space( + struct xlog *log, + struct xlog_ticket *ticket) { trace_xfs_log_regrant_reserve_enter(log, ticket); @@ -2677,8 +2721,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, * in the current reservation field. */ STATIC void -xlog_ungrant_log_space(xlog_t *log, - xlog_ticket_t *ticket) +xlog_ungrant_log_space( + struct xlog *log, + struct xlog_ticket *ticket) { int bytes; @@ -2717,8 +2762,8 @@ xlog_ungrant_log_space(xlog_t *log, */ STATIC int xlog_state_release_iclog( - xlog_t *log, - xlog_in_core_t *iclog) + struct xlog *log, + struct xlog_in_core *iclog) { int sync = 0; /* do we sync? */ @@ -2768,9 +2813,10 @@ xlog_state_release_iclog( * that every data block. We have run out of space in this log record. */ STATIC void -xlog_state_switch_iclogs(xlog_t *log, - xlog_in_core_t *iclog, - int eventual_size) +xlog_state_switch_iclogs( + struct xlog *log, + struct xlog_in_core *iclog, + int eventual_size) { ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); if (!eventual_size) @@ -3114,7 +3160,9 @@ xfs_log_force_lsn( * disk. 
*/ STATIC void -xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) +xlog_state_want_sync( + struct xlog *log, + struct xlog_in_core *iclog) { assert_spin_locked(&log->l_icloglock); @@ -3158,7 +3206,7 @@ xfs_log_ticket_get( /* * Allocate and initialise a new log ticket. */ -xlog_ticket_t * +struct xlog_ticket * xlog_ticket_alloc( struct xlog *log, int unit_bytes, @@ -3346,9 +3394,10 @@ xlog_verify_grant_tail( /* check if it will fit */ STATIC void -xlog_verify_tail_lsn(xlog_t *log, - xlog_in_core_t *iclog, - xfs_lsn_t tail_lsn) +xlog_verify_tail_lsn( + struct xlog *log, + struct xlog_in_core *iclog, + xfs_lsn_t tail_lsn) { int blocks; @@ -3385,10 +3434,11 @@ xlog_verify_tail_lsn(xlog_t *log, * the cycle numbers agree with the current cycle number. */ STATIC void -xlog_verify_iclog(xlog_t *log, - xlog_in_core_t *iclog, - int count, - boolean_t syncing) +xlog_verify_iclog( + struct xlog *log, + struct xlog_in_core *iclog, + int count, + boolean_t syncing) { xlog_op_header_t *ophead; xlog_in_core_t *icptr; @@ -3482,7 +3532,7 @@ xlog_verify_iclog(xlog_t *log, */ STATIC int xlog_state_ioerror( - xlog_t *log) + struct xlog *log) { xlog_in_core_t *iclog, *ic; @@ -3527,7 +3577,7 @@ xfs_log_force_umount( struct xfs_mount *mp, int logerror) { - xlog_t *log; + struct xlog *log; int retval; log = mp->m_log; @@ -3634,7 +3684,8 @@ xfs_log_force_umount( } STATIC int -xlog_iclogs_empty(xlog_t *log) +xlog_iclogs_empty( + struct xlog *log) { xlog_in_core_t *iclog; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 72eba2201b14..18a801d76a42 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -487,7 +487,7 @@ struct xlog_grant_head { * overflow 31 bits worth of byte offset, so using a byte number will mean * that round off problems won't occur when releasing partial reservations. 
*/ -typedef struct xlog { +struct xlog { /* The following fields don't need locking */ struct xfs_mount *l_mp; /* mount point */ struct xfs_ail *l_ailp; /* AIL log is working with */ @@ -540,7 +540,7 @@ typedef struct xlog { char *l_iclog_bak[XLOG_MAX_ICLOGS]; #endif -} xlog_t; +}; #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) @@ -548,9 +548,17 @@ typedef struct xlog { #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) /* common routines */ -extern int xlog_recover(xlog_t *log); -extern int xlog_recover_finish(xlog_t *log); -extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); +extern int +xlog_recover( + struct xlog *log); +extern int +xlog_recover_finish( + struct xlog *log); +extern void +xlog_pack_data( + struct xlog *log, + struct xlog_in_core *iclog, + int); extern kmem_zone_t *xfs_log_ticket_zone; struct xlog_ticket * diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a7be98abd6a9..a76ba886e738 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -43,10 +43,18 @@ #include "xfs_utils.h" #include "xfs_trace.h" -STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); -STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); +STATIC int +xlog_find_zeroed( + struct xlog *, + xfs_daddr_t *); +STATIC int +xlog_clear_stale_blocks( + struct xlog *, + xfs_lsn_t); #if defined(DEBUG) -STATIC void xlog_recover_check_summary(xlog_t *); +STATIC void +xlog_recover_check_summary( + struct xlog *); #else #define xlog_recover_check_summary(log) #endif @@ -74,7 +82,7 @@ struct xfs_buf_cancel { static inline int xlog_buf_bbcount_valid( - xlog_t *log, + struct xlog *log, int bbcount) { return bbcount > 0 && bbcount <= log->l_logBBsize; @@ -87,7 +95,7 @@ xlog_buf_bbcount_valid( */ STATIC xfs_buf_t * xlog_get_bp( - xlog_t *log, + struct xlog *log, int nbblks) { struct xfs_buf *bp; @@ -138,10 +146,10 @@ xlog_put_bp( */ STATIC xfs_caddr_t 
xlog_align( - xlog_t *log, + struct xlog *log, xfs_daddr_t blk_no, int nbblks, - xfs_buf_t *bp) + struct xfs_buf *bp) { xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); @@ -155,10 +163,10 @@ xlog_align( */ STATIC int xlog_bread_noalign( - xlog_t *log, + struct xlog *log, xfs_daddr_t blk_no, int nbblks, - xfs_buf_t *bp) + struct xfs_buf *bp) { int error; @@ -189,10 +197,10 @@ xlog_bread_noalign( STATIC int xlog_bread( - xlog_t *log, + struct xlog *log, xfs_daddr_t blk_no, int nbblks, - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_caddr_t *offset) { int error; @@ -211,10 +219,10 @@ xlog_bread( */ STATIC int xlog_bread_offset( - xlog_t *log, + struct xlog *log, xfs_daddr_t blk_no, /* block to read from */ int nbblks, /* blocks to read */ - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_caddr_t offset) { xfs_caddr_t orig_offset = bp->b_addr; @@ -241,10 +249,10 @@ xlog_bread_offset( */ STATIC int xlog_bwrite( - xlog_t *log, + struct xlog *log, xfs_daddr_t blk_no, int nbblks, - xfs_buf_t *bp) + struct xfs_buf *bp) { int error; @@ -378,8 +386,8 @@ xlog_recover_iodone( */ STATIC int xlog_find_cycle_start( - xlog_t *log, - xfs_buf_t *bp, + struct xlog *log, + struct xfs_buf *bp, xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle) @@ -421,7 +429,7 @@ xlog_find_cycle_start( */ STATIC int xlog_find_verify_cycle( - xlog_t *log, + struct xlog *log, xfs_daddr_t start_blk, int nbblks, uint stop_on_cycle_no, @@ -490,7 +498,7 @@ out: */ STATIC int xlog_find_verify_log_record( - xlog_t *log, + struct xlog *log, xfs_daddr_t start_blk, xfs_daddr_t *last_blk, int extra_bblks) @@ -600,7 +608,7 @@ out: */ STATIC int xlog_find_head( - xlog_t *log, + struct xlog *log, xfs_daddr_t *return_head_blk) { xfs_buf_t *bp; @@ -871,7 +879,7 @@ validate_head: */ STATIC int xlog_find_tail( - xlog_t *log, + struct xlog *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk) { @@ -1080,7 +1088,7 @@ done: */ STATIC int xlog_find_zeroed( - xlog_t *log, + struct xlog *log, xfs_daddr_t *blk_no) { 
xfs_buf_t *bp; @@ -1183,7 +1191,7 @@ bp_err: */ STATIC void xlog_add_record( - xlog_t *log, + struct xlog *log, xfs_caddr_t buf, int cycle, int block, @@ -1205,7 +1213,7 @@ xlog_add_record( STATIC int xlog_write_log_records( - xlog_t *log, + struct xlog *log, int cycle, int start_block, int blocks, @@ -1305,7 +1313,7 @@ xlog_write_log_records( */ STATIC int xlog_clear_stale_blocks( - xlog_t *log, + struct xlog *log, xfs_lsn_t tail_lsn) { int tail_cycle, head_cycle; @@ -2050,11 +2058,11 @@ xfs_qm_dqcheck( */ STATIC void xlog_recover_do_dquot_buffer( - xfs_mount_t *mp, - xlog_t *log, - xlog_recover_item_t *item, - xfs_buf_t *bp, - xfs_buf_log_format_t *buf_f) + struct xfs_mount *mp, + struct xlog *log, + struct xlog_recover_item *item, + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f) { uint type; @@ -2108,9 +2116,9 @@ xlog_recover_do_dquot_buffer( */ STATIC int xlog_recover_buffer_pass2( - xlog_t *log, - struct list_head *buffer_list, - xlog_recover_item_t *item) + struct xlog *log, + struct list_head *buffer_list, + struct xlog_recover_item *item) { xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; xfs_mount_t *mp = log->l_mp; @@ -2189,9 +2197,9 @@ xlog_recover_buffer_pass2( STATIC int xlog_recover_inode_pass2( - xlog_t *log, - struct list_head *buffer_list, - xlog_recover_item_t *item) + struct xlog *log, + struct list_head *buffer_list, + struct xlog_recover_item *item) { xfs_inode_log_format_t *in_f; xfs_mount_t *mp = log->l_mp; @@ -2452,14 +2460,14 @@ error: } /* - * Recover QUOTAOFF records. We simply make a note of it in the xlog_t + * Recover QUOTAOFF records. We simply make a note of it in the xlog * structure, so that we know not to do any dquot item or dquot buffer recovery, * of that type. 
*/ STATIC int xlog_recover_quotaoff_pass1( - xlog_t *log, - xlog_recover_item_t *item) + struct xlog *log, + struct xlog_recover_item *item) { xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; ASSERT(qoff_f); @@ -2483,9 +2491,9 @@ xlog_recover_quotaoff_pass1( */ STATIC int xlog_recover_dquot_pass2( - xlog_t *log, - struct list_head *buffer_list, - xlog_recover_item_t *item) + struct xlog *log, + struct list_head *buffer_list, + struct xlog_recover_item *item) { xfs_mount_t *mp = log->l_mp; xfs_buf_t *bp; @@ -2578,9 +2586,9 @@ xlog_recover_dquot_pass2( */ STATIC int xlog_recover_efi_pass2( - xlog_t *log, - xlog_recover_item_t *item, - xfs_lsn_t lsn) + struct xlog *log, + struct xlog_recover_item *item, + xfs_lsn_t lsn) { int error; xfs_mount_t *mp = log->l_mp; @@ -2616,8 +2624,8 @@ xlog_recover_efi_pass2( */ STATIC int xlog_recover_efd_pass2( - xlog_t *log, - xlog_recover_item_t *item) + struct xlog *log, + struct xlog_recover_item *item) { xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; @@ -2812,9 +2820,9 @@ xlog_recover_unmount_trans( */ STATIC int xlog_recover_process_data( - xlog_t *log, + struct xlog *log, struct hlist_head rhash[], - xlog_rec_header_t *rhead, + struct xlog_rec_header *rhead, xfs_caddr_t dp, int pass) { @@ -2986,7 +2994,7 @@ abort_error: */ STATIC int xlog_recover_process_efis( - xlog_t *log) + struct xlog *log) { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; @@ -3147,7 +3155,7 @@ xlog_recover_process_one_iunlink( */ STATIC void xlog_recover_process_iunlinks( - xlog_t *log) + struct xlog *log) { xfs_mount_t *mp; xfs_agnumber_t agno; @@ -3209,9 +3217,9 @@ xlog_recover_process_iunlinks( #ifdef DEBUG STATIC void xlog_pack_data_checksum( - xlog_t *log, - xlog_in_core_t *iclog, - int size) + struct xlog *log, + struct xlog_in_core *iclog, + int size) { int i; __be32 *up; @@ -3234,8 +3242,8 @@ xlog_pack_data_checksum( */ void xlog_pack_data( - xlog_t *log, - xlog_in_core_t *iclog, + struct xlog *log, + struct xlog_in_core 
*iclog, int roundoff) { int i, j, k; @@ -3274,9 +3282,9 @@ xlog_pack_data( STATIC void xlog_unpack_data( - xlog_rec_header_t *rhead, + struct xlog_rec_header *rhead, xfs_caddr_t dp, - xlog_t *log) + struct xlog *log) { int i, j, k; @@ -3299,8 +3307,8 @@ xlog_unpack_data( STATIC int xlog_valid_rec_header( - xlog_t *log, - xlog_rec_header_t *rhead, + struct xlog *log, + struct xlog_rec_header *rhead, xfs_daddr_t blkno) { int hlen; @@ -3343,7 +3351,7 @@ xlog_valid_rec_header( */ STATIC int xlog_do_recovery_pass( - xlog_t *log, + struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int pass) @@ -3595,7 +3603,7 @@ xlog_do_recovery_pass( */ STATIC int xlog_do_log_recovery( - xlog_t *log, + struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { @@ -3646,7 +3654,7 @@ xlog_do_log_recovery( */ STATIC int xlog_do_recover( - xlog_t *log, + struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { @@ -3721,7 +3729,7 @@ xlog_do_recover( */ int xlog_recover( - xlog_t *log) + struct xlog *log) { xfs_daddr_t head_blk, tail_blk; int error; @@ -3767,7 +3775,7 @@ xlog_recover( */ int xlog_recover_finish( - xlog_t *log) + struct xlog *log) { /* * Now we're ready to do the transactions needed for the @@ -3814,7 +3822,7 @@ xlog_recover_finish( */ void xlog_recover_check_summary( - xlog_t *log) + struct xlog *log) { xfs_mount_t *mp; xfs_agf_t *agfp; -- cgit v1.2.3 From db3a3bcf08d5c79290b369d70cc0b80c369d8ad9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Jun 2012 15:50:14 -0400 Subject: NFSv2/v3: Remove incorrect dprintks from the readdir reply code The actual size of the directory is unknown to the client, so it is always requesting the maximum number it can handle. If the server is replying with fewer entries than was requested, then that will usually reflect the fact that we've hit the end of the directory. Flagging it as an error is therefore incorrect. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 10 ++-------- fs/nfs/nfs3xdr.c | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index baf759bccd05..db81166182c9 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -978,16 +978,10 @@ static int decode_readdirok(struct xdr_stream *xdr) pglen = xdr->buf->page_len; hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; recvd = xdr->buf->len - hdrlen; - if (unlikely(pglen > recvd)) - goto out_cheating; -out: + if (pglen > recvd) + pglen = recvd; xdr_read_pages(xdr, pglen); return pglen; -out_cheating: - dprintk("NFS: server cheating in readdir result: " - "pglen %u > recvd %u\n", pglen, recvd); - pglen = recvd; - goto out; } static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 902de489ec9b..3c61c7f80a4b 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2045,16 +2045,10 @@ static int decode_dirlist3(struct xdr_stream *xdr) pglen = xdr->buf->page_len; hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; recvd = xdr->buf->len - hdrlen; - if (unlikely(pglen > recvd)) - goto out_cheating; -out: + if (pglen > recvd) + pglen = recvd; xdr_read_pages(xdr, pglen); return pglen; -out_cheating: - dprintk("NFS: server cheating in readdir result: " - "pglen %u > recvd %u\n", pglen, recvd); - pglen = recvd; - goto out; } static int decode_readdir3resok(struct xdr_stream *xdr, -- cgit v1.2.3 From 44b8db13860a449b5d85afdc65da654ce56da678 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Mon, 18 Jun 2012 16:31:31 +0900 Subject: GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when EIO occurs This patch fixes buffer_head double free in following code path: gfs2_block_map => gfs2_meta_inode_buffer => gfs2_meta_indirect_buffer => gfs2_meta_read => release_metapath gfs2_block_map calls gfs2_meta_inode_buffer with &mp.mp_bh[0] as an argument. 
mp.mp_bh is zero-filled at the beginning of gfs2_block_map. If gfs2_meta_inode_buffer returns a non-zero value, gfs2_block_map calls release_metapath to free the buffers chained to mp.mp_bh. release_metapath checks each slot mp.mp_bh[i] and frees it (with brelse) unless the slot is NULL. &mp.mp_bh[0], passed to gfs2_meta_inode_buffer, is filled in by gfs2_meta_read. gfs2_meta_read stores a buffer allocated with gfs2_getbuf there even if EIO occurs. When EIO occurs, the allocated buffer is brelse'ed, but the now-stale (wrong) pointer to the brelse'ed buffer is still passed back to the caller via the argument bhp. gfs2_meta_indirect_buffer, the caller, also passes the wrong pointer to its caller along with EIO. Finally gfs2_block_map gets both EIO and &mp.mp_bh[0] filled with the wrong pointer. release_metapath calls brelse again on the wrong pointer. Signed-off-by: Masatake YAMATO Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 6c1e5d1c404a..3a56c8d94de0 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct gfs2_sbd *sdp = gl->gl_sbd; struct buffer_head *bh; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + *bhp = NULL; return -EIO; + } *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); @@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, if (tr && tr->tr_touched) gfs2_io_error_bh(sdp, bh); brelse(bh); + *bhp = NULL; return -EIO; } -- cgit v1.2.3 From 64bd577ea0021f5903505de061b3b7d8a785ee94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Jun 2012 22:35:05 -0400 Subject: NFS: Let xdr_read_pages() check for buffer overflows xdr_read_pages will already do all of the buffer overflow checks that are currently being open-coded in the various callers.
This patch simplifies the existing code by replacing the open coded checks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 22 +++------------------- fs/nfs/nfs3xdr.c | 23 +++-------------------- fs/nfs/nfs4xdr.c | 39 ++++++--------------------------------- 3 files changed, 12 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index db81166182c9..d04f0df7be55 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -106,19 +106,16 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) { u32 recvd, count; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) goto out_overflow; count = be32_to_cpup(p); - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; out: - xdr_read_pages(xdr, count); result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. 
*/ result->count = count; return count; @@ -440,7 +437,6 @@ static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length) static int decode_path(struct xdr_stream *xdr) { u32 length, recvd; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -449,12 +445,9 @@ static int decode_path(struct xdr_stream *xdr) length = be32_to_cpup(p); if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN)) goto out_size; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, length); if (unlikely(length > recvd)) goto out_cheating; - - xdr_read_pages(xdr, length); xdr_terminate_string(xdr->buf, length); return 0; out_size: @@ -972,16 +965,7 @@ out_overflow: */ static int decode_readdirok(struct xdr_stream *xdr) { - u32 recvd, pglen; - size_t hdrlen; - - pglen = xdr->buf->page_len; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 3c61c7f80a4b..d64a00ff5a16 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -246,7 +246,6 @@ static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages, static int decode_nfspath3(struct xdr_stream *xdr) { u32 recvd, count; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -255,12 +254,9 @@ static int decode_nfspath3(struct xdr_stream *xdr) count = be32_to_cpup(p); if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN)) goto out_nametoolong; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; - - xdr_read_pages(xdr, count); xdr_terminate_string(xdr->buf, count); return 0; @@ -1587,7 +1583,6 @@ static int 
decode_read3resok(struct xdr_stream *xdr, struct nfs_readres *result) { u32 eof, count, ocount, recvd; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4 + 4 + 4); @@ -1598,13 +1593,10 @@ static int decode_read3resok(struct xdr_stream *xdr, ocount = be32_to_cpup(p++); if (unlikely(ocount != count)) goto out_mismatch; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; - out: - xdr_read_pages(xdr, count); result->eof = eof; result->count = count; return count; @@ -2039,16 +2031,7 @@ out_truncated: */ static int decode_dirlist3(struct xdr_stream *xdr) { - u32 recvd, pglen; - size_t hdrlen; - - pglen = xdr->buf->page_len; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int decode_readdir3resok(struct xdr_stream *xdr, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 18fae29b0301..2754f7268c1f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4920,9 +4920,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) { - struct kvec *iov = req->rq_rcv_buf.head; __be32 *p; - uint32_t count, eof, recvd, hdrlen; + uint32_t count, eof, recvd; int status; status = decode_op_hdr(xdr, OP_READ); @@ -4933,15 +4932,13 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ goto out_overflow; eof = be32_to_cpup(p++); count = be32_to_cpup(p); - hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (count > recvd) { dprintk("NFS: server cheating in read reply: " "count %u > recvd %u\n", count, recvd); count = recvd; eof = 0; } - xdr_read_pages(xdr, count); res->eof = eof; res->count 
= count; return 0; @@ -4952,10 +4949,6 @@ out_overflow: static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; - u32 recvd, pglen = rcvbuf->page_len; int status; __be32 verf[2]; @@ -4967,22 +4960,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: verifier = %08x:%08x\n", __func__, verf[0], verf[1]); - - hdrlen = (char *) xdr->p - (char *) iov->iov_base; - recvd = rcvbuf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - - - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; u32 len, recvd; __be32 *p; int status; @@ -5000,14 +4983,12 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } - hdrlen = (char *) xdr->p - (char *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, len); if (recvd < len) { dprintk("NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } - xdr_read_pages(xdr, len); /* * The XDR encode routine has set things up so that * the link text will be copied directly into the @@ -5066,7 +5047,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, __be32 *savep, *bm_p; uint32_t attrlen, bitmap[3] = {0}; - struct kvec *iov = req->rq_rcv_buf.head; int status; size_t page_len = xdr->buf->page_len; @@ -5089,7 +5069,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) return -EIO; if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { - size_t 
hdrlen; /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of @@ -5098,7 +5077,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ - hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; attrlen += res->acl_data_offset; if (attrlen > page_len) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { @@ -5707,9 +5685,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, __be32 *p; int status; u32 layout_count; - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - u32 hdrlen, recvd; + u32 recvd; status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) @@ -5746,8 +5722,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, res->type, res->layoutp->len); - hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, res->layoutp->len); if (res->layoutp->len > recvd) { dprintk("NFS: server cheating in layoutget reply: " "layout len %u > recvd %u\n", @@ -5755,8 +5730,6 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, return -EINVAL; } - xdr_read_pages(xdr, res->layoutp->len); - if (layout_count > 1) { /* We only handle a length one array at the moment. Any * further entries are just ignored. Note that this means -- cgit v1.2.3 From 256e48bb473b631fbb5aa03d6ed38c652ad3caa7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jun 2012 11:18:13 -0400 Subject: NFSv4: Simplify the GETATTR attribute length calculation Use the xdr_stream position counter as the basis for the calculation instead of assuming that we can calculate an offset to the start of the iovec. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 2754f7268c1f..93f8bec9f4f3 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3078,7 +3078,7 @@ out_overflow: return -EIO; } -static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) +static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep) { __be32 *p; @@ -3086,7 +3086,7 @@ static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, if (unlikely(!p)) goto out_overflow; *attrlen = be32_to_cpup(p); - *savep = xdr->p; + *savep = xdr_stream_pos(xdr); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4068,10 +4068,10 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } -static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrlen) +static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); - unsigned int nwords = xdr->p - savep; + unsigned int nwords = (xdr_stream_pos(xdr) - savep) >> 2; if (unlikely(attrwords != nwords)) { dprintk("%s: server returned incorrect attribute length: " @@ -4193,7 +4193,7 @@ out_overflow: static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4222,7 +4222,7 @@ xdr_error: static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4254,7 +4254,7 @@ xdr_error: static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4299,7 +4299,8 @@ 
out_overflow: static int decode_first_threshold_item4(struct xdr_stream *xdr, struct nfs4_threshold *res) { - __be32 *p, *savep; + __be32 *p; + unsigned int savep; uint32_t bitmap[3] = {0,}, attrlen; int status; @@ -4503,7 +4504,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4615,7 +4616,7 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3]; int status; @@ -5044,7 +5045,8 @@ decode_restorefh(struct xdr_stream *xdr) static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { - __be32 *savep, *bm_p; + unsigned int savep; + __be32 *bm_p; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -7076,6 +7078,7 @@ out: int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { + unsigned int savep; uint32_t bitmap[3] = {0}; uint32_t len; __be32 *p = xdr_inline_decode(xdr, 4); @@ -7114,7 +7117,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (decode_attr_bitmap(xdr, bitmap) < 0) goto out_overflow; - if (decode_attr_length(xdr, &len, &p) < 0) + if (decode_attr_length(xdr, &len, &savep) < 0) goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, -- cgit v1.2.3 From 1aecca3e83e5da981ade916920d3d2a6b9644cc3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jun 2012 11:41:29 -0400 Subject: NFSv3: Don't open code stream position calculation in decode_getacl3resok Use the new xdr_stream_pos() helper instead. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d64a00ff5a16..5013bdd85ab9 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2341,7 +2341,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr, if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) goto out; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + hdrlen = xdr_stream_pos(xdr); acl = NULL; if (result->mask & NFS_ACL) -- cgit v1.2.3 From 8ed27d4fb1ce95e65f5a3b26b02d3b77135cc7a1 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 29 May 2012 15:57:59 -0400 Subject: NFS: add more context to state manager error mesgs Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f38300e9f171..76bbac367880 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1857,10 +1857,12 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { int status = 0; + const char *section = "", *section_sep = ""; /* Ensure exclusive access to NFSv4 state */ do { if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { + section = "purge state"; status = nfs4_reclaim_lease(clp); if (status < 0) goto out_error; @@ -1869,6 +1871,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + section = "lease expired"; /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); if (status < 0) @@ -1888,6 +1891,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { + section = "check lease"; status = nfs4_check_lease(clp); if (status < 0) goto out_error; @@ -1898,6 
+1902,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Initialize or reset the session */ if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) && nfs4_has_session(clp)) { + section = "reset session"; status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) continue; @@ -1908,6 +1913,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Send BIND_CONN_TO_SESSION */ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state) && nfs4_has_session(clp)) { + section = "bind conn to session"; status = nfs4_bind_conn_to_session(clp); if (status < 0) goto out_error; @@ -1916,6 +1922,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { + section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || @@ -1930,6 +1937,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Now recover expired state... 
*/ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { + section = "reclaim nograce"; status = nfs4_do_reclaim(clp, clp->cl_mvops->nograce_recovery_ops); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || @@ -1948,6 +1956,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Recall session slots */ if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) && nfs4_has_session(clp)) { + section = "recall slot"; status = nfs4_recall_slot(clp); if (status < 0) goto out_error; @@ -1964,8 +1973,11 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" - " with error %d\n", clp->cl_hostname, -status); + if (strlen(section)) + section_sep = ": "; + pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" + " with error %d\n", section_sep, section, + clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } -- cgit v1.2.3 From 1a2dd948e2b1e27476982bc7dd6961585823aec5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:40:47 -0400 Subject: NFSv4.1: Handle slot recalls before doing state recovery Handling a slot recall situation should always take precedence over state recovery to allow the server to manage its resources.
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 76bbac367880..da62f66a85ad 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1814,7 +1814,6 @@ static int nfs4_recall_slot(struct nfs_client *clp) spin_unlock(&fc_tbl->slot_tbl_lock); kfree(old); - nfs4_end_drain_session(clp); return 0; } @@ -1920,6 +1919,16 @@ static void nfs4_state_manager(struct nfs_client *clp) continue; } + /* Recall session slots */ + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) + && nfs4_has_session(clp)) { + section = "recall slot"; + status = nfs4_recall_slot(clp); + if (status < 0) + goto out_error; + continue; + } + /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { section = "reclaim reboot"; @@ -1953,16 +1962,6 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs_client_return_marked_delegations(clp); continue; } - /* Recall session slots */ - if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) - && nfs4_has_session(clp)) { - section = "recall slot"; - status = nfs4_recall_slot(clp); - if (status < 0) - goto out_error; - continue; - } - nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? 
*/ -- cgit v1.2.3 From 60f00153d93e0bea872f1a9f5b01423247649083 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:51:00 -0400 Subject: NFSv4.1: Clean up nfs4_recall_slot() Move the test for nfs4_has_session out of the nfs4_state_manager() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index da62f66a85ad..338a12acdc38 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1792,12 +1792,14 @@ out: static int nfs4_recall_slot(struct nfs_client *clp) { - struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table; - struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs; + struct nfs4_slot_table *fc_tbl; struct nfs4_slot *new, *old; int i; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); + fc_tbl = &clp->cl_session->fc_slot_table; new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), GFP_NOFS); if (!new) @@ -1810,7 +1812,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) fc_tbl->slots = new; fc_tbl->max_slots = fc_tbl->target_max_slots; fc_tbl->target_max_slots = 0; - fc_attrs->max_reqs = fc_tbl->max_slots; + clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; spin_unlock(&fc_tbl->slot_tbl_lock); kfree(old); @@ -1920,8 +1922,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } /* Recall session slots */ - if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) - && nfs4_has_session(clp)) { + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)) { section = "recall slot"; status = nfs4_recall_slot(clp); if (status < 0) -- cgit v1.2.3 From 1a47e7a6662f155c8118d64737086a72cf34edf1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:53:38 -0400 Subject: NFSv4.1: Cleanup - move nfs4_has_session tests out of state manager loop Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 9 ++++++--- 1 file 
changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 338a12acdc38..d04e0a1c0234 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1764,6 +1764,8 @@ static int nfs4_reset_session(struct nfs_client *clp) struct rpc_cred *cred; int status; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); @@ -1824,6 +1826,8 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) struct rpc_cred *cred; int ret; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); @@ -1901,8 +1905,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } /* Initialize or reset the session */ - if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) - && nfs4_has_session(clp)) { + if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) { section = "reset session"; status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) @@ -1913,7 +1916,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Send BIND_CONN_TO_SESSION */ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, - &clp->cl_state) && nfs4_has_session(clp)) { + &clp->cl_state)) { section = "bind conn to session"; status = nfs4_bind_conn_to_session(clp); if (status < 0) -- cgit v1.2.3 From b42353ff8d346a2f6afac3e3983b7286ed4238d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 11:19:47 -0400 Subject: NFSv4.1: Clean up nfs4_reclaim_lease Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d04e0a1c0234..1cfc4603fd9a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ 
-1642,7 +1642,7 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return 0; } -static int nfs4_reclaim_lease(struct nfs_client *clp) +static int nfs4_establish_lease(struct nfs_client *clp) { struct rpc_cred *cred; const struct nfs4_state_recovery_ops *ops = @@ -1655,7 +1655,37 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) status = ops->establish_clid(clp, cred); put_rpccred(cred); if (status != 0) + return status; + pnfs_destroy_all_layouts(clp); + return 0; +} + +static int nfs4_reclaim_lease(struct nfs_client *clp) +{ + int status; + + status = nfs4_establish_lease(clp); + if (status < 0) + return nfs4_handle_reclaim_lease_error(clp, status); + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + return 0; +} + +static int nfs4_purge_lease(struct nfs_client *clp) +{ + int status; + + status = nfs4_establish_lease(clp); + if (status < 0) return nfs4_handle_reclaim_lease_error(clp, status); + clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); return 0; } @@ -1868,31 +1898,19 @@ static void nfs4_state_manager(struct nfs_client *clp) do { if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { section = "purge state"; - status = nfs4_reclaim_lease(clp); + status = nfs4_purge_lease(clp); if (status < 0) goto out_error; - clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + continue; } - if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { section = "lease expired"; /* We're going to have to re-establish a clientid */ status = 
nfs4_reclaim_lease(clp); if (status < 0) goto out_error; - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - continue; - clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - - if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, - &clp->cl_state)) - nfs4_state_start_reclaim_nograce(clp); - else - set_bit(NFS4CLNT_RECLAIM_REBOOT, - &clp->cl_state); - - pnfs_destroy_all_layouts(clp); + continue; } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { -- cgit v1.2.3 From 140150dbb1f9cf3ef963fb55505f994d74ff3276 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 15:20:25 -0400 Subject: SUNRPC: Remove unused function xdr_encode_pages Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +--- include/linux/sunrpc/xdr.h | 2 -- net/sunrpc/xdr.c | 28 ---------------------------- 3 files changed, 1 insertion(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15fc7e4664ed..5a7b3723cc6f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2766,9 +2766,7 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) * * In the case of WRITE, we also want to put the GETATTR after * the operation -- in this case because we want to make sure - * we get the post-operation mtime and size. This means that - * we can't use xdr_encode_pages() as written: we need a variant - * of it which would leave room in the 'tail' iovec. + * we get the post-operation mtime and size. * * Both of these changes to the XDR layer would in fact be quite * minor, but I decided to leave them for a subsequent patch. 
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 647faf2289a7..63988990bd36 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -104,8 +104,6 @@ __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); -void xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int, - unsigned int); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); void xdr_terminate_string(struct xdr_buf *, const u32); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2e3694eccd82..d65d380571bc 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -128,34 +128,6 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) } EXPORT_SYMBOL_GPL(xdr_terminate_string); -void -xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, - unsigned int len) -{ - struct kvec *tail = xdr->tail; - u32 *p; - - xdr->pages = pages; - xdr->page_base = base; - xdr->page_len = len; - - p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len); - tail->iov_base = p; - tail->iov_len = 0; - - if (len & 3) { - unsigned int pad = 4 - (len & 3); - - *p = 0; - tail->iov_base = (char *)p + (len & 3); - tail->iov_len = pad; - len += pad; - } - xdr->buflen += len; - xdr->len += len; -} -EXPORT_SYMBOL_GPL(xdr_encode_pages); - void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, struct page **pages, unsigned int base, unsigned int len) -- cgit v1.2.3 From 98d9452448122486f81030c6c70f29471f65e1ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2012 12:01:14 -0400 Subject: NFSv4: Decode getdevicelist should use nfs4_verifier The verifier returned by the GETDEVICELIST operation is not a write verifier, but a nfs4_verifier. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 93f8bec9f4f3..1e2c47b3889d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5579,7 +5579,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, { __be32 *p; int status, i; - struct nfs_writeverf verftemp; + nfs4_verifier verftemp; status = decode_op_hdr(xdr, OP_GETDEVICELIST); if (status) @@ -5593,7 +5593,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, p += 2; /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); + p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); res->num_devs = be32_to_cpup(p); -- cgit v1.2.3 From 2f2c63bc221c5fcded24de2704575d0abf96b910 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2012 11:56:09 -0400 Subject: NFS: Cleanup - only store the write verifier in struct nfs_page The 'committed' field is not needed once we have put the struct nfs_page on the right list. Also correct the type of the verifier: it is not an array of __be32, but simply an 8 byte long opaque array. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 12 +++++++----- fs/nfs/nfs4filelayout.c | 6 +++--- fs/nfs/nfs4xdr.c | 12 ++++++++---- fs/nfs/write.c | 4 ++-- include/linux/nfs_page.h | 2 +- include/linux/nfs_xdr.h | 6 +++++- 6 files changed, 26 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 5013bdd85ab9..6cbe89400dfc 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -325,14 +325,14 @@ static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier) memcpy(p, verifier, NFS3_CREATEVERFSIZE); } -static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier) +static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *verifier) { __be32 *p; p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE); if (unlikely(p == NULL)) goto out_overflow; - memcpy(verifier, p, NFS3_WRITEVERFSIZE); + memcpy(verifier->data, p, NFS3_WRITEVERFSIZE); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -1668,20 +1668,22 @@ static int decode_write3resok(struct xdr_stream *xdr, { __be32 *p; - p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE); + p = xdr_inline_decode(xdr, 4 + 4); if (unlikely(p == NULL)) goto out_overflow; result->count = be32_to_cpup(p++); result->verf->committed = be32_to_cpup(p++); if (unlikely(result->verf->committed > NFS_FILE_SYNC)) goto out_badvalue; - memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE); + if (decode_writeverf3(xdr, &result->verf->verifier)) + goto out_eio; return result->count; out_badvalue: dprintk("NFS: bad stable_how value: %u\n", result->verf->committed); return -EIO; out_overflow: print_overflow_msg(__func__, xdr); +out_eio: return -EIO; } @@ -2314,7 +2316,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, goto out; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, result->verf->verifier); + error = decode_writeverf3(xdr, &result->verf->verifier); out: return error; out_status: diff --git 
a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e1340293872c..85b70639921b 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -351,9 +351,9 @@ static void prepare_to_resend_writes(struct nfs_commit_data *data) struct nfs_page *first = nfs_list_entry(data->pages.next); data->task.tk_status = 0; - memcpy(data->verf.verifier, first->wb_verf.verifier, - sizeof(first->wb_verf.verifier)); - data->verf.verifier[0]++; /* ensure verifier mismatch */ + memcpy(&data->verf.verifier, &first->wb_verf, + sizeof(data->verf.verifier)); + data->verf.verifier.data[0]++; /* ensure verifier mismatch */ } static int filelayout_commit_done_cb(struct rpc_task *task, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1e2c47b3889d..610ebccbde5d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4158,13 +4158,18 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier) return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } +static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifier *verifier) +{ + return decode_opaque_fixed(xdr, verifier->data, NFS4_VERIFIER_SIZE); +} + static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_verifier(xdr, res->verf->verifier); + status = decode_write_verifier(xdr, &res->verf->verifier); return status; } @@ -5192,13 +5197,12 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) if (status) return status; - p = xdr_inline_decode(xdr, 16); + p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); - memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); - return 0; + return decode_write_verifier(xdr, &res->verf->verifier); out_overflow: print_overflow_msg(__func__, xdr); return -EIO; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4d6861c0dc14..ee929e5e1f7b 100644 --- 
a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -620,7 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1547,7 +1547,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) { /* We have a match */ nfs_inode_remove_request(req); dprintk(" OK\n"); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 88d166b555e8..880805774f9f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -42,7 +42,7 @@ struct nfs_page { wb_bytes; /* Length of request */ struct kref wb_kref; /* reference count */ unsigned long wb_flags; - struct nfs_writeverf wb_verf; /* Commit cookie */ + struct nfs_write_verifier wb_verf; /* Commit cookie */ }; struct nfs_pageio_descriptor; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8aadd90b808a..5c0014d1c969 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -514,9 +514,13 @@ struct nfs_writeargs { struct nfs4_sequence_args seq_args; }; +struct nfs_write_verifier { + char data[8]; +}; + struct nfs_writeverf { + struct nfs_write_verifier verifier; enum nfs3_stable_how committed; - __be32 verifier[2]; }; struct nfs_writeres { -- cgit v1.2.3 From 05bf14adcac188f573e22f72734fd0e2fab71aec Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 6 Jun 2012 23:57:13 -0400 Subject: NFSv4.1: Use session max response size for GETDEVICEINFO gdia_maxcount We prepare for the largest possible GETDEVICEINFO response, which can not be 
greater than the negotiated session maximum response size. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index a1fab8da7f03..f81231f30d94 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -728,7 +728,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla pdev->layout_type = LAYOUT_NFSV4_1_FILES; pdev->pages = pages; pdev->pgbase = 0; - pdev->pglen = PAGE_SIZE * max_pages; + pdev->pglen = max_resp_sz; pdev->mincount = 0; rc = nfs4_proc_getdeviceinfo(server, pdev); -- cgit v1.2.3 From e3074507d93a0b7f1430dec7c6addb307d4f30da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 May 2012 15:30:41 -0400 Subject: NFS: Simplify NFSv4.1 Kconfig Convert the pNFS file layout to use the same system as the object and block layout. Remove unnecessary dependencies on NFS_FS Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f90f4f5cd421..404c6a8ac394 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -88,9 +88,8 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_FS && NFS_V4 && EXPERIMENTAL + depends on NFS_V4 && EXPERIMENTAL select SUNRPC_BACKCHANNEL - select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol (RFC 5661) in the kernel's NFS client. 
@@ -99,15 +98,17 @@ config NFS_V4_1 config PNFS_FILE_LAYOUT tristate + depends on NFS_V4_1 + default m config PNFS_BLOCK tristate - depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM + depends on NFS_V4_1 && BLK_DEV_DM default m config PNFS_OBJLAYOUT tristate - depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD + depends on NFS_V4_1 && SCSI_OSD_ULD default m config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN -- cgit v1.2.3 From 6e5b587d2f4271a1a4a47e3169db7157aefc31ed Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 31 May 2012 15:16:03 -0400 Subject: NFSv4.1 handle OPEN O_CREATE mdsthreshold Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5a7b3723cc6f..c84c93c4cd36 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2825,6 +2825,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct dentry *de = dentry; struct nfs4_state *state; struct rpc_cred *cred = NULL; + struct nfs4_threshold **thp = NULL; fmode_t fmode = 0; int status = 0; @@ -2832,9 +2833,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, cred = ctx->cred; de = ctx->dentry; fmode = ctx->mode; + thp = &ctx->mdsthreshold; } sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); + state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, thp); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); -- cgit v1.2.3 From e38eb6506ff426a2bb93433fecfcc863a95fcd03 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:40 -0400 Subject: NFS: set_pnfs_layoutdriver() from nfs4_proc_fsinfo() The generic client doesn't need to know about pnfs layout drivers, so this should be done in the v4 code. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/nfs4proc.c | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f005b5bebdc7..e646b14024c1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -975,7 +975,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; server->pnfs_blksize = fsinfo->blksize; - set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c84c93c4cd36..e9a8ad2df7af 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3332,8 +3332,14 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { + int error; + nfs_fattr_init(fsinfo->fattr); - return nfs4_do_fsinfo(server, fhandle, fsinfo); + error = nfs4_do_fsinfo(server, fhandle, fsinfo); + if (error == 0) + set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + + return error; } static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, -- cgit v1.2.3 From eeebf91675421b730448489ebf4720e5c419beec Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:41 -0400 Subject: NFS: Use nfs4_destroy_server() to clean up NFS v4 I can use this function to return delegations and unset the pnfs layout driver rather than continuing to do these things in the generic client. With this change, we no longer need an nfs4_kill_super(). 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 ++- fs/nfs/delegation.c | 3 +-- fs/nfs/delegation.h | 2 +- fs/nfs/super.c | 23 +++++------------------ 4 files changed, 9 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e646b14024c1..bf0f896284a8 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -266,6 +266,8 @@ static void pnfs_init_server(struct nfs_server *server) static void nfs4_destroy_server(struct nfs_server *server) { + nfs_server_return_all_delegations(server); + unset_pnfs_layoutdriver(server); nfs4_purge_state_owners(server); } @@ -1137,7 +1139,6 @@ void nfs_free_server(struct nfs_server *server) dprintk("--> nfs_free_server()\n"); nfs_server_remove_lists(server); - unset_pnfs_layoutdriver(server); if (server->destroy != NULL) server->destroy(server); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bd3a9601d32d..9a7a1b488af9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -417,9 +417,8 @@ static void nfs_mark_return_delegation(struct nfs_server *server, * @sb: sb to process * */ -void nfs_super_return_all_delegations(struct super_block *sb) +void nfs_server_return_all_delegations(struct nfs_server *server) { - struct nfs_server *server = NFS_SB(sb); struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 72709c4193fa..206db5679996 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -38,7 +38,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); -void nfs_super_return_all_delegations(struct super_block *sb); +void nfs_server_return_all_delegations(struct nfs_server *); void nfs_expire_all_delegations(struct nfs_client *clp); void 
nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 906f09c7d842..5a1c860743c3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -347,13 +347,12 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs_fs_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -361,7 +360,7 @@ static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_remote_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -369,7 +368,7 @@ struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_xdev_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -377,7 +376,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_remote_referral_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -385,7 +384,7 @@ struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_referral_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -2874,18 +2873,6 @@ static struct dentry *nfs4_try_mount(int flags, 
const char *dev_name, return res; } -static void nfs4_kill_super(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - dprintk("--> %s\n", __func__); - nfs_super_return_all_delegations(sb); - kill_anon_super(sb); - nfs_fscache_release_super_cookie(sb); - nfs_free_server(server); - dprintk("<-- %s\n", __func__); -} - /* * Clone an NFS4 server record on xdev traversal (FSID-change) */ -- cgit v1.2.3 From a5c58892b427a2752e3ec44b0aad4ce9221dc63b Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:42 -0400 Subject: NFS: Create a v4-specific fsync function v2 and v3 don't need to worry about doing a pnfs layoutcommit. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a6708e6b438d..8941ac41c59b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -265,7 +265,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * fall back to doing a synchronous write. 
*/ static int -nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); @@ -277,9 +277,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) dentry->d_parent->d_name.name, dentry->d_name.name, datasync); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - mutex_lock(&inode->i_mutex); - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); @@ -290,10 +287,20 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = xchg(&ctx->error, 0); if (!ret && status < 0) ret = status; - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); + return ret; +} + +static int +nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); mutex_unlock(&inode->i_mutex); + return ret; } @@ -956,6 +963,23 @@ out_drop: goto out_put_ctx; } +static int +nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + + return ret; +} + const struct file_operations nfs4_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, 
@@ -966,7 +990,7 @@ const struct file_operations nfs4_file_operations = { .open = nfs4_file_open, .flush = nfs_file_flush, .release = nfs_file_release, - .fsync = nfs_file_fsync, + .fsync = nfs4_file_fsync, .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, -- cgit v1.2.3 From 011e2a7fd5e9e0c2fdba6b9466d53fc437f8bfaf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:43 -0400 Subject: NFS: Create a have_delegation rpc_op Delegations are a v4 feature, so push them out of the generic code. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 9 ++------- fs/nfs/dir.c | 2 +- fs/nfs/file.c | 6 +++--- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 6 ++++++ fs/nfs/nfs4proc.c | 5 +++-- fs/nfs/proc.c | 6 ++++++ fs/nfs/write.c | 2 +- include/linux/nfs_xdr.h | 1 + 10 files changed, 25 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9a7a1b488af9..36c7c647a1d0 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -47,7 +47,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) * * Returns one if inode has the indicated delegation, otherwise zero. 
*/ -int nfs_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t flags) { struct nfs_delegation *delegation; int ret = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 206db5679996..d134fc5fda70 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -56,14 +56,9 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs_have_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t flags); #else -static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) -{ - return 0; -} - static inline int nfs_inode_return_delegation(struct inode *inode) { nfs_wb_all(inode); @@ -73,7 +68,7 @@ static inline int nfs_inode_return_delegation(struct inode *inode) static inline int nfs_have_delegated_attributes(struct inode *inode) { - return nfs_have_delegation(inode, FMODE_READ) && + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f430057ff3b3..4a3e23aea143 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1144,7 +1144,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) goto out_bad; } - if (nfs_have_delegation(inode, FMODE_READ)) + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8941ac41c59b..57a22a1533e2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -178,7 +178,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) * If we're holding a write delegation, then just start the i/o * but don't wait for completion (or send a commit). 
*/ - if (nfs_have_delegation(inode, FMODE_WRITE)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return filemap_fdatawrite(file->f_mapping); /* Flush writes to the server and return any errors */ @@ -677,7 +677,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) } fl->fl_type = saved_type; - if (nfs_have_delegation(inode, FMODE_READ)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) goto out_noconflict; if (is_local) @@ -772,7 +772,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!nfs_have_delegation(inode, FMODE_READ)) { + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { if (is_time_granular(&NFS_SERVER(inode)->time_delta)) __nfs_revalidate_inode(NFS_SERVER(inode), inode); else diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f7296983eba6..0f0b928ef252 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1457,7 +1457,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (!nfs_have_delegation(inode, FMODE_READ) || + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2292a0fd2bff..08f832634ef9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -877,6 +877,11 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } +static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -921,5 +926,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .lock = nfs3_proc_lock, 
.clear_acl_cache = nfs3_forget_cached_acls, .close_context = nfs_close_context, + .have_delegation = nfs3_have_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9a8ad2df7af..86f428bb5e07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -294,7 +294,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case 0: return 0; case -NFS4ERR_OPENMODE: - if (inode && nfs_have_delegation(inode, FMODE_READ)) { + if (inode && nfs4_have_delegation(inode, FMODE_READ)) { nfs_inode_return_delegation(inode); exception->retry = 1; return 0; @@ -3466,7 +3466,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) @@ -6804,6 +6804,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .clear_acl_cache = nfs4_zap_acl_attr, .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, + .have_delegation = nfs4_have_delegation, .init_client = nfs4_init_client, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 617c7419a08e..4aed3ddf9bba 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -734,6 +734,11 @@ out_einval: return -EINVAL; } +static int nfs_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -777,5 +782,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, + .have_delegation = nfs_have_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee929e5e1f7b..f163355b9618 100644 --- 
a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -410,7 +410,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) + if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5c0014d1c969..8787f77c64b3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1422,6 +1422,7 @@ struct nfs_rpc_ops { struct nfs_open_context *ctx, int open_flags, struct iattr *iattr); + int (*have_delegation)(struct inode *, fmode_t); struct nfs_client * (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t); -- cgit v1.2.3 From 57ec14c55dee2733330327499d16e40f8c23219e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:44 -0400 Subject: NFS: Create a return_delegation rpc op Delegations are a v4 feature, so push return_delegation out of the generic client by creating a new rpc_op and renaming the old function to be in the nfs v4 "namespace" Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 8 +------- fs/nfs/dir.c | 8 ++++---- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 7 +++++++ fs/nfs/nfs4proc.c | 7 ++++--- fs/nfs/proc.c | 7 +++++++ fs/nfs/unlink.c | 2 +- include/linux/nfs_xdr.h | 1 + 9 files changed, 27 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 36c7c647a1d0..81c5eec3cf38 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -388,7 +388,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) * * Returns zero on success, or a negative errno value. 
*/ -int nfs_inode_return_delegation(struct inode *inode) +int nfs4_inode_return_delegation(struct inode *inode) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d134fc5fda70..1f3ccd934635 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -33,7 +33,7 @@ enum { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int nfs_inode_return_delegation(struct inode *inode); +int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_return_delegation_noreclaim(struct inode *inode); @@ -58,12 +58,6 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); -#else -static inline int nfs_inode_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} #endif static inline int nfs_have_delegated_attributes(struct inode *inode) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a3e23aea143..68e451f59305 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1778,7 +1778,7 @@ static int nfs_safe_remove(struct dentry *dentry) } if (inode != NULL) { - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) @@ -1906,7 +1906,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); d_drop(dentry); error = 
NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); @@ -1990,9 +1990,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - nfs_inode_return_delegation(old_inode); + NFS_PROTO(old_inode)->return_delegation(old_inode); if (new_inode != NULL) - nfs_inode_return_delegation(new_inode); + NFS_PROTO(new_inode)->return_delegation(new_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0f0b928ef252..28c9ebbe78a6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -430,7 +430,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) * Return any delegations if we're going to change ACLs */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) nfs_refresh_inode(inode, fattr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 08f832634ef9..4749a32e54be 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -882,6 +882,12 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t flags) return 0; } +static int nfs3_return_delegation(struct inode *inode) +{ + nfs_wb_all(inode); + return 0; +} + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -927,5 +933,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = nfs3_forget_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 86f428bb5e07..035f7a0829ec 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -295,7 +295,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc return 0; case -NFS4ERR_OPENMODE: if (inode && 
nfs4_have_delegation(inode, FMODE_READ)) { - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); exception->retry = 1; return 0; } @@ -1065,7 +1065,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo return; } rcu_read_unlock(); - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); } static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) @@ -3870,7 +3870,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); if (i < 0) return i; - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); /* @@ -6805,6 +6805,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, .init_client = nfs4_init_client, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4aed3ddf9bba..16632930abd2 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -739,6 +739,12 @@ static int nfs_have_delegation(struct inode *inode, fmode_t flags) return 0; } +static int nfs_return_delegation(struct inode *inode) +{ + nfs_wb_all(inode); + return 0; +} + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -783,5 +789,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, + .return_delegation = nfs_return_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3210a03342f9..13cea637eff8 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -501,7 +501,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) (unsigned 
long long)NFS_FILEID(dentry->d_inode)); /* Return delegation in anticipation of the rename */ - nfs_inode_return_delegation(dentry->d_inode); + NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode); sdentry = NULL; do { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8787f77c64b3..62235be07fb8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1423,6 +1423,7 @@ struct nfs_rpc_ops { int open_flags, struct iattr *iattr); int (*have_delegation)(struct inode *, fmode_t); + int (*return_delegation)(struct inode *); struct nfs_client * (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t); -- cgit v1.2.3 From cdb7ecedec766861e7c4cc35a203518f92023bff Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:45 -0400 Subject: NFS: Create a free_client rpc_op NFS v4 needs a way to shut down callbacks and sessions, but v2 and v3 don't. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 18 +++++++++--------- fs/nfs/internal.h | 1 + fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 1 + include/linux/nfs_xdr.h | 1 + 7 files changed, 16 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index bf0f896284a8..82cb8a386a8f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -242,6 +242,12 @@ static void nfs4_shutdown_client(struct nfs_client *clp) kfree(clp->cl_implid); } +void nfs4_free_client(struct nfs_client *clp) +{ + nfs4_shutdown_client(clp); + nfs_free_client(clp); +} + /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -272,10 +278,6 @@ static void nfs4_destroy_server(struct nfs_server *server) } #else -static void nfs4_shutdown_client(struct nfs_client *clp) -{ -} - void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -293,12 +295,10 @@ static void pnfs_init_server(struct 
nfs_server *server) /* * Destroy a shared client record */ -static void nfs_free_client(struct nfs_client *clp) +void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); - nfs4_shutdown_client(clp); - nfs_fscache_release_client_cookie(clp); /* -EIO all pending I/O */ @@ -335,7 +335,7 @@ void nfs_put_client(struct nfs_client *clp) BUG_ON(!list_empty(&clp->cl_superblocks)); - nfs_free_client(clp); + clp->rpc_ops->free_client(clp); } } EXPORT_SYMBOL_GPL(nfs_put_client); @@ -574,7 +574,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (clp) { spin_unlock(&nn->nfs_client_lock); if (new) - nfs_free_client(new); + new->rpc_ops->free_client(new); return nfs_found_client(cl_init, clp); } if (new) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 18f99ef71343..93b732523342 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -151,6 +151,7 @@ extern void nfs_clients_init(struct net *net); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); +extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4749a32e54be..4ccb34bf1732 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -935,4 +935,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .have_delegation = nfs3_have_delegation, .return_delegation = nfs3_return_delegation, .init_client = nfs_init_client, + .free_client = nfs_free_client, }; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cc5900ac61b5..9889ee476e37 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -301,6 +301,8 @@ extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[3]; extern const u32 nfs4_fs_locations_bitmap[2]; +void nfs4_free_client(struct nfs_client *); + /* nfs4renewd.c */ 
extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 035f7a0829ec..f301c53926b2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6807,6 +6807,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .have_delegation = nfs4_have_delegation, .return_delegation = nfs4_inode_return_delegation, .init_client = nfs4_init_client, + .free_client = nfs4_free_client, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 16632930abd2..53620bf10969 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -791,4 +791,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .have_delegation = nfs_have_delegation, .return_delegation = nfs_return_delegation, .init_client = nfs_init_client, + .free_client = nfs_free_client, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 62235be07fb8..e61dc7235d5d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1427,6 +1427,7 @@ struct nfs_rpc_ops { struct nfs_client * (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t); + void (*free_client) (struct nfs_client *); }; /* -- cgit v1.2.3 From 6663ee7f8187708143255c057bc132bbc84c1894 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:46 -0400 Subject: NFS: Create an alloc_client rpc_op This gives NFS v4 a way to set up callbacks and sessions without v2 or v3 having to do them as well. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 40 ++++++++++++++++++++++++++-------------- fs/nfs/internal.h | 1 + fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 1 + include/linux/nfs_xdr.h | 2 ++ 7 files changed, 34 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 82cb8a386a8f..254719c4a575 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -147,7 +147,7 @@ struct nfs_client_initdata { * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... */ -static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) +struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; struct rpc_cred *cred; @@ -177,18 +177,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_proto = cl_init->proto; clp->cl_net = get_net(cl_init->net); -#ifdef CONFIG_NFS_V4 - err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); - if (err) - goto error_cleanup; - - spin_lock_init(&clp->cl_lock); - INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; - clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; -#endif cred = rpc_lookup_machine_cred("*"); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; @@ -218,6 +206,30 @@ static void nfs4_shutdown_session(struct nfs_client *clp) } #endif /* CONFIG_NFS_V4_1 */ +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) +{ + int err; + struct nfs_client *clp = nfs_alloc_client(cl_init); + if (IS_ERR(clp)) + return clp; + + err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); + if (err) + goto error; + + spin_lock_init(&clp->cl_lock); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + 
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; + clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + return clp; + +error: + kfree(clp); + return ERR_PTR(err); +} + /* * Destroy the NFS4 callback service */ @@ -588,7 +600,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, spin_unlock(&nn->nfs_client_lock); - new = nfs_alloc_client(cl_init); + new = cl_init->rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 93b732523342..633af813984d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -148,6 +148,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); +extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4ccb34bf1732..77c7aac228bb 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -934,6 +934,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, .return_delegation = nfs3_return_delegation, + .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, }; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9889ee476e37..a0be2d1af04b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -303,6 +303,8 @@ extern const u32 nfs4_fs_locations_bitmap[2]; void nfs4_free_client(struct nfs_client *); +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *); + /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server 
*); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f301c53926b2..7f39e7ecde6c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6806,6 +6806,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, .return_delegation = nfs4_inode_return_delegation, + .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 53620bf10969..99a002515dfe 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -790,6 +790,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, .return_delegation = nfs_return_delegation, + .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e61dc7235d5d..4d62b774ddaf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1353,6 +1353,7 @@ struct nfs_renamedata { struct nfs_access_entry; struct nfs_client; struct rpc_timeout; +struct nfs_client_initdata; /* * RPC procedure vector for NFSv2/NFSv3 demuxing @@ -1424,6 +1425,7 @@ struct nfs_rpc_ops { struct iattr *iattr); int (*have_delegation)(struct inode *, fmode_t); int (*return_delegation)(struct inode *); + struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client * (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t); -- cgit v1.2.3 From 1abb50886afe8a126705c93dab2b50c1252a9c19 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:47 -0400 Subject: NFS: Create an read_pageio_init() function pNFS needs to select a read function based on the layout driver currently in use, so I let each NFS version decide how to best handle initializing reads. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/pnfs.c | 11 +++++------ fs/nfs/pnfs.h | 6 +++--- fs/nfs/proc.c | 1 + fs/nfs/read.c | 16 +++------------- include/linux/nfs_xdr.h | 3 +++ 8 files changed, 18 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 633af813984d..b3121123b40d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -306,7 +306,7 @@ extern int nfs_initiate_read(struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 77c7aac228bb..9864d05432da 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -921,6 +921,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f39e7ecde6c..f99cf71f4e36 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6792,6 +6792,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_pageio_init = pnfs_pageio_init_read, .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, diff --git a/fs/nfs/pnfs.c 
b/fs/nfs/pnfs.c index bbc49caa7a82..9c830603a16c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1209,7 +1209,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -bool +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1217,10 +1217,9 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) - return false; - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, - server->rsize, 0); - return true; + nfs_pageio_init_read(pgio, inode, compl_ops); + else + nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } bool @@ -1427,7 +1426,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read_mds(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 64f90d845f6a..80ee8919dd5e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,7 +178,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int, const struct nfs_pgio_completion_ops *); @@ -438,10 +438,10 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, +static inline void pnfs_pageio_init_read(struct 
nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { - return false; + nfs_pageio_init_read(pgio, inode, compl_ops); } static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 99a002515dfe..6fea6e107bc3 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -778,6 +778,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 86ced7836214..6267b873bbcb 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -20,8 +20,6 @@ #include #include -#include "pnfs.h" - #include "nfs4_fs.h" #include "internal.h" #include "iostat.h" @@ -108,7 +106,7 @@ int nfs_return_empty_page(struct page *page) return 0; } -void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { @@ -123,14 +121,6 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) - nfs_pageio_init_read_mds(pgio, inode, compl_ops); -} - int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -149,7 +139,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); + 
NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); NFS_I(inode)->read_io += pgio.pg_bytes_written; @@ -652,7 +642,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); + NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4d62b774ddaf..e00b8b3c334e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1354,6 +1354,7 @@ struct nfs_access_entry; struct nfs_client; struct rpc_timeout; struct nfs_client_initdata; +struct nfs_pageio_descriptor; /* * RPC procedure vector for NFSv2/NFSv3 demuxing @@ -1407,6 +1408,8 @@ struct nfs_rpc_ops { int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); void (*read_setup) (struct nfs_read_data *, struct rpc_message *); + void (*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, + const struct nfs_pgio_completion_ops *); void (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); -- cgit v1.2.3 From 57208fa7e51ca16cd68de8e8bf482f16b06d3ea1 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:48 -0400 Subject: NFS: Create an write_pageio_init() function pNFS needs to select a write function based on the layout driver currently in use, so I let each NFS version decide how to best handle initializing writes. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/pnfs.c | 11 +++++------ fs/nfs/pnfs.h | 6 +++--- fs/nfs/proc.c | 1 + fs/nfs/write.c | 18 ++++++------------ include/linux/nfs_xdr.h | 2 ++ 8 files changed, 20 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b3121123b40d..7edc172c371e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -320,7 +320,7 @@ extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9864d05432da..f3344f7f46a9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -925,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f99cf71f4e36..7d387cb8ceb5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6796,6 +6796,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, + .write_pageio_init = pnfs_pageio_init_write, .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = 
nfs4_proc_commit_setup, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9c830603a16c..2617831afd39 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1222,7 +1222,7 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } -bool +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) @@ -1231,10 +1231,9 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) - return false; - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, - server->wsize, ioflags); - return true; + nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); + else + nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); } bool @@ -1271,7 +1270,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 80ee8919dd5e..592beb02c955 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,7 +180,7 @@ void put_lseg(struct pnfs_layout_segment *lseg); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); -bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); @@ -444,10 +444,10 @@ static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, str nfs_pageio_init_read(pgio, 
inode, compl_ops); } -static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, +static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) { - return false; + nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); } static inline int diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 6fea6e107bc3..cf6499742b10 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -782,6 +782,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, + .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f163355b9618..c11fb0025f0b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -336,8 +336,10 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), - &nfs_async_write_completion_ops); + NFS_PROTO(page->mapping->host)->write_pageio_init(&pgio, + page->mapping->host, + wb_priority(wbc), + &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -380,8 +382,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), - &nfs_async_write_completion_ops); + NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -1202,7 +1203,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = 
nfs_generic_pg_writepages, }; -void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1217,13 +1218,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) - nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops); -} void nfs_write_prepare(struct rpc_task *task, void *calldata) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e00b8b3c334e..8ed8ec628290 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1413,6 +1413,8 @@ struct nfs_rpc_ops { void (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); + void (*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int, + const struct nfs_pgio_completion_ops *); void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); -- cgit v1.2.3 From a8d8f02cf0c379693762107afe812b9e52090e39 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:49 -0400 Subject: NFS: Create custom NFS v4 write_inode() function This gives pnfs a chance to do a layout commit inside the v4 code. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/super.c | 2 +- fs/nfs/write.c | 10 ++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a0be2d1af04b..3696ca7f5f4d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -202,6 +202,9 @@ struct nfs4_state_maintenance_ops { extern const struct dentry_operations nfs4_dentry_operations; extern const struct inode_operations nfs4_dir_inode_operations; +/* write.c */ +int nfs4_write_inode(struct inode *, struct writeback_control *); + /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5a1c860743c3..9d33fb22f287 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -391,7 +391,7 @@ struct file_system_type nfs4_referral_fs_type = { static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, + .write_inode = nfs4_write_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, .evict_inode = nfs4_evict_inode, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c11fb0025f0b..f312860c15d0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1671,9 +1671,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - int ret; + return nfs_commit_unstable_pages(inode, wbc); +} + +#ifdef CONFIG_NFS_V4 +int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret = nfs_write_inode(inode, wbc); - ret = nfs_commit_unstable_pages(inode, wbc); if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { int status; bool sync = true; @@ -1687,6 +1692,7 @@ int nfs_write_inode(struct inode *inode, struct 
writeback_control *wbc) } return ret; } +#endif /* * flush the inode to disk. -- cgit v1.2.3 From 1c8457cadc9cefe7ec920a2f3537ff1fe20f4061 Mon Sep 17 00:00:00 2001 From: Aditya Kali Date: Sat, 30 Jun 2012 19:10:57 -0400 Subject: ext4: avoid unneeded calls to ext4_mb_load_buddy() while reading mb_groups Currently ext4_mb_load_buddy is called for every group, irrespective of whether the group info is already in memory, while reading /proc/fs/ext4/<partition>/mb_groups proc file. For the purpose of mb_groups proc file, it is unnecessary to load the file group info from disk if it was loaded in the past. These calls to ext4_mb_load_buddy make reading the mb_groups proc file expensive. Also, the locks around ext4_get_group_info are not required. This patch modifies the code to call ext4_mb_load_buddy only if the group info had never been loaded into memory in the past. It also removes the mb group locking around ext4_get_group_info call. Signed-off-by: Aditya Kali Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1cd6994fc446..9f1e655979b9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2077,8 +2077,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct super_block *sb = seq->private; ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; - int err; + int err, buddy_loaded = 0; struct ext4_buddy e4b; + struct ext4_group_info *grinfo; struct sg { struct ext4_group_info info; ext4_grpblk_t counters[16]; @@ -2095,15 +2096,21 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + sizeof(struct ext4_group_info); - err = ext4_mb_load_buddy(sb, group, &e4b); - if (err) { - seq_printf(seq, "#%-5u: I/O error\n", group); - return 0; + grinfo = ext4_get_group_info(sb, group); + /* Load the group info in memory only if not
already loaded. */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { + err = ext4_mb_load_buddy(sb, group, &e4b); + if (err) { + seq_printf(seq, "#%-5u: I/O error\n", group); + return 0; + } + buddy_loaded = 1; } - ext4_lock_group(sb, group); + memcpy(&sg, ext4_get_group_info(sb, group), i); - ext4_unlock_group(sb, group); - ext4_mb_unload_buddy(&e4b); + + if (buddy_loaded) + ext4_mb_unload_buddy(&e4b); seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, sg.info.bb_fragments, sg.info.bb_first_free); -- cgit v1.2.3 From f4e95b3316c4daa43224753bb98f41456fef86c7 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sat, 30 Jun 2012 19:12:57 -0400 Subject: ext4: honor O_(D)SYNC semantic in ext4_fallocate() Ext4 must make sure the transaction is committed to disk when a user opens a file with the O_(D)SYNC flag and does a fallocate(2) call. This problem had been reported by Christoph Hellwig in this thread: http://www.spinics.net/lists/linux-btrfs/msg13621.html Reported-by: Christoph Hellwig Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 91341ec6e06a..f1089cba913a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4420,6 +4420,8 @@ retry: ext4_falloc_update_inode(inode, mode, new_size, (map.m_flags & EXT4_MAP_NEW)); ext4_mark_inode_dirty(handle, inode); + if ((file->f_flags & O_SYNC) && ret >= max_blocks) + ext4_handle_sync(handle); ret2 = ext4_journal_stop(handle); if (ret2) break; -- cgit v1.2.3 From f6fb99cadcd44660c68e13f6eab28333653621e6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 30 Jun 2012 19:14:57 -0400 Subject: ext4: pass a char * to ext4_count_free() instead of a buffer_head ptr Make it possible for ext4_count_free to operate on buffers and not just data in buffer_heads.
Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/balloc.c | 3 ++- fs/ext4/bitmap.c | 8 +++----- fs/ext4/ext4.h | 2 +- fs/ext4/ialloc.c | 3 ++- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index cee7812cc3cf..d23b31ca9d7a 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,7 +609,8 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) if (bitmap_bh == NULL) continue; - x = ext4_count_free(bitmap_bh, sb->s_blocksize); + x = ext4_count_free(bitmap_bh->b_data, + EXT4_BLOCKS_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", i, ext4_free_group_clusters(sb, gdp), x); bitmap_count += x; diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index b319721da26a..7e86a6d28c64 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -15,15 +15,13 @@ static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) +unsigned int ext4_count_free(char *bitmap, unsigned int numchars) { unsigned int i, sum = 0; - if (!map) - return 0; for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; + sum += nibblemap[bitmap[i] & 0xf] + + nibblemap[(bitmap[i] >> 4) & 0xf]; return sum; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cfc4e01b3c83..293fa1ced21b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1852,7 +1852,7 @@ struct mmpd_data { # define NORET_AND noreturn, /* bitmap.c */ -extern unsigned int ext4_count_free(struct buffer_head *, unsigned); +extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *gdp, struct buffer_head *bh, int sz); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index d48e8b14928c..6866bc233e94 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1054,7 
+1054,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) if (!bitmap_bh) continue; - x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); + x = ext4_count_free(bitmap_bh->b_data, + EXT4_INODES_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); bitmap_count += x; -- cgit v1.2.3 From 77c1a08fc9ece4cb130b9fd279738e799f0c2864 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:07 +1000 Subject: xfs: struct xfs_buf_log_format isn't variable sized. The struct xfs_buf_log_format wants to think the dirty bitmap is variable sized. In fact, it is variable size on disk simply due to the way we map it from the in-memory structure, but we still just use a fixed size memory allocation for the in-memory structure. Hence it makes no sense to set the function up as a variable sized structure when we already know its maximum size, and we always allocate it as such. Simplify the structure by making the dirty bitmap a fixed sized array and just using the size of the structure for the allocation size. This will make it much simpler to allocate and manipulate an array of format structures for discontiguous buffer support. The previous struct xfs_buf_log_item size according to /proc/slabinfo was 224 bytes. pahole doesn't give the same size because of the variable size definition. With this modification, pahole reports the same as /proc/slabinfo: /* size: 224, cachelines: 4, members: 6 */ Because the xfs_buf_log_item size is now determined by the maximum supported block size we introduce a dependency on xfs_alloc_btree.h. Avoid this dependency by moving the defines for the maximum block sizes supported to xfs_types.h with all the other max/min type defines to avoid any new dependencies.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc_btree.h | 14 -------------- fs/xfs/xfs_buf_item.c | 14 ++++++-------- fs/xfs/xfs_buf_item.h | 36 ++++++++++++++++++------------------ fs/xfs/xfs_super.c | 5 ++--- fs/xfs/xfs_types.h | 14 ++++++++++++++ 5 files changed, 40 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index a6caa0022c9b..359fb86ed876 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -50,20 +50,6 @@ typedef struct xfs_alloc_rec_incore { /* btree pointer type */ typedef __be32 xfs_alloc_ptr_t; -/* - * Minimum and maximum blocksize and sectorsize. - * The blocksize upper limit is pretty much arbitrary. - * The sectorsize upper limit is due to sizeof(sb_sectsize). - */ -#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ -#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ -#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) -#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) -#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ -#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ -#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) -#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) - /* * Block numbers in the AG: * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 45df2b857d48..52cd8f89ee72 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -240,15 +240,13 @@ xfs_buf_item_format( (bip->bli_flags & XFS_BLI_STALE)); /* - * The size of the base structure is the size of the - * declared structure plus the space for the extra words - * of the bitmap. We subtract one from the map size, because - * the first element of the bitmap is accounted for in the - * size of the base structure. 
+ * Base size is the actual size of the ondisk structure - it reflects + * the actual size of the dirty bitmap rather than the size of the in + * memory structure. */ - base_size = - (uint)(sizeof(xfs_buf_log_format_t) + - ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); + base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + + (bip->bli_format.blf_map_size * + sizeof(bip->bli_format.blf_data_map[0])); vecp->i_addr = &bip->bli_format; vecp->i_len = base_size; vecp->i_type = XLOG_REG_TYPE_BFORMAT; diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index b6ecd2061e7c..ff2686780239 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -20,23 +20,6 @@ extern kmem_zone_t *xfs_buf_item_zone; -/* - * This is the structure used to lay out a buf log item in the - * log. The data map describes which 128 byte chunks of the buffer - * have been logged. - * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. - */ -typedef struct xfs_buf_log_format { - unsigned short blf_type; /* buf log item type indicator */ - unsigned short blf_size; /* size of this item */ - ushort blf_flags; /* misc state */ - ushort blf_len; /* number of blocks in this buf */ - __int64_t blf_blkno; /* starting blkno of this buf */ - unsigned int blf_map_size; /* size of data bitmap in words */ - unsigned int blf_data_map[1];/* variable size bitmap of */ - /* regions of buffer in this item */ -} xfs_buf_log_format_t; - /* * This flag indicates that the buffer contains on disk inodes * and requires special recovery handling. @@ -60,6 +43,23 @@ typedef struct xfs_buf_log_format { #define BIT_TO_WORD_SHIFT 5 #define NBWORD (NBBY * sizeof(unsigned int)) +/* + * This is the structure used to lay out a buf log item in the + * log. The data map describes which 128 byte chunks of the buffer + * have been logged. 
+ */ +#define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) + +typedef struct xfs_buf_log_format { + unsigned short blf_type; /* buf log item type indicator */ + unsigned short blf_size; /* size of this item */ + ushort blf_flags; /* misc state */ + ushort blf_len; /* number of blocks in this buf */ + __int64_t blf_blkno; /* starting blkno of this buf */ + unsigned int blf_map_size; /* used size of data bitmap in words */ + unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */ +} xfs_buf_log_format_t; + /* * buf log item flags */ @@ -102,7 +102,7 @@ typedef struct xfs_buf_log_item { char *bli_orig; /* original buffer copy */ char *bli_logged; /* bytes logged (bitmap) */ #endif - xfs_buf_log_format_t bli_format; /* in-log header */ + struct xfs_buf_log_format bli_format; /* embedded in-log header */ } xfs_buf_log_item_t; void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0d9de41a7151..425f6e9d4c0c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1514,9 +1514,8 @@ xfs_init_zones(void) * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ - xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + - (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / - NBWORD) * sizeof(int))), "xfs_buf_item"); + xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item), + "xfs_buf_item"); if (!xfs_buf_item_zone) goto out_destroy_log_item_desc_zone; diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 398cf681d025..7a41874f4c20 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -132,6 +132,20 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ #define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ #define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ +/* + * Minimum and maximum blocksize and sectorsize. + * The blocksize upper limit is pretty much arbitrary. 
+ * The sectorsize upper limit is due to sizeof(sb_sectsize). + */ +#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ +#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) +#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ +#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) +#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) + /* * Min numbers of data/attr fork btree root pointers. */ -- cgit v1.2.3 From cbb7baab285a540f173ef1ec3d5bcf9d0ad29d16 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:08 +1000 Subject: xfs: separate buffer indexing from block map To support discontiguous buffers in the buffer cache, we need to separate the cache index variables from the I/O map. While this is currently a 1:1 mapping, discontiguous buffer support will break this relationship. However, for caching purposes, we can still treat them the same as a contiguous buffer - the block number of the first block and the length of the buffer - as that is still a unique representation. Also, the only way we will ever access the discontiguous regions of buffers is via building the complete buffer in the first place, so using the initial block number and entire buffer length is a sane way to index the buffers. Add a block mapping vector construct to the xfs_buf and use it in the places where we are doing IO instead of the current b_bn/b_length variables.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 21 ++++++++++++--------- fs/xfs/xfs_buf.h | 27 +++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a4beb421018a..a843873b0954 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -202,6 +202,8 @@ xfs_buf_alloc( bp->b_io_length = numblks; bp->b_flags = flags; bp->b_bn = blkno; + bp->b_map.bm_bn = blkno; + bp->b_map.bm_len = numblks; atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); @@ -327,8 +329,9 @@ xfs_buf_allocate_memory( } use_alloc_page: - start = BBTOB(bp->b_bn) >> PAGE_SHIFT; - end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT; + end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1) + >> PAGE_SHIFT; page_count = end - start; error = _xfs_buf_get_pages(bp, page_count, flags); if (unlikely(error)) @@ -560,8 +563,6 @@ xfs_buf_get( if (bp != new_bp) xfs_buf_free(new_bp); - bp->b_io_length = bp->b_length; - found: if (!bp->b_addr) { error = _xfs_buf_map_pages(bp, flags); @@ -584,7 +585,7 @@ _xfs_buf_read( xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); @@ -665,8 +666,8 @@ xfs_buf_read_uncached( return NULL; /* set up the buffer for a read IO */ - XFS_BUF_SET_ADDR(bp, daddr); - XFS_BUF_READ(bp); + bp->b_map.bm_bn = daddr; + bp->b_flags |= XBF_READ; xfsbdstrat(target->bt_mount, bp); error = xfs_buf_iowait(bp); @@ -695,6 +696,8 @@ xfs_buf_set_empty( bp->b_length = numblks; bp->b_io_length = numblks; bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_map.bm_bn = XFS_BUF_DADDR_NULL; + bp->b_map.bm_len = bp->b_length; } static inline struct page * @@ -1159,7 +1162,7 
@@ _xfs_buf_ioapply( struct bio *bio; int offset = bp->b_offset; int size = BBTOB(bp->b_io_length); - sector_t sector = bp->b_bn; + sector_t sector = bp->b_map.bm_bn; total_nr_pages = bp->b_page_count; map_i = 0; @@ -1564,7 +1567,7 @@ xfs_buf_cmp( struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); xfs_daddr_t diff; - diff = ap->b_bn - bp->b_bn; + diff = ap->b_map.bm_bn - bp->b_map.bm_bn; if (diff < 0) return -1; if (diff > 0) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7f1d1392ce37..c9c2ba90c53c 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -58,6 +58,7 @@ typedef enum { #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_COMPOUND (1 << 23)/* compound buffer */ typedef unsigned int xfs_buf_flags_t; @@ -75,7 +76,8 @@ typedef unsigned int xfs_buf_flags_t; { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ - { _XBF_DELWRI_Q, "DELWRI_Q" } + { _XBF_DELWRI_Q, "DELWRI_Q" }, \ + { _XBF_COMPOUND, "COMPOUND" } typedef struct xfs_buftarg { dev_t bt_dev; @@ -98,6 +100,11 @@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); #define XB_PAGES 2 +struct xfs_buf_map { + xfs_daddr_t bm_bn; /* block number for I/O */ + int bm_len; /* size of I/O */ +}; + typedef struct xfs_buf { /* * first cacheline holds all the fields needed for an uncontended cache @@ -107,7 +114,7 @@ typedef struct xfs_buf { * fast-path on locking. 
*/ struct rb_node b_rbnode; /* rbtree node */ - xfs_daddr_t b_bn; /* block number for I/O */ + xfs_daddr_t b_bn; /* block number of buffer */ int b_length; /* size of buffer in BBs */ atomic_t b_hold; /* reference count */ atomic_t b_lru_ref; /* lru reclaim ref count */ @@ -127,12 +134,14 @@ typedef struct xfs_buf { struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ + struct xfs_buf_map b_map; /* compound buffer map */ int b_io_length; /* IO size in BBs */ atomic_t b_pin_count; /* pin count */ atomic_t b_io_remaining; /* #outstanding I/O requests */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ unsigned short b_error; /* error code on I/O */ + #ifdef XFS_BUF_LOCK_TRACKING int b_last_holder; #endif @@ -233,8 +242,18 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) -#define XFS_BUF_ADDR(bp) ((bp)->b_bn) -#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) +/* + * These macros use the IO block map rather than b_bn. b_bn is now really + * just for the buffer cache index for cached buffers. As IO does not use b_bn + * anymore, uncached buffers do not use b_bn at all and hence must modify the IO + * map directly. Uncached buffers are not allowed to be discontiguous, so this + * is safe to do. + * + * In future, uncached buffers will pass the block number directly to the io + * request function and hence these macros will go away at that point. 
+ */ +#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn) +#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno)) static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { -- cgit v1.2.3 From 3e85c868a697805a3d4c7800a6bacdfc81d15cdf Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:09 +1000 Subject: xfs: convert internal buffer functions to pass maps While the external interface currently uses separate blockno/length variables, we need to move internal interfaces to passing and parsing vector maps. This will then allow us to add external interfaces to support discontiguous buffer maps as the internal code will already support them. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 202 ++++++++++++++++++++++++++++++++++++++++++------------- fs/xfs/xfs_buf.h | 43 +++++++++--- 2 files changed, 191 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a843873b0954..82bb8123ab2b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -164,14 +164,49 @@ xfs_buf_stale( ASSERT(atomic_read(&bp->b_hold) >= 1); } +static int +xfs_buf_get_maps( + struct xfs_buf *bp, + int map_count) +{ + ASSERT(bp->b_maps == NULL); + bp->b_map_count = map_count; + + if (map_count == 1) { + bp->b_maps = &bp->b_map; + return 0; + } + + bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map), + KM_NOFS); + if (!bp->b_maps) + return ENOMEM; + return 0; +} + +/* + * Frees b_pages if it was allocated. 
+ */ +static void +xfs_buf_free_maps( + struct xfs_buf *bp) +{ + if (bp->b_maps != &bp->b_map) { + kmem_free(bp->b_maps); + bp->b_maps = NULL; + } +} + struct xfs_buf * -xfs_buf_alloc( +_xfs_buf_alloc( struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags) { struct xfs_buf *bp; + int error; + int i; bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); if (unlikely(!bp)) @@ -192,18 +227,28 @@ xfs_buf_alloc( sema_init(&bp->b_sema, 0); /* held, no waiters */ XB_SET_OWNER(bp); bp->b_target = target; + bp->b_flags = flags; /* * Set length and io_length to the same value initially. * I/O routines should use io_length, which will be the same in * most cases but may be reset (e.g. XFS recovery). */ - bp->b_length = numblks; - bp->b_io_length = numblks; - bp->b_flags = flags; - bp->b_bn = blkno; - bp->b_map.bm_bn = blkno; - bp->b_map.bm_len = numblks; + error = xfs_buf_get_maps(bp, nmaps); + if (error) { + kmem_zone_free(xfs_buf_zone, bp); + return NULL; + } + + bp->b_bn = map[0].bm_bn; + bp->b_length = 0; + for (i = 0; i < nmaps; i++) { + bp->b_maps[i].bm_bn = map[i].bm_bn; + bp->b_maps[i].bm_len = map[i].bm_len; + bp->b_length += map[i].bm_len; + } + bp->b_io_length = bp->b_length; + atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); @@ -282,6 +327,7 @@ xfs_buf_free( } else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); + xfs_buf_free_maps(bp); kmem_zone_free(xfs_buf_zone, bp); } @@ -428,8 +474,8 @@ _xfs_buf_map_pages( xfs_buf_t * _xfs_buf_find( struct xfs_buftarg *btp, - xfs_daddr_t blkno, - size_t numblks, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags, xfs_buf_t *new_bp) { @@ -438,7 +484,12 @@ _xfs_buf_find( struct rb_node **rbp; struct rb_node *parent; xfs_buf_t *bp; + xfs_daddr_t blkno = map[0].bm_bn; + int numblks = 0; + int i; + for (i = 0; i < nmaps; i++) + numblks += map[i].bm_len; numbytes = BBTOB(numblks); /* Check for IOs smaller 
than the sector size / not sector aligned */ @@ -539,22 +590,23 @@ xfs_buf_get( struct xfs_buf *bp; struct xfs_buf *new_bp; int error = 0; + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - bp = _xfs_buf_find(target, blkno, numblks, flags, NULL); + bp = _xfs_buf_find(target, &map, 1, flags, NULL); if (likely(bp)) goto found; - new_bp = xfs_buf_alloc(target, blkno, numblks, flags); + new_bp = _xfs_buf_alloc(target, &map, 1, flags); if (unlikely(!new_bp)) return NULL; error = xfs_buf_allocate_memory(new_bp, flags); if (error) { - kmem_zone_free(xfs_buf_zone, new_bp); + xfs_buf_free(new_bp); return NULL; } - bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp); + bp = _xfs_buf_find(target, &map, 1, flags, new_bp); if (!bp) { xfs_buf_free(new_bp); return NULL; @@ -666,7 +718,9 @@ xfs_buf_read_uncached( return NULL; /* set up the buffer for a read IO */ - bp->b_map.bm_bn = daddr; + ASSERT(bp->b_map_count == 1); + bp->b_bn = daddr; + bp->b_maps[0].bm_bn = daddr; bp->b_flags |= XBF_READ; xfsbdstrat(target->bt_mount, bp); @@ -695,9 +749,11 @@ xfs_buf_set_empty( bp->b_addr = NULL; bp->b_length = numblks; bp->b_io_length = numblks; + + ASSERT(bp->b_map_count == 1); bp->b_bn = XFS_BUF_DADDR_NULL; - bp->b_map.bm_bn = XFS_BUF_DADDR_NULL; - bp->b_map.bm_len = bp->b_length; + bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; + bp->b_maps[0].bm_len = bp->b_length; } static inline struct page * @@ -761,9 +817,10 @@ xfs_buf_get_uncached( { unsigned long page_count; int error, i; - xfs_buf_t *bp; + struct xfs_buf *bp; + DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); - bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0); + bp = _xfs_buf_alloc(target, &map, 1, 0); if (unlikely(bp == NULL)) goto fail; @@ -794,6 +851,7 @@ xfs_buf_get_uncached( __free_page(bp->b_pages[i]); _xfs_buf_free_pages(bp); fail_free_buf: + xfs_buf_free_maps(bp); kmem_zone_free(xfs_buf_zone, bp); fail: return NULL; @@ -1154,36 +1212,39 @@ xfs_buf_bio_end_io( bio_put(bio); } -STATIC void 
-_xfs_buf_ioapply( - xfs_buf_t *bp) +static void +xfs_buf_ioapply_map( + struct xfs_buf *bp, + int map, + int *buf_offset, + int *count, + int rw) { - int rw, map_i, total_nr_pages, nr_pages; - struct bio *bio; - int offset = bp->b_offset; - int size = BBTOB(bp->b_io_length); - sector_t sector = bp->b_map.bm_bn; + int page_index; + int total_nr_pages = bp->b_page_count; + int nr_pages; + struct bio *bio; + sector_t sector = bp->b_maps[map].bm_bn; + int size; + int offset; total_nr_pages = bp->b_page_count; - map_i = 0; - if (bp->b_flags & XBF_WRITE) { - if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; - if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; - } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; - } else { - rw = READ; + /* skip the pages in the buffer before the start offset */ + page_index = 0; + offset = *buf_offset; + while (offset >= PAGE_SIZE) { + page_index++; + offset -= PAGE_SIZE; } - /* we only use the buffer cache for meta-data */ - rw |= REQ_META; + /* + * Limit the IO size to the length of the current vector, and update the + * remaining IO count for the next time around. 
+ */ + size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count); + *count -= size; + *buf_offset += size; next_chunk: atomic_inc(&bp->b_io_remaining); @@ -1198,13 +1259,14 @@ next_chunk: bio->bi_private = bp; - for (; size && nr_pages; nr_pages--, map_i++) { + for (; size && nr_pages; nr_pages--, page_index++) { int rbytes, nbytes = PAGE_SIZE - offset; if (nbytes > size) nbytes = size; - rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); + rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes, + offset); if (rbytes < nbytes) break; @@ -1226,6 +1288,54 @@ next_chunk: xfs_buf_ioerror(bp, EIO); bio_put(bio); } + +} + +STATIC void +_xfs_buf_ioapply( + struct xfs_buf *bp) +{ + struct blk_plug plug; + int rw; + int offset; + int size; + int i; + + if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_SYNCIO) + rw = WRITE_SYNC; + else + rw = WRITE; + if (bp->b_flags & XBF_FUA) + rw |= REQ_FUA; + if (bp->b_flags & XBF_FLUSH) + rw |= REQ_FLUSH; + } else if (bp->b_flags & XBF_READ_AHEAD) { + rw = READA; + } else { + rw = READ; + } + + /* we only use the buffer cache for meta-data */ + rw |= REQ_META; + + /* + * Walk all the vectors issuing IO on them. Set up the initial offset + * into the buffer and the desired IO size before we start - + * _xfs_buf_ioapply_vec() will modify them appropriately for each + * subsequent call. 
+ */ + offset = bp->b_offset; + size = BBTOB(bp->b_io_length); + blk_start_plug(&plug); + for (i = 0; i < bp->b_map_count; i++) { + xfs_buf_ioapply_map(bp, i, &offset, &size, rw); + if (bp->b_error) + break; + if (size <= 0) + break; /* all done */ + } + blk_finish_plug(&plug); } void diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c9c2ba90c53c..67d134994ae4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -105,6 +105,9 @@ struct xfs_buf_map { int bm_len; /* size of I/O */ }; +#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ + struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; + typedef struct xfs_buf { /* * first cacheline holds all the fields needed for an uncontended cache @@ -134,7 +137,9 @@ typedef struct xfs_buf { struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ - struct xfs_buf_map b_map; /* compound buffer map */ + struct xfs_buf_map *b_maps; /* compound buffer map */ + struct xfs_buf_map b_map; /* inline compound buffer map */ + int b_map_count; int b_io_length; /* IO size in BBs */ atomic_t b_pin_count; /* pin count */ atomic_t b_io_remaining; /* #outstanding I/O requests */ @@ -149,11 +154,35 @@ typedef struct xfs_buf { /* Finding and Reading Buffers */ -struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, xfs_buf_flags_t flags, - struct xfs_buf *new_bp); -#define xfs_incore(buftarg,blkno,len,lockit) \ - _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) +struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + xfs_buf_flags_t flags, struct xfs_buf *new_bp); + +static inline struct xfs_buf * +xfs_incore( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return _xfs_buf_find(target, &map, 1, flags, NULL); +} + +struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg 
*target, + struct xfs_buf_map *map, int nmaps, + xfs_buf_flags_t flags); + +static inline struct xfs_buf * +xfs_buf_alloc( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return _xfs_buf_alloc(target, &map, 1, flags); +} struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags); @@ -163,8 +192,6 @@ void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks); struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); -struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, xfs_buf_flags_t flags); void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); -- cgit v1.2.3 From 6dde27077eaf590eac279627f74b7e4e40b864b2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:10 +1000 Subject: xfs: add discontiguous buffer map interface With the internal interfaces supporting discontiguous buffer maps, add external lookup, read and get interfaces so they can start to be used. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 37 ++++++++++++++++++------------------- fs/xfs/xfs_buf.h | 46 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 82bb8123ab2b..39c5d7622dec 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -581,22 +581,21 @@ found: * more hits than misses. 
*/ struct xfs_buf * -xfs_buf_get( - xfs_buftarg_t *target, - xfs_daddr_t blkno, - size_t numblks, +xfs_buf_get_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags) { struct xfs_buf *bp; struct xfs_buf *new_bp; int error = 0; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - bp = _xfs_buf_find(target, &map, 1, flags, NULL); + bp = _xfs_buf_find(target, map, nmaps, flags, NULL); if (likely(bp)) goto found; - new_bp = _xfs_buf_alloc(target, &map, 1, flags); + new_bp = _xfs_buf_alloc(target, map, nmaps, flags); if (unlikely(!new_bp)) return NULL; @@ -606,7 +605,7 @@ xfs_buf_get( return NULL; } - bp = _xfs_buf_find(target, &map, 1, flags, new_bp); + bp = _xfs_buf_find(target, map, nmaps, flags, new_bp); if (!bp) { xfs_buf_free(new_bp); return NULL; @@ -649,17 +648,17 @@ _xfs_buf_read( } xfs_buf_t * -xfs_buf_read( - xfs_buftarg_t *target, - xfs_daddr_t blkno, - size_t numblks, +xfs_buf_read_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags) { - xfs_buf_t *bp; + struct xfs_buf *bp; flags |= XBF_READ; - bp = xfs_buf_get(target, blkno, numblks, flags); + bp = xfs_buf_get_map(target, map, nmaps, flags); if (bp) { trace_xfs_buf_read(bp, flags, _RET_IP_); @@ -687,15 +686,15 @@ xfs_buf_read( * safe manner. 
*/ void -xfs_buf_readahead( - xfs_buftarg_t *target, - xfs_daddr_t blkno, - size_t numblks) +xfs_buf_readahead_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps) { if (bdi_read_congested(target->bt_bdi)) return; - xfs_buf_read(target, blkno, numblks, + xfs_buf_read_map(target, map, nmaps, XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 67d134994ae4..aa96bd410aed 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -184,12 +184,46 @@ xfs_buf_alloc( return _xfs_buf_alloc(target, &map, 1, flags); } -struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, xfs_buf_flags_t flags); -struct xfs_buf *xfs_buf_read(struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, xfs_buf_flags_t flags); -void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks); +struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + xfs_buf_flags_t flags); +struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + xfs_buf_flags_t flags); +void xfs_buf_readahead_map(struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps); + +static inline struct xfs_buf * +xfs_buf_get( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return xfs_buf_get_map(target, &map, 1, flags); +} + +static inline struct xfs_buf * +xfs_buf_read( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return xfs_buf_read_map(target, &map, 1, flags); +} + +static inline void +xfs_buf_readahead( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return xfs_buf_readahead_map(target, &map, 1); +} struct xfs_buf *xfs_buf_get_empty(struct 
xfs_buftarg *target, size_t numblks); void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); -- cgit v1.2.3 From de2a4f59190303ff5b82ead2969968a325e61230 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:11 +1000 Subject: xfs: add discontiguous buffer support to transactions Now that the buffer cache supports discontiguous buffers, add support to the transaction buffer interface for getting and reading buffers. Note that this patch does not convert the buffer item logging to support discontiguous buffers. That will be done as a separate commit. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_trans.h | 50 +++++++++++++++++++++++++++++++++---- fs/xfs/xfs_trans_buf.c | 68 +++++++++++++++++++++++++------------------------- 2 files changed, 79 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7c37b533aa8e..bc2afd52a0b7 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -448,11 +448,51 @@ xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); -struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t, - int, uint); -int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *, - struct xfs_buftarg *, xfs_daddr_t, int, uint, - struct xfs_buf **); + +struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, + struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + uint flags); + +static inline struct xfs_buf * +xfs_trans_get_buf( + struct xfs_trans *tp, + struct xfs_buftarg *target, + xfs_daddr_t blkno, + int numblks, + uint flags) +{ + struct xfs_buf_map map = { + .bm_bn = blkno, + .bm_len = numblks, + }; + return xfs_trans_get_buf_map(tp, target, &map, 1, flags); +} + +int xfs_trans_read_buf_map(struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buftarg *target, + struct 
xfs_buf_map *map, int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp); + +static inline int +xfs_trans_read_buf( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buftarg *target, + xfs_daddr_t blkno, + int numblks, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) +{ + struct xfs_buf_map map = { + .bm_bn = blkno, + .bm_len = numblks, + }; + return xfs_trans_read_buf_map(mp, tp, target, &map, 1, flags, bpp); +} + struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 21c5a5e3700d..6311b99c267f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -41,20 +41,26 @@ STATIC struct xfs_buf * xfs_trans_buf_item_match( struct xfs_trans *tp, struct xfs_buftarg *target, - xfs_daddr_t blkno, - int len) + struct xfs_buf_map *map, + int nmaps) { struct xfs_log_item_desc *lidp; struct xfs_buf_log_item *blip; + int len = 0; + int i; + + for (i = 0; i < nmaps; i++) + len += map[i].bm_len; - len = BBTOB(len); list_for_each_entry(lidp, &tp->t_items, lid_trans) { blip = (struct xfs_buf_log_item *)lidp->lid_item; if (blip->bli_item.li_type == XFS_LI_BUF && blip->bli_buf->b_target == target && - XFS_BUF_ADDR(blip->bli_buf) == blkno && - BBTOB(blip->bli_buf->b_length) == len) + XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn && + blip->bli_buf->b_length == len) { + ASSERT(blip->bli_buf->b_map_count == nmaps); return blip->bli_buf; + } } return NULL; @@ -128,21 +134,19 @@ xfs_trans_bjoin( * If the transaction pointer is NULL, make this just a normal * get_buf() call. 
*/ -xfs_buf_t * -xfs_trans_get_buf(xfs_trans_t *tp, - xfs_buftarg_t *target_dev, - xfs_daddr_t blkno, - int len, - uint flags) +struct xfs_buf * +xfs_trans_get_buf_map( + struct xfs_trans *tp, + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags) { xfs_buf_t *bp; xfs_buf_log_item_t *bip; - /* - * Default to a normal get_buf() call if the tp is NULL. - */ - if (tp == NULL) - return xfs_buf_get(target_dev, blkno, len, flags); + if (!tp) + return xfs_buf_get_map(target, map, nmaps, flags); /* * If we find the buffer in the cache with this transaction @@ -150,7 +154,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, * have it locked. In this case we just increment the lock * recursion count and return the buffer to the caller. */ - bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); + bp = xfs_trans_buf_item_match(tp, target, map, nmaps); if (bp != NULL) { ASSERT(xfs_buf_islocked(bp)); if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { @@ -167,7 +171,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, return (bp); } - bp = xfs_buf_get(target_dev, blkno, len, flags); + bp = xfs_buf_get_map(target, map, nmaps, flags); if (bp == NULL) { return NULL; } @@ -246,26 +250,22 @@ int xfs_error_mod = 33; * read_buf() call. */ int -xfs_trans_read_buf( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_buftarg_t *target, - xfs_daddr_t blkno, - int len, - uint flags, - xfs_buf_t **bpp) +xfs_trans_read_buf_map( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { xfs_buf_t *bp; xfs_buf_log_item_t *bip; int error; *bpp = NULL; - - /* - * Default to a normal get_buf() call if the tp is NULL. - */ - if (tp == NULL) { - bp = xfs_buf_read(target, blkno, len, flags); + if (!tp) { + bp = xfs_buf_read_map(target, map, nmaps, flags); if (!bp) return (flags & XBF_TRYLOCK) ? 
EAGAIN : XFS_ERROR(ENOMEM); @@ -303,7 +303,7 @@ xfs_trans_read_buf( * If the buffer is not yet read in, then we read it in, increment * the lock recursion count, and return it to the caller. */ - bp = xfs_trans_buf_item_match(tp, target, blkno, len); + bp = xfs_trans_buf_item_match(tp, target, map, nmaps); if (bp != NULL) { ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_transp == tp); @@ -349,7 +349,7 @@ xfs_trans_read_buf( return 0; } - bp = xfs_buf_read(target, blkno, len, flags); + bp = xfs_buf_read_map(target, map, nmaps, flags); if (bp == NULL) { *bpp = NULL; return (flags & XBF_TRYLOCK) ? -- cgit v1.2.3 From 372cc85ec6820c91b4eeff303880f25cb5a00ab5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:12 +1000 Subject: xfs: support discontiguous buffers in the xfs_buf_log_item discontiguous buffer in separate buffer format structures. This means log recovery will recover all the changes on a per segment basis without requiring any knowledge of the fact that it was logged from a compound buffer. To do this, we need to be able to determine what buffer segment any given offset into the compound buffer sits over. This enables us to translate the dirty bitmap into the number of separate buffer format structures required. We also need to be able to determine the number of bitmap elements that a given buffer segment has, as this determines the size of the buffer format structure. Hence we need to be able to determine both the start offset into the buffer and the length of a given segment to be able to calculate this. With this information, we can preallocate, build and format the correct log vector array for each segment in a compound buffer to appear exactly the same as individually logged buffers in the log. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_buf_item.c | 335 ++++++++++++++++++++++++++++++++++++-------------- fs/xfs/xfs_buf_item.h | 2 + 2 files changed, 244 insertions(+), 93 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 52cd8f89ee72..e4a6e4b6fa03 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -153,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); * If the XFS_BLI_STALE flag has been set, then log nothing. */ STATIC uint -xfs_buf_item_size( - struct xfs_log_item *lip) +xfs_buf_item_size_segment( + struct xfs_buf_log_item *bip, + struct xfs_buf_log_format *blfp) { - struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; uint nvecs; int next_bit; int last_bit; - ASSERT(atomic_read(&bip->bli_refcount) > 0); - if (bip->bli_flags & XFS_BLI_STALE) { - /* - * The buffer is stale, so all we need to log - * is the buf log format structure with the - * cancel flag in it. - */ - trace_xfs_buf_item_size_stale(bip); - ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); - return 1; - } + last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); + if (last_bit == -1) + return 0; + + /* + * initial count for a dirty buffer is 2 vectors - the format structure + * and the first dirty region. + */ + nvecs = 2; - ASSERT(bip->bli_flags & XFS_BLI_LOGGED); - nvecs = 1; - last_bit = xfs_next_bit(bip->bli_format.blf_data_map, - bip->bli_format.blf_map_size, 0); - ASSERT(last_bit != -1); - nvecs++; while (last_bit != -1) { /* * This takes the bit number to start looking from and @@ -187,16 +179,15 @@ xfs_buf_item_size( * if there are no more bits set or the start bit is * beyond the end of the bitmap. 
*/ - next_bit = xfs_next_bit(bip->bli_format.blf_data_map, - bip->bli_format.blf_map_size, - last_bit + 1); + next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, + last_bit + 1); /* * If we run out of bits, leave the loop, * else if we find a new set of bits bump the number of vecs, * else keep scanning the current set of bits. */ if (next_bit == -1) { - last_bit = -1; + break; } else if (next_bit != last_bit + 1) { last_bit = next_bit; nvecs++; @@ -210,22 +201,73 @@ xfs_buf_item_size( } } - trace_xfs_buf_item_size(bip); return nvecs; } /* - * This is called to fill in the vector of log iovecs for the - * given log buf item. It fills the first entry with a buf log - * format structure, and the rest point to contiguous chunks - * within the buffer. + * This returns the number of log iovecs needed to log the given buf log item. + * + * It calculates this as 1 iovec for the buf log format structure and 1 for each + * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged + * in a single iovec. + * + * Discontiguous buffers need a format structure per region that is being + * logged. This makes the changes in the buffer appear to log recovery as though + * they came from separate buffers, just like would occur if multiple buffers + * were used instead of a single discontiguous buffer. This enables + * discontiguous buffers to be in-memory constructs, completely transparent to + * what ends up on disk. + * + * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log + * format structures. */ -STATIC void -xfs_buf_item_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) +STATIC uint +xfs_buf_item_size( + struct xfs_log_item *lip) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); + uint nvecs; + int i; + + ASSERT(atomic_read(&bip->bli_refcount) > 0); + if (bip->bli_flags & XFS_BLI_STALE) { + /* + * The buffer is stale, so all we need to log + * is the buf log format structure with the + * cancel flag in it. 
+ */ + trace_xfs_buf_item_size_stale(bip); + ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); + return bip->bli_format_count; + } + + ASSERT(bip->bli_flags & XFS_BLI_LOGGED); + + /* + * the vector count is based on the number of buffer vectors we have + * dirty bits in. This will only be greater than one when we have a + * compound buffer with more than one segment dirty. Hence for compound + * buffers we need to track which segment the dirty bits correspond to, + * and when we move from one segment to the next increment the vector + * count for the extra buf log format structure that will need to be + * written. + */ + nvecs = 0; + for (i = 0; i < bip->bli_format_count; i++) { + nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]); + } + + trace_xfs_buf_item_size(bip); + return nvecs; +} + +static struct xfs_log_iovec * +xfs_buf_item_format_segment( + struct xfs_buf_log_item *bip, + struct xfs_log_iovec *vecp, + uint offset, + struct xfs_buf_log_format *blfp) +{ struct xfs_buf *bp = bip->bli_buf; uint base_size; uint nvecs; @@ -235,9 +277,8 @@ xfs_buf_item_format( uint nbits; uint buffer_offset; - ASSERT(atomic_read(&bip->bli_refcount) > 0); - ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || - (bip->bli_flags & XFS_BLI_STALE)); + /* copy the flags across from the base format item */ + blfp->blf_flags = bip->bli_format.blf_flags; /* * Base size is the actual size of the ondisk structure - it reflects @@ -245,28 +286,13 @@ xfs_buf_item_format( * memory structure. */ base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + - (bip->bli_format.blf_map_size * - sizeof(bip->bli_format.blf_data_map[0])); - vecp->i_addr = &bip->bli_format; + (blfp->blf_map_size * sizeof(blfp->blf_data_map[0])); + vecp->i_addr = blfp; vecp->i_len = base_size; vecp->i_type = XLOG_REG_TYPE_BFORMAT; vecp++; nvecs = 1; - /* - * If it is an inode buffer, transfer the in-memory state to the - * format flags and clear the in-memory state. 
We do not transfer - * this state if the inode buffer allocation has not yet been committed - * to the log as setting the XFS_BLI_INODE_BUF flag will prevent - * correct replay of the inode allocation. - */ - if (bip->bli_flags & XFS_BLI_INODE_BUF) { - if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && - xfs_log_item_in_current_chkpt(lip))) - bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; - bip->bli_flags &= ~XFS_BLI_INODE_BUF; - } - if (bip->bli_flags & XFS_BLI_STALE) { /* * The buffer is stale, so all we need to log @@ -274,16 +300,15 @@ xfs_buf_item_format( * cancel flag in it. */ trace_xfs_buf_item_format_stale(bip); - ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); - bip->bli_format.blf_size = nvecs; - return; + ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); + blfp->blf_size = nvecs; + return vecp; } /* * Fill in an iovec for each set of contiguous chunks. */ - first_bit = xfs_next_bit(bip->bli_format.blf_data_map, - bip->bli_format.blf_map_size, 0); + first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); ASSERT(first_bit != -1); last_bit = first_bit; nbits = 1; @@ -294,9 +319,8 @@ xfs_buf_item_format( * if there are no more bits set or the start bit is * beyond the end of the bitmap. */ - next_bit = xfs_next_bit(bip->bli_format.blf_data_map, - bip->bli_format.blf_map_size, - (uint)last_bit + 1); + next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, + (uint)last_bit + 1); /* * If we run out of bits fill in the last iovec and get * out of the loop. @@ -307,14 +331,14 @@ xfs_buf_item_format( * keep counting and scanning. 
*/ if (next_bit == -1) { - buffer_offset = first_bit * XFS_BLF_CHUNK; + buffer_offset = offset + first_bit * XFS_BLF_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLF_CHUNK; vecp->i_type = XLOG_REG_TYPE_BCHUNK; nvecs++; break; } else if (next_bit != last_bit + 1) { - buffer_offset = first_bit * XFS_BLF_CHUNK; + buffer_offset = offset + first_bit * XFS_BLF_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLF_CHUNK; vecp->i_type = XLOG_REG_TYPE_BCHUNK; @@ -323,14 +347,17 @@ xfs_buf_item_format( first_bit = next_bit; last_bit = next_bit; nbits = 1; - } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) != - (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) + + } else if (xfs_buf_offset(bp, offset + + (next_bit << XFS_BLF_SHIFT)) != + (xfs_buf_offset(bp, offset + + (last_bit << XFS_BLF_SHIFT)) + XFS_BLF_CHUNK)) { - buffer_offset = first_bit * XFS_BLF_CHUNK; + buffer_offset = offset + first_bit * XFS_BLF_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLF_CHUNK; vecp->i_type = XLOG_REG_TYPE_BCHUNK; -/* You would think we need to bump the nvecs here too, but we do not +/* + * You would think we need to bump the nvecs here too, but we do not * this number is used by recovery, and it gets confused by the boundary * split here * nvecs++; @@ -345,6 +372,48 @@ xfs_buf_item_format( } } bip->bli_format.blf_size = nvecs; + return vecp; +} + +/* + * This is called to fill in the vector of log iovecs for the + * given log buf item. It fills the first entry with a buf log + * format structure, and the rest point to contiguous chunks + * within the buffer. 
+ */ +STATIC void +xfs_buf_item_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *vecp) +{ + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; + uint offset = 0; + int i; + + ASSERT(atomic_read(&bip->bli_refcount) > 0); + ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || + (bip->bli_flags & XFS_BLI_STALE)); + + /* + * If it is an inode buffer, transfer the in-memory state to the + * format flags and clear the in-memory state. We do not transfer + * this state if the inode buffer allocation has not yet been committed + * to the log as setting the XFS_BLI_INODE_BUF flag will prevent + * correct replay of the inode allocation. + */ + if (bip->bli_flags & XFS_BLI_INODE_BUF) { + if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && + xfs_log_item_in_current_chkpt(lip))) + bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; + bip->bli_flags &= ~XFS_BLI_INODE_BUF; + } + + for (i = 0; i < bip->bli_format_count; i++) { + vecp = xfs_buf_item_format_segment(bip, vecp, offset, + &bip->bli_formats[i]); + offset += bp->b_maps[i].bm_len; + } /* * Check to make sure everything is consistent. @@ -620,6 +689,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_committing = xfs_buf_item_committing }; +STATIC int +xfs_buf_item_get_format( + struct xfs_buf_log_item *bip, + int count) +{ + ASSERT(bip->bli_formats == NULL); + bip->bli_format_count = count; + + if (count == 1) { + bip->bli_formats = &bip->bli_format; + return 0; + } + + bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), + KM_SLEEP); + if (!bip->bli_formats) + return ENOMEM; + return 0; +} + +STATIC void +xfs_buf_item_free_format( + struct xfs_buf_log_item *bip) +{ + if (bip->bli_formats != &bip->bli_format) { + kmem_free(bip->bli_formats); + bip->bli_formats = NULL; + } +} /* * Allocate a new buf log item to go with the given buffer. 
@@ -637,6 +735,8 @@ xfs_buf_item_init( xfs_buf_log_item_t *bip; int chunks; int map_size; + int error; + int i; /* * Check to see if there is already a buf log item for @@ -648,25 +748,33 @@ xfs_buf_item_init( if (lip != NULL && lip->li_type == XFS_LI_BUF) return; - /* - * chunks is the number of XFS_BLF_CHUNK size pieces - * the buffer can be divided into. Make sure not to - * truncate any pieces. map_size is the size of the - * bitmap needed to describe the chunks of the buffer. - */ - chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >> - XFS_BLF_SHIFT); - map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); - - bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, - KM_SLEEP); + bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); bip->bli_buf = bp; xfs_buf_hold(bp); - bip->bli_format.blf_type = XFS_LI_BUF; - bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); - bip->bli_format.blf_len = (ushort)bp->b_length; - bip->bli_format.blf_map_size = map_size; + + /* + * chunks is the number of XFS_BLF_CHUNK size pieces the buffer + * can be divided into. Make sure not to truncate any pieces. + * map_size is the size of the bitmap needed to describe the + * chunks of the buffer. + * + * Discontiguous buffer support follows the layout of the underlying + * buffer. This makes the implementation as simple as possible. + */ + error = xfs_buf_item_get_format(bip, bp->b_map_count); + ASSERT(error == 0); + + for (i = 0; i < bip->bli_format_count; i++) { + chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), + XFS_BLF_CHUNK); + map_size = DIV_ROUND_UP(chunks, NBWORD); + + bip->bli_formats[i].blf_type = XFS_LI_BUF; + bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn; + bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len; + bip->bli_formats[i].blf_map_size = map_size; + } #ifdef XFS_TRANS_DEBUG /* @@ -697,10 +805,11 @@ xfs_buf_item_init( * item's bitmap. 
*/ void -xfs_buf_item_log( - xfs_buf_log_item_t *bip, +xfs_buf_item_log_segment( + struct xfs_buf_log_item *bip, uint first, - uint last) + uint last, + uint *map) { uint first_bit; uint last_bit; @@ -712,12 +821,6 @@ xfs_buf_item_log( uint end_bit; uint mask; - /* - * Mark the item as having some dirty data for - * quick reference in xfs_buf_item_dirty. - */ - bip->bli_flags |= XFS_BLI_DIRTY; - /* * Convert byte offsets to bit numbers. */ @@ -734,7 +837,7 @@ xfs_buf_item_log( * to set a bit in. */ word_num = first_bit >> BIT_TO_WORD_SHIFT; - wordp = &(bip->bli_format.blf_data_map[word_num]); + wordp = &map[word_num]; /* * Calculate the starting bit in the first word. @@ -781,6 +884,51 @@ xfs_buf_item_log( xfs_buf_item_log_debug(bip, first, last); } +/* + * Mark bytes first through last inclusive as dirty in the buf + * item's bitmap. + */ +void +xfs_buf_item_log( + xfs_buf_log_item_t *bip, + uint first, + uint last) +{ + int i; + uint start; + uint end; + struct xfs_buf *bp = bip->bli_buf; + + /* + * Mark the item as having some dirty data for + * quick reference in xfs_buf_item_dirty. + */ + bip->bli_flags |= XFS_BLI_DIRTY; + + /* + * walk each buffer segment and mark them dirty appropriately. 
+ */ + start = 0; + for (i = 0; i < bip->bli_format_count; i++) { + if (start > last) + break; + end = start + BBTOB(bp->b_maps[i].bm_len); + if (first > end) { + start += BBTOB(bp->b_maps[i].bm_len); + continue; + } + if (first < start) + first = start; + if (end > last) + end = last; + + xfs_buf_item_log_segment(bip, first, end, + &bip->bli_formats[i].blf_data_map[0]); + + start += bp->b_maps[i].bm_len; + } +} + /* * Return 1 if the buffer has some data that has been logged (at any @@ -802,6 +950,7 @@ xfs_buf_item_free( kmem_free(bip->bli_logged); #endif /* XFS_TRANS_DEBUG */ + xfs_buf_item_free_format(bip); kmem_zone_free(xfs_buf_item_zone, bip); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index ff2686780239..6850f49f4af3 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -102,6 +102,8 @@ typedef struct xfs_buf_log_item { char *bli_orig; /* original buffer copy */ char *bli_logged; /* bytes logged (bitmap) */ #endif + int bli_format_count; /* count of headers */ + struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ struct xfs_buf_log_format bli_format; /* embedded in-log header */ } xfs_buf_log_item_t; -- cgit v1.2.3 From 3605431fb9739a30ccd0c6380ae8e3c6f8e670a5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:13 +1000 Subject: xfs: use discontiguous xfs_buf support in dabuf wrappers First step in converting the directory code to use native discontiguous buffers and replacing the dabuf construct. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_da_btree.c | 538 ++++++++++++++++++++++---------------------------- fs/xfs/xfs_da_btree.h | 6 +- 2 files changed, 239 insertions(+), 305 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 015b946c5808..76e5dbaa95ea 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -85,7 +85,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, */ STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); -STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps); +STATIC xfs_dabuf_t *xfs_da_buf_make(xfs_buf_t *bp); STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk); @@ -1967,35 +1967,75 @@ xfs_da_map_covers_blocks( } /* - * Make a dabuf. - * Used for get_buf, read_buf, read_bufr, and reada_buf. + * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. + * + * For the single map case, it is assumed that the caller has provided a pointer + * to a valid xfs_buf_map. For the multiple map case, this function will + * allocate the xfs_buf_map to hold all the maps and replace the caller's single + * map pointer with the allocated map. 
*/ -STATIC int -xfs_da_do_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t *mappedbnop, - xfs_dabuf_t **bpp, - int whichfork, - int caller) +static int +xfs_buf_map_from_irec( + struct xfs_mount *mp, + struct xfs_buf_map **mapp, + unsigned int *nmaps, + struct xfs_bmbt_irec *irecs, + unsigned int nirecs) { - xfs_buf_t *bp = NULL; - xfs_buf_t **bplist; - int error=0; - int i; - xfs_bmbt_irec_t map; - xfs_bmbt_irec_t *mapp; - xfs_daddr_t mappedbno; - xfs_mount_t *mp; - int nbplist=0; - int nfsb; - int nmap; - xfs_dabuf_t *rbp; + struct xfs_buf_map *map; + int i; + + ASSERT(*nmaps == 1); + ASSERT(nirecs >= 1); + + if (nirecs > 1) { + map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP); + if (!map) + return ENOMEM; + *mapp = map; + } + + *nmaps = nirecs; + map = *mapp; + for (i = 0; i < *nmaps; i++) { + ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && + irecs[i].br_startblock != HOLESTARTBLOCK); + map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); + map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); + } + return 0; +} + +/* + * Map the block we are given ready for reading. There are three possible return + * values: + * -1 - will be returned if we land in a hole and mappedbno == -2 so the + * caller knows not to execute a subsequent read. + * 0 - if we mapped the block successfully + * >0 - positive error number if there was an error. + */ +static int +xfs_dabuf_map( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + int whichfork, + struct xfs_buf_map **map, + int *nmaps) +{ + struct xfs_mount *mp = dp->i_mount; + int nfsb; + int error = 0; + struct xfs_bmbt_irec irec; + struct xfs_bmbt_irec *irecs = &irec; + int nirecs; + + ASSERT(map && *map); + ASSERT(*nmaps == 1); - mp = dp->i_mount; nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; - mappedbno = *mappedbnop; + /* * Caller doesn't have a mapping. -2 means don't complain * if we land in a hole. 
@@ -2004,112 +2044,152 @@ xfs_da_do_buf( /* * Optimize the one-block case. */ - if (nfsb == 1) - mapp = &map; - else - mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP); + if (nfsb != 1) + irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP); - nmap = nfsb; - error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp, - &nmap, xfs_bmapi_aflag(whichfork)); + nirecs = nfsb; + error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, + &nirecs, xfs_bmapi_aflag(whichfork)); if (error) - goto exit0; + goto out; } else { - map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); - map.br_startoff = (xfs_fileoff_t)bno; - map.br_blockcount = nfsb; - mapp = &map; - nmap = 1; + irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); + irecs->br_startoff = (xfs_fileoff_t)bno; + irecs->br_blockcount = nfsb; + irecs->br_state = 0; + nirecs = 1; } - if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) { - error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); + + if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { + error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED); if (unlikely(error == EFSCORRUPTED)) { if (xfs_error_level >= XFS_ERRLEVEL_LOW) { + int i; xfs_alert(mp, "%s: bno %lld dir: inode %lld", __func__, (long long)bno, (long long)dp->i_ino); - for (i = 0; i < nmap; i++) { + for (i = 0; i < *nmaps; i++) { xfs_alert(mp, "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", i, - (long long)mapp[i].br_startoff, - (long long)mapp[i].br_startblock, - (long long)mapp[i].br_blockcount, - mapp[i].br_state); + (long long)irecs[i].br_startoff, + (long long)irecs[i].br_startblock, + (long long)irecs[i].br_blockcount, + irecs[i].br_state); } } XFS_ERROR_REPORT("xfs_da_do_buf(1)", XFS_ERRLEVEL_LOW, mp); } - goto exit0; + goto out; } - if (caller != 3 && nmap > 1) { - bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP); - nbplist = 0; - } else - bplist = NULL; - /* - * Turn the mapping(s) into buffer(s).
- */ - for (i = 0; i < nmap; i++) { - int nmapped; - - mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock); - if (i == 0) - *mappedbnop = mappedbno; - nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount); - switch (caller) { - case 0: - bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, - mappedbno, nmapped, 0); - error = bp ? bp->b_error : XFS_ERROR(EIO); - break; - case 1: - case 2: - bp = NULL; - error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp, - mappedbno, nmapped, 0, &bp); - break; - case 3: - xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped); + error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); +out: + if (irecs != &irec) + kmem_free(irecs); + return error; +} + +/* + * Get a buffer for the dir/attr block. + */ +int +xfs_da_get_buf( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + xfs_dabuf_t **bpp, + int whichfork) +{ + struct xfs_buf *bp; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + *bpp = NULL; + mapp = &map; + nmap = 1; + error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) error = 0; - bp = NULL; - break; - } - if (error) { - if (bp) - xfs_trans_brelse(trans, bp); - goto exit1; - } - if (!bp) - continue; - if (caller == 1) { - if (whichfork == XFS_ATTR_FORK) - xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); - else - xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); - } - if (bplist) { - bplist[nbplist++] = bp; - } + goto out_free; } - /* - * Build a dabuf structure. - */ - if (bplist) { - rbp = xfs_da_buf_make(nbplist, bplist); - } else if (bp) - rbp = xfs_da_buf_make(1, &bp); + + bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, + mapp, nmap, 0); + error = bp ?
bp->b_error : XFS_ERROR(EIO); + if (error) { + xfs_trans_brelse(trans, bp); + goto out_free; + } + + *bpp = xfs_da_buf_make(bp); + +out_free: + if (mapp != &map) + kmem_free(mapp); + + return error; +} + +/* + * Get a buffer for the dir/attr block, fill in the contents. + */ +int +xfs_da_read_buf( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + xfs_dabuf_t **bpp, + int whichfork) +{ + struct xfs_buf *bp; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + *bpp = NULL; + mapp = &map; + nmap = 1; + error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } + + error = xfs_trans_read_buf_map(dp->i_mount, trans, + dp->i_mount->m_ddev_targp, + mapp, nmap, 0, &bp); + if (error) + goto out_free; + + if (whichfork == XFS_ATTR_FORK) + xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); else - rbp = NULL; + xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); + + *bpp = xfs_da_buf_make(bp); + /* - * For read_buf, check the magic number. + * This verification code will be moved to a CRC verification callback + * function so just leave it here unchanged until then.
*/ - if (caller == 1) { - xfs_dir2_data_hdr_t *hdr = rbp->data; - xfs_dir2_free_t *free = rbp->data; - xfs_da_blkinfo_t *info = rbp->data; + { + xfs_dir2_data_hdr_t *hdr = (*bpp)->data; + xfs_dir2_free_t *free = (*bpp)->data; + xfs_da_blkinfo_t *info = (*bpp)->data; uint magic, magic1; + struct xfs_mount *mp = dp->i_mount; magic = be16_to_cpu(info->magic); magic1 = be32_to_cpu(hdr->magic); @@ -2123,66 +2203,20 @@ xfs_da_do_buf( (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { - trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); + trace_xfs_da_btree_corrupt(bp, _RET_IP_); XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", XFS_ERRLEVEL_LOW, mp, info); error = XFS_ERROR(EFSCORRUPTED); - xfs_da_brelse(trans, rbp); - nbplist = 0; - goto exit1; + xfs_da_brelse(trans, *bpp); + goto out_free; } } - if (bplist) { - kmem_free(bplist); - } - if (mapp != &map) { - kmem_free(mapp); - } - if (bpp) - *bpp = rbp; - return 0; -exit1: - if (bplist) { - for (i = 0; i < nbplist; i++) - xfs_trans_brelse(trans, bplist[i]); - kmem_free(bplist); - } -exit0: + +out_free: if (mapp != &map) kmem_free(mapp); - if (bpp) - *bpp = NULL; - return error; -} - -/* - * Get a buffer for the dir/attr block. - */ -int -xfs_da_get_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, - int whichfork) -{ - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0); -} -/* - * Get a buffer for the dir/attr block, fill in the contents. 
- */ -int -xfs_da_read_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, - int whichfork) -{ - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1); + return error; } /* @@ -2190,18 +2224,38 @@ xfs_da_read_buf( */ xfs_daddr_t xfs_da_reada_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - int whichfork) + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + int whichfork) { - xfs_daddr_t rval; + xfs_daddr_t mappedbno = -1; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + mapp = &map; + nmap = 1; + error = xfs_dabuf_map(trans, dp, bno, -1, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } - rval = -1; - if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3)) + mappedbno = mapp[0].bm_bn; + xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap); + +out_free: + if (mapp != &map) + kmem_free(mapp); + + if (error) return -1; - else - return rval; + return mappedbno; } kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ @@ -2261,78 +2315,25 @@ xfs_da_state_free(xfs_da_state_t *state) */ /* ARGSUSED */ STATIC xfs_dabuf_t * -xfs_da_buf_make(int nbuf, xfs_buf_t **bps) +xfs_da_buf_make(xfs_buf_t *bp) { - xfs_buf_t *bp; xfs_dabuf_t *dabuf; - int i; - int off; - if (nbuf == 1) - dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); - else - dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); - dabuf->dirty = 0; - if (nbuf == 1) { - dabuf->nbuf = 1; - bp = bps[0]; - dabuf->bbcount = bp->b_length; - dabuf->data = bp->b_addr; - dabuf->bps[0] = bp; - } else { - dabuf->nbuf = nbuf; - for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) { - dabuf->bps[i] = bp = bps[i]; - dabuf->bbcount += bp->b_length; - } - dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); - for (i = off = 0; i < nbuf; i++, off +=
BBTOB(bp->b_length)) { - bp = bps[i]; - memcpy((char *)dabuf->data + off, bp->b_addr, - BBTOB(bp->b_length)); - } - } + dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); + dabuf->bbcount = bp->b_length; + dabuf->data = bp->b_addr; + dabuf->bp = bp; return dabuf; } -/* - * Un-dirty a dabuf. - */ -STATIC void -xfs_da_buf_clean(xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - int i; - int off; - - if (dabuf->dirty) { - ASSERT(dabuf->nbuf > 1); - dabuf->dirty = 0; - for (i = off = 0; i < dabuf->nbuf; - i++, off += BBTOB(bp->b_length)) { - bp = dabuf->bps[i]; - memcpy(bp->b_addr, dabuf->data + off, - BBTOB(bp->b_length)); - } - } -} - /* * Release a dabuf. */ void xfs_da_buf_done(xfs_dabuf_t *dabuf) { - ASSERT(dabuf); - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if (dabuf->dirty) - xfs_da_buf_clean(dabuf); - if (dabuf->nbuf > 1) { - kmem_free(dabuf->data); - kmem_free(dabuf); - } else { - kmem_zone_free(xfs_dabuf_zone, dabuf); - } + ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); + kmem_zone_free(xfs_dabuf_zone, dabuf); } /* @@ -2341,41 +2342,9 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) void xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) { - xfs_buf_t *bp; - uint f; - int i; - uint l; - int off; - - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if (dabuf->nbuf == 1) { - ASSERT(dabuf->data == dabuf->bps[0]->b_addr); - xfs_trans_log_buf(tp, dabuf->bps[0], first, last); - return; - } - dabuf->dirty = 1; - ASSERT(first <= last); - for (i = off = 0; i < dabuf->nbuf; i++, off += BBTOB(bp->b_length)) { - bp = dabuf->bps[i]; - f = off; - l = f + BBTOB(bp->b_length) - 1; - if (f < first) - f = first; - if (l > last) - l = last; - if (f <= l) - xfs_trans_log_buf(tp, bp, f - off, l - off); - /* - * B_DONE is set by xfs_trans_log buf. 
- * If we don't set it on a new buffer (get not read) - * then if we don't put anything in the buffer it won't - * be set, and at commit it it released into the cache, - * and then a read will fail. - */ - else if (!(XFS_BUF_ISDONE(bp))) - XFS_BUF_DONE(bp); - } - ASSERT(last < off); + ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); + ASSERT(dabuf->data == dabuf->bp->b_addr); + xfs_trans_log_buf(tp, dabuf->bp, first, last); } /* @@ -2386,24 +2355,9 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) void xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf) { - xfs_buf_t *bp; - xfs_buf_t **bplist; - int i; - int nbuf; - - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if ((nbuf = dabuf->nbuf) == 1) { - bplist = &bp; - bp = dabuf->bps[0]; - } else { - bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP); - memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist)); - } + ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); + xfs_trans_brelse(tp, dabuf->bp); xfs_da_buf_done(dabuf); - for (i = 0; i < nbuf; i++) - xfs_trans_brelse(tp, bplist[i]); - if (bplist != &bp) - kmem_free(bplist); } /* @@ -2412,24 +2366,9 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf) void xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf) { - xfs_buf_t *bp; - xfs_buf_t **bplist; - int i; - int nbuf; - - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if ((nbuf = dabuf->nbuf) == 1) { - bplist = &bp; - bp = dabuf->bps[0]; - } else { - bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP); - memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist)); - } + ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); xfs_da_buf_done(dabuf); - for (i = 0; i < nbuf; i++) - xfs_trans_binval(tp, bplist[i]); - if (bplist != &bp) - kmem_free(bplist); + xfs_trans_binval(tp, dabuf->bp); } /* @@ -2438,7 +2377,6 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf) xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf) { - ASSERT(dabuf->nbuf); 
ASSERT(dabuf->data); - return XFS_BUF_ADDR(dabuf->bps[0]); + return XFS_BUF_ADDR(dabuf->bp); } diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index dbf7c074ae73..0b64c4a37afb 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -141,14 +141,10 @@ typedef struct xfs_da_args { * same place as the b_addr field for the buffer, else to kmem_alloced memory. */ typedef struct xfs_dabuf { - int nbuf; /* number of buffer pointers present */ - short dirty; /* data needs to be copied back */ short bbcount; /* how large is data in bbs */ void *data; /* pointer for buffers' data */ - struct xfs_buf *bps[1]; /* actually nbuf of these */ + struct xfs_buf *bp; /* actually nbuf of these */ } xfs_dabuf_t; -#define XFS_DA_BUF_SIZE(n) \ - (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) /* * Storage for holding state during Btree searches and split/join ops. -- cgit v1.2.3 From 1d9025e56143c0c4aebebdb62e46618d3d284218 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:14 +1000 Subject: xfs: remove struct xfs_dabuf and infrastructure The struct xfs_dabuf now only tracks a single xfs_buf and all the information it holds can be gained directly from the xfs_buf. Hence we can remove the struct dabuf and pass the xfs_buf around everywhere. Kill the struct dabuf and the associated infrastructure. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_attr.c | 78 ++++++----- fs/xfs/xfs_attr_leaf.c | 255 +++++++++++++++++++----------------- fs/xfs/xfs_attr_leaf.h | 21 ++- fs/xfs/xfs_da_btree.c | 337 +++++++++++++++++------------------------------- fs/xfs/xfs_da_btree.h | 32 +---- fs/xfs/xfs_dir2.c | 4 +- fs/xfs/xfs_dir2_block.c | 118 ++++++++--------- fs/xfs/xfs_dir2_data.c | 50 +++---- fs/xfs/xfs_dir2_leaf.c | 191 +++++++++++++-------------- fs/xfs/xfs_dir2_node.c | 236 ++++++++++++++------------------- fs/xfs/xfs_dir2_priv.h | 46 +++---- fs/xfs/xfs_dir2_sf.c | 4 +- fs/xfs/xfs_super.c | 9 +- 13 files changed, 602 insertions(+), 779 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index a17ff01b5adf..0ca1f0be62d2 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -893,7 +893,7 @@ STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args) { xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int retval, error, committed, forkoff; trace_xfs_attr_leaf_addname(args); @@ -915,11 +915,11 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ retval = xfs_attr_leaf_lookup_int(bp, args); if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(retval); } else if (retval == EEXIST) { if (args->flags & ATTR_CREATE) { /* pure create op */ - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(retval); } @@ -937,7 +937,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * if required. */ retval = xfs_attr_leaf_add(bp, args); - xfs_da_buf_done(bp); if (retval == ENOSPC) { /* * Promote the attribute list to the Btree format, then @@ -1065,8 +1064,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ if (committed) xfs_trans_ijoin(args->trans, dp, 0); - } else - xfs_da_buf_done(bp); + } /* * Commit the remove and start the next trans in series. 
@@ -1092,7 +1090,7 @@ STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args) { xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error, committed, forkoff; trace_xfs_attr_leaf_removename(args); @@ -1111,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) ASSERT(bp != NULL); error = xfs_attr_leaf_lookup_int(bp, args); if (error == ENOATTR) { - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(error); } @@ -1141,8 +1139,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) */ if (committed) xfs_trans_ijoin(args->trans, dp, 0); - } else - xfs_da_buf_done(bp); + } return(0); } @@ -1155,7 +1152,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) STATIC int xfs_attr_leaf_get(xfs_da_args_t *args) { - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; args->blkno = 0; @@ -1167,11 +1164,11 @@ xfs_attr_leaf_get(xfs_da_args_t *args) error = xfs_attr_leaf_lookup_int(bp, args); if (error != EEXIST) { - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(error); } error = xfs_attr_leaf_getvalue(bp, args); - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { error = xfs_attr_rmtval_get(args); } @@ -1186,23 +1183,23 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) { xfs_attr_leafblock_t *leaf; int error; - xfs_dabuf_t *bp; + struct xfs_buf *bp; context->cursor->blkno = 0; error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK); if (error) return XFS_ERROR(error); ASSERT(bp != NULL); - leaf = bp->data; + leaf = bp->b_addr; if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return XFS_ERROR(EFSCORRUPTED); } error = xfs_attr_leaf_list_int(bp, context); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return XFS_ERROR(error); } @@ 
-1489,7 +1486,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) xfs_da_state_t *state; xfs_da_state_blk_t *blk; xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int retval, error, committed, forkoff; trace_xfs_attr_node_removename(args); @@ -1601,14 +1598,13 @@ xfs_attr_node_removename(xfs_da_args_t *args) */ ASSERT(state->path.active == 1); ASSERT(state->path.blk[0].bp); - xfs_da_buf_done(state->path.blk[0].bp); state->path.blk[0].bp = NULL; error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, XFS_ATTR_FORK); if (error) goto out; - ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) == + ASSERT((((xfs_attr_leafblock_t *)bp->b_addr)->hdr.info.magic) == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { @@ -1635,7 +1631,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) if (committed) xfs_trans_ijoin(args->trans, dp, 0); } else - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); } error = 0; @@ -1665,8 +1661,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->bp) { - blk->disk_blkno = xfs_da_blkno(blk->bp); - xfs_da_buf_done(blk->bp); + blk->disk_blkno = XFS_BUF_ADDR(blk->bp); blk->bp = NULL; } else { blk->disk_blkno = 0; @@ -1681,8 +1676,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->bp) { - blk->disk_blkno = xfs_da_blkno(blk->bp); - xfs_da_buf_done(blk->bp); + blk->disk_blkno = XFS_BUF_ADDR(blk->bp); blk->bp = NULL; } else { blk->disk_blkno = 0; @@ -1792,7 +1786,7 @@ xfs_attr_node_get(xfs_da_args_t *args) * If not in a transaction, we have to release all the buffers. 
*/ for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); + xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } @@ -1808,7 +1802,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) xfs_da_intnode_t *node; xfs_da_node_entry_t *btree; int error, i; - xfs_dabuf_t *bp; + struct xfs_buf *bp; cursor = context->cursor; cursor->initted = 1; @@ -1825,30 +1819,30 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if ((error != 0) && (error != EFSCORRUPTED)) return(error); if (bp) { - node = bp->data; + node = bp->b_addr; switch (be16_to_cpu(node->hdr.info.magic)) { case XFS_DA_NODE_MAGIC: trace_xfs_attr_list_wrong_blk(context); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); bp = NULL; break; case XFS_ATTR_LEAF_MAGIC: - leaf = bp->data; + leaf = bp->b_addr; if (cursor->hashval > be32_to_cpu(leaf->entries[ be16_to_cpu(leaf->hdr.count)-1].hashval)) { trace_xfs_attr_list_wrong_blk(context); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); bp = NULL; } else if (cursor->hashval <= be32_to_cpu(leaf->entries[0].hashval)) { trace_xfs_attr_list_wrong_blk(context); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); bp = NULL; } break; default: trace_xfs_attr_list_wrong_blk(context); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); bp = NULL; } } @@ -1873,7 +1867,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) context->dp->i_mount); return(XFS_ERROR(EFSCORRUPTED)); } - node = bp->data; + node = bp->b_addr; if (node->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) break; @@ -1883,7 +1877,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) XFS_ERRLEVEL_LOW, context->dp->i_mount, node); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return(XFS_ERROR(EFSCORRUPTED)); } btree = node->btree; @@ -1898,10 +1892,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) } } if (i == be16_to_cpu(node->hdr.count)) { - xfs_da_brelse(NULL, bp); + 
xfs_trans_brelse(NULL, bp); return(0); } - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); } } ASSERT(bp != NULL); @@ -1912,24 +1906,24 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) * adding the information. */ for (;;) { - leaf = bp->data; + leaf = bp->b_addr; if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return(XFS_ERROR(EFSCORRUPTED)); } error = xfs_attr_leaf_list_int(bp, context); if (error) { - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return error; } if (context->seen_enough || leaf->hdr.info.forw == 0) break; cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) @@ -1941,7 +1935,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } } - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return(0); } diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 7d89d800f517..d330111ca738 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -54,10 +54,10 @@ * Routines used for growing the Btree. 
*/ STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, - xfs_dabuf_t **bpp); -STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args, - int freemap_index); -STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer); + struct xfs_buf **bpp); +STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer, + xfs_da_args_t *args, int freemap_index); +STATIC void xfs_attr_leaf_compact(xfs_trans_t *tp, struct xfs_buf *leaf_buffer); STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_state_blk_t *blk2); @@ -71,9 +71,9 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state, * Routines used for shrinking the Btree. */ STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, - xfs_dabuf_t *bp, int level); + struct xfs_buf *bp, int level); STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, - xfs_dabuf_t *bp); + struct xfs_buf *bp); STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dablk_t blkno, int blkcnt); @@ -480,7 +480,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) char *tmpbuffer; int error, i, size; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_ifork_t *ifp; trace_xfs_attr_sf_to_leaf(args); @@ -550,8 +550,6 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) error = 0; out: - if(bp) - xfs_da_buf_done(bp); kmem_free(tmpbuffer); return(error); } @@ -737,14 +735,16 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) * a shortform attribute list. 
*/ int -xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) +xfs_attr_shortform_allfit( + struct xfs_buf *bp, + struct xfs_inode *dp) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_local_t *name_loc; int bytes, i; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); entry = &leaf->entries[0]; @@ -774,7 +774,10 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) * Convert a leaf attribute list to shortform attribute list */ int -xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) +xfs_attr_leaf_to_shortform( + struct xfs_buf *bp, + xfs_da_args_t *args, + int forkoff) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; @@ -791,10 +794,10 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) ASSERT(tmpbuffer != NULL); ASSERT(bp != NULL); - memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); + memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_attr_leafblock_t *)tmpbuffer; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); + memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); /* * Clean out the prior contents of the attribute list. @@ -855,7 +858,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf; xfs_da_intnode_t *node; xfs_inode_t *dp; - xfs_dabuf_t *bp1, *bp2; + struct xfs_buf *bp1, *bp2; xfs_dablk_t blkno; int error; @@ -877,10 +880,9 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) if (error) goto out; ASSERT(bp2 != NULL); - memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount)); - xfs_da_buf_done(bp1); + memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); bp1 = NULL; - xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); + xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); /* * Set up the new root node. 
@@ -888,21 +890,17 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); if (error) goto out; - node = bp1->data; - leaf = bp2->data; + node = bp1->b_addr; + leaf = bp2->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; node->btree[0].before = cpu_to_be32(blkno); node->hdr.count = cpu_to_be16(1); - xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); + xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); error = 0; out: - if (bp1) - xfs_da_buf_done(bp1); - if (bp2) - xfs_da_buf_done(bp2); return(error); } @@ -916,12 +914,15 @@ out: * or a leaf in a node attribute list. */ STATIC int -xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) +xfs_attr_leaf_create( + xfs_da_args_t *args, + xfs_dablk_t blkno, + struct xfs_buf **bpp) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; trace_xfs_attr_leaf_create(args); @@ -933,7 +934,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) if (error) return(error); ASSERT(bp != NULL); - leaf = bp->data; + leaf = bp->b_addr; memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); hdr = &leaf->hdr; hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC); @@ -947,7 +948,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) - sizeof(xfs_attr_leaf_hdr_t)); - xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); + xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); *bpp = bp; return(0); @@ -1014,7 +1015,9 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * Add a name to the leaf attribute list structure. 
*/ int -xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_add( + struct xfs_buf *bp, + struct xfs_da_args *args) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; @@ -1023,7 +1026,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) trace_xfs_attr_leaf_add(args); - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(leaf->hdr.count))); @@ -1085,7 +1088,10 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) * Add a name to a leaf attribute list structure. */ STATIC int -xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) +xfs_attr_leaf_add_work( + struct xfs_buf *bp, + xfs_da_args_t *args, + int mapindex) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; @@ -1096,7 +1102,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) xfs_mount_t *mp; int tmp, i; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); @@ -1110,7 +1116,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) tmp = be16_to_cpu(hdr->count) - args->index; tmp *= sizeof(xfs_attr_leaf_entry_t); memmove((char *)(entry+1), (char *)entry, tmp); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); } be16_add_cpu(&hdr->count, 1); @@ -1142,7 +1148,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) args->index2++; } } - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); ASSERT((args->index == 0) || (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); @@ -1174,7 +1180,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) args->rmtblkno = 1; 
args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); } - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), xfs_attr_leaf_entsize(leaf, args->index))); @@ -1198,7 +1204,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) } } be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); return(0); } @@ -1207,7 +1213,9 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) * Garbage collect a leaf attribute list block by copying it to a new buffer. */ STATIC void -xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) +xfs_attr_leaf_compact( + struct xfs_trans *trans, + struct xfs_buf *bp) { xfs_attr_leafblock_t *leaf_s, *leaf_d; xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; @@ -1217,14 +1225,14 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) mp = trans->t_mountp; tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); ASSERT(tmpbuffer != NULL); - memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp)); - memset(bp->data, 0, XFS_LBSIZE(mp)); + memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); + memset(bp->b_addr, 0, XFS_LBSIZE(mp)); /* * Copy basic information */ leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; - leaf_d = bp->data; + leaf_d = bp->b_addr; hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; hdr_d->info = hdr_s->info; /* struct copy */ @@ -1247,7 +1255,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) */ xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0, be16_to_cpu(hdr_s->count), mp); - xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); kmem_free(tmpbuffer); } @@ -1279,8 +1287,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, */ ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC); ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); - leaf1 = blk1->bp->data; 
- leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); args = state->args; @@ -1298,8 +1306,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, tmp_blk = blk1; blk1 = blk2; blk2 = tmp_blk; - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; swap = 1; } hdr1 = &leaf1->hdr; @@ -1346,8 +1354,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count, leaf2, 0, count, state->mp); - xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); - xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); } else if (count > be16_to_cpu(hdr1->count)) { /* * I assert that since all callers pass in an empty @@ -1378,8 +1386,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_attr_leaf_moveents(leaf2, 0, leaf1, be16_to_cpu(hdr1->count), count, state->mp); - xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); - xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); } /* @@ -1448,8 +1456,8 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state, /* * Set up environment. 
*/ - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; hdr1 = &leaf1->hdr; hdr2 = &leaf2->hdr; foundit = 0; @@ -1551,7 +1559,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) xfs_da_blkinfo_t *info; int count, bytes, forward, error, retval, i; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; /* * Check for the degenerate case of the block being over 50% full. @@ -1559,7 +1567,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) * to coalesce with a sibling. */ blk = &state->path.blk[ state->path.active-1 ]; - info = blk->bp->data; + info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); leaf = (xfs_attr_leafblock_t *)info; count = be16_to_cpu(leaf->hdr.count); @@ -1622,13 +1630,13 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) count = be16_to_cpu(leaf->hdr.count); bytes = state->blocksize - (state->blocksize>>2); bytes -= be16_to_cpu(leaf->hdr.usedbytes); - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); count += be16_to_cpu(leaf->hdr.count); bytes -= be16_to_cpu(leaf->hdr.usedbytes); bytes -= count * sizeof(xfs_attr_leaf_entry_t); bytes -= sizeof(xfs_attr_leaf_hdr_t); - xfs_da_brelse(state->args->trans, bp); + xfs_trans_brelse(state->args->trans, bp); if (bytes >= 0) break; /* fits with at least 25% to spare */ } @@ -1666,7 +1674,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) * If two leaves are 37% full, when combined they will leave 25% free. 
*/ int -xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_remove( + struct xfs_buf *bp, + xfs_da_args_t *args) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; @@ -1676,7 +1686,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) int tablesize, tmp, i; xfs_mount_t *mp; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr = &leaf->hdr; mp = args->trans->t_mountp; @@ -1769,7 +1779,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) */ memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize); be16_add_cpu(&hdr->usedbytes, -entsize); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), entsize)); @@ -1777,7 +1787,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * sizeof(xfs_attr_leaf_entry_t); memmove((char *)entry, (char *)(entry+1), tmp); be16_add_cpu(&hdr->count, -1); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); entry = &leaf->entries[be16_to_cpu(hdr->count)]; memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t)); @@ -1807,7 +1817,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) } else { hdr->holes = 1; /* mark as needing compaction */ } - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); /* @@ -1840,8 +1850,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, mp = state->mp; ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC); ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); - drop_leaf = drop_blk->bp->data; - save_leaf = save_blk->bp->data; + drop_leaf = drop_blk->bp->b_addr; + save_leaf = save_blk->bp->b_addr; ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); drop_hdr = &drop_leaf->hdr; @@ 
-1906,7 +1916,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, kmem_free(tmpbuffer); } - xfs_da_log_buf(state->args->trans, save_blk->bp, 0, + xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, state->blocksize - 1); /* @@ -1934,7 +1944,9 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * Don't change the args->value unless we find the attribute. */ int -xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_lookup_int( + struct xfs_buf *bp, + xfs_da_args_t *args) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; @@ -1945,7 +1957,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) trace_xfs_attr_leaf_lookup(args); - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -2041,7 +2053,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) * list structure. */ int -xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_getvalue( + struct xfs_buf *bp, + xfs_da_args_t *args) { int valuelen; xfs_attr_leafblock_t *leaf; @@ -2049,7 +2063,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_name_local_t *name_loc; xfs_attr_leaf_name_remote_t *name_rmt; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -2247,12 +2261,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, * Return 0 unless leaf2 should go before leaf1. 
*/ int -xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) +xfs_attr_leaf_order( + struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp) { xfs_attr_leafblock_t *leaf1, *leaf2; - leaf1 = leaf1_bp->data; - leaf2 = leaf2_bp->data; + leaf1 = leaf1_bp->b_addr; + leaf2 = leaf2_bp->b_addr; ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) && (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC))); if ((be16_to_cpu(leaf1->hdr.count) > 0) && @@ -2272,11 +2288,13 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) * Pick up the last hashvalue from a leaf block. */ xfs_dahash_t -xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) +xfs_attr_leaf_lasthash( + struct xfs_buf *bp, + int *count) { xfs_attr_leafblock_t *leaf; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if (count) *count = be16_to_cpu(leaf->hdr.count); @@ -2337,7 +2355,9 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local) * Copy out attribute list entries for attr_list(), for leaf attribute lists. 
*/ int -xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) +xfs_attr_leaf_list_int( + struct xfs_buf *bp, + xfs_attr_list_context_t *context) { attrlist_cursor_kern_t *cursor; xfs_attr_leafblock_t *leaf; @@ -2345,7 +2365,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) int retval, i; ASSERT(bp != NULL); - leaf = bp->data; + leaf = bp->b_addr; cursor = context->cursor; cursor->initted = 1; @@ -2463,7 +2483,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_remote_t *name_rmt; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; #ifdef DEBUG xfs_attr_leaf_name_local_t *name_loc; @@ -2482,7 +2502,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) } ASSERT(bp != NULL); - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); @@ -2505,7 +2525,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) #endif /* DEBUG */ entry->flags &= ~XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); if (args->rmtblkno) { @@ -2513,10 +2533,9 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); name_rmt->valuelen = cpu_to_be32(args->valuelen); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } - xfs_da_buf_done(bp); /* * Commit the flag value change and start the next trans in series. 
@@ -2533,7 +2552,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_remote_t *name_rmt; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; trace_xfs_attr_leaf_setflag(args); @@ -2548,7 +2567,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) } ASSERT(bp != NULL); - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); @@ -2556,16 +2575,15 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); entry->flags |= XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); if ((entry->flags & XFS_ATTR_LOCAL) == 0) { name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->valueblk = 0; name_rmt->valuelen = 0; - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } - xfs_da_buf_done(bp); /* * Commit the flag value change and start the next trans in series. 
@@ -2586,7 +2604,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf1, *leaf2; xfs_attr_leaf_entry_t *entry1, *entry2; xfs_attr_leaf_name_remote_t *name_rmt; - xfs_dabuf_t *bp1, *bp2; + struct xfs_buf *bp1, *bp2; int error; #ifdef DEBUG xfs_attr_leaf_name_local_t *name_loc; @@ -2620,13 +2638,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) bp2 = bp1; } - leaf1 = bp1->data; + leaf1 = bp1->b_addr; ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; - leaf2 = bp2->data; + leaf2 = bp2->b_addr; ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); ASSERT(args->index2 >= 0); @@ -2660,30 +2678,27 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0); entry1->flags &= ~XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp1, + xfs_trans_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); if (args->rmtblkno) { ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); name_rmt->valuelen = cpu_to_be32(args->valuelen); - xfs_da_log_buf(args->trans, bp1, + xfs_trans_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); } entry2->flags |= XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp2, + xfs_trans_log_buf(args->trans, bp2, XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); name_rmt->valueblk = 0; name_rmt->valuelen = 0; - xfs_da_log_buf(args->trans, bp2, + xfs_trans_log_buf(args->trans, bp2, XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt))); } - xfs_da_buf_done(bp1); - if (bp1 != bp2) - xfs_da_buf_done(bp2); /* * Commit the flag value change and start the 
next trans in series. @@ -2706,7 +2721,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) { xfs_da_blkinfo_t *info; xfs_daddr_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; /* @@ -2718,20 +2733,20 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) error = xfs_da_read_buf(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); if (error) return(error); - blkno = xfs_da_blkno(bp); + blkno = XFS_BUF_ADDR(bp); /* * Invalidate the tree, even if the "tree" is only a single leaf block. * This is a depth-first traversal! */ - info = bp->data; + info = bp->b_addr; if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { error = xfs_attr_node_inactive(trans, dp, bp, 1); } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { error = xfs_attr_leaf_inactive(trans, dp, bp); } else { error = XFS_ERROR(EIO); - xfs_da_brelse(*trans, bp); + xfs_trans_brelse(*trans, bp); } if (error) return(error); @@ -2742,7 +2757,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK); if (error) return(error); - xfs_da_binval(*trans, bp); /* remove from cache */ + xfs_trans_binval(*trans, bp); /* remove from cache */ /* * Commit the invalidate and start the next transaction. */ @@ -2756,34 +2771,37 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * We're doing a depth-first traversal in order to invalidate everything. */ STATIC int -xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, - int level) +xfs_attr_node_inactive( + struct xfs_trans **trans, + struct xfs_inode *dp, + struct xfs_buf *bp, + int level) { xfs_da_blkinfo_t *info; xfs_da_intnode_t *node; xfs_dablk_t child_fsb; xfs_daddr_t parent_blkno, child_blkno; int error, count, i; - xfs_dabuf_t *child_bp; + struct xfs_buf *child_bp; /* * Since this code is recursive (gasp!) we must protect ourselves. 
*/ if (level > XFS_DA_NODE_MAXDEPTH) { - xfs_da_brelse(*trans, bp); /* no locks for later trans */ + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ return(XFS_ERROR(EIO)); } - node = bp->data; + node = bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ + parent_blkno = XFS_BUF_ADDR(bp); /* save for re-read later */ count = be16_to_cpu(node->hdr.count); if (!count) { - xfs_da_brelse(*trans, bp); + xfs_trans_brelse(*trans, bp); return(0); } child_fsb = be32_to_cpu(node->btree[0].before); - xfs_da_brelse(*trans, bp); /* no locks for later trans */ + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ /* * If this is the node level just above the leaves, simply loop @@ -2803,12 +2821,12 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, return(error); if (child_bp) { /* save for re-read later */ - child_blkno = xfs_da_blkno(child_bp); + child_blkno = XFS_BUF_ADDR(child_bp); /* * Invalidate the subtree, however we have to. 
*/ - info = child_bp->data; + info = child_bp->b_addr; if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { error = xfs_attr_node_inactive(trans, dp, child_bp, level+1); @@ -2817,7 +2835,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, child_bp); } else { error = XFS_ERROR(EIO); - xfs_da_brelse(*trans, child_bp); + xfs_trans_brelse(*trans, child_bp); } if (error) return(error); @@ -2830,7 +2848,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, &child_bp, XFS_ATTR_FORK); if (error) return(error); - xfs_da_binval(*trans, child_bp); + xfs_trans_binval(*trans, child_bp); } /* @@ -2843,7 +2861,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, if (error) return(error); child_fsb = be32_to_cpu(node->btree[i+1].before); - xfs_da_brelse(*trans, bp); + xfs_trans_brelse(*trans, bp); } /* * Atomically commit the whole invalidate stuff. @@ -2863,7 +2881,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * caught holding something that the logging code wants to flush to disk. */ STATIC int -xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) +xfs_attr_leaf_inactive( + struct xfs_trans **trans, + struct xfs_inode *dp, + struct xfs_buf *bp) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; @@ -2871,7 +2892,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) xfs_attr_inactive_list_t *list, *lp; int error, count, size, tmp, i; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); /* @@ -2892,7 +2913,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) * If there are no "remote" values, we're done. 
*/ if (count == 0) { - xfs_da_brelse(*trans, bp); + xfs_trans_brelse(*trans, bp); return(0); } @@ -2919,7 +2940,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) } } } - xfs_da_brelse(*trans, bp); /* unlock for trans. in freextent() */ + xfs_trans_brelse(*trans, bp); /* unlock for trans. in freextent() */ /* * Invalidate each of the "remote" value extents. diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 9c7d22fdcf4d..dea17722945e 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -31,7 +31,6 @@ struct attrlist; struct attrlist_cursor_kern; struct xfs_attr_list_context; -struct xfs_dabuf; struct xfs_da_args; struct xfs_da_state; struct xfs_da_state_blk; @@ -215,7 +214,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_list(struct xfs_attr_list_context *context); -int xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp); +int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); @@ -223,7 +222,7 @@ int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); * Internal routines when attribute fork size == XFS_LBSIZE(mp). 
*/ int xfs_attr_leaf_to_node(struct xfs_da_args *args); -int xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp, +int xfs_attr_leaf_to_shortform(struct xfs_buf *bp, struct xfs_da_args *args, int forkoff); int xfs_attr_leaf_clearflag(struct xfs_da_args *args); int xfs_attr_leaf_setflag(struct xfs_da_args *args); @@ -235,14 +234,14 @@ int xfs_attr_leaf_flipflags(xfs_da_args_t *args); int xfs_attr_leaf_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); -int xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf, +int xfs_attr_leaf_lookup_int(struct xfs_buf *leaf, struct xfs_da_args *args); -int xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args); -int xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer, +int xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); +int xfs_attr_leaf_add(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); -int xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer, +int xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); -int xfs_attr_leaf_list_int(struct xfs_dabuf *bp, +int xfs_attr_leaf_list_int(struct xfs_buf *bp, struct xfs_attr_list_context *context); /* @@ -257,9 +256,9 @@ int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); /* * Utility routines. */ -xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count); -int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); +xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count); +int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp); int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local); #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 76e5dbaa95ea..7bfb7dd334fc 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -83,9 +83,9 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, /* * Utility routines. 
*/ -STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); -STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); -STATIC xfs_dabuf_t *xfs_da_buf_make(xfs_buf_t *bp); +STATIC uint xfs_da_node_lasthash(struct xfs_buf *bp, int *count); +STATIC int xfs_da_node_order(struct xfs_buf *node1_bp, + struct xfs_buf *node2_bp); STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk); @@ -100,10 +100,10 @@ STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); */ int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, - xfs_dabuf_t **bpp, int whichfork) + struct xfs_buf **bpp, int whichfork) { xfs_da_intnode_t *node; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; xfs_trans_t *tp; @@ -114,7 +114,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, if (error) return(error); ASSERT(bp != NULL); - node = bp->data; + node = bp->b_addr; node->hdr.info.forw = 0; node->hdr.info.back = 0; node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC); @@ -122,7 +122,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, node->hdr.count = 0; node->hdr.level = cpu_to_be16(level); - xfs_da_log_buf(tp, bp, + xfs_trans_log_buf(tp, bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); *bpp = bp; @@ -138,7 +138,7 @@ xfs_da_split(xfs_da_state_t *state) { xfs_da_state_blk_t *oldblk, *newblk, *addblk; xfs_da_intnode_t *node; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int max, action, error, i; trace_xfs_da_split(state->args); @@ -203,7 +203,6 @@ xfs_da_split(xfs_da_state_t *state) case XFS_DA_NODE_MAGIC: error = xfs_da_node_split(state, oldblk, newblk, addblk, max - i, &action); - xfs_da_buf_done(addblk->bp); addblk->bp = NULL; if (error) return(error); /* GROT: dir is inconsistent */ @@ -221,13 +220,6 @@ xfs_da_split(xfs_da_state_t *state) * Update the btree to show the new hashval for this child. 
*/ xfs_da_fixhashpath(state, &state->path); - /* - * If we won't need this block again, it's getting dropped - * from the active path by the loop control, so we need - * to mark it done now. - */ - if (i > 0 || !addblk) - xfs_da_buf_done(oldblk->bp); } if (!addblk) return(0); @@ -239,8 +231,6 @@ xfs_da_split(xfs_da_state_t *state) oldblk = &state->path.blk[0]; error = xfs_da_root_split(state, oldblk, addblk); if (error) { - xfs_da_buf_done(oldblk->bp); - xfs_da_buf_done(addblk->bp); addblk->bp = NULL; return(error); /* GROT: dir is inconsistent */ } @@ -252,7 +242,7 @@ xfs_da_split(xfs_da_state_t *state) * and the original block 0 could be at any position in the list. */ - node = oldblk->bp->data; + node = oldblk->bp->b_addr; if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { bp = addblk->bp; @@ -260,13 +250,13 @@ xfs_da_split(xfs_da_state_t *state) ASSERT(state->extravalid); bp = state->extrablk.bp; } - node = bp->data; + node = bp->b_addr; node->hdr.info.back = cpu_to_be32(oldblk->blkno); - xfs_da_log_buf(state->args->trans, bp, + xfs_trans_log_buf(state->args->trans, bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); } - node = oldblk->bp->data; + node = oldblk->bp->b_addr; if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { bp = addblk->bp; @@ -274,14 +264,12 @@ xfs_da_split(xfs_da_state_t *state) ASSERT(state->extravalid); bp = state->extrablk.bp; } - node = bp->data; + node = bp->b_addr; node->hdr.info.forw = cpu_to_be32(oldblk->blkno); - xfs_da_log_buf(state->args->trans, bp, + xfs_trans_log_buf(state->args->trans, bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); } - xfs_da_buf_done(oldblk->bp); - xfs_da_buf_done(addblk->bp); addblk->bp = NULL; return(0); } @@ -298,7 +286,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_intnode_t *node, *oldroot; xfs_da_args_t *args; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; 
int error, size; xfs_inode_t *dp; xfs_trans_t *tp; @@ -323,8 +311,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, if (error) return(error); ASSERT(bp != NULL); - node = bp->data; - oldroot = blk1->bp->data; + node = bp->b_addr; + oldroot = blk1->bp->b_addr; if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); @@ -335,8 +323,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, (char *)leaf); } memcpy(node, oldroot, size); - xfs_da_log_buf(tp, bp, 0, size - 1); - xfs_da_buf_done(blk1->bp); + xfs_trans_log_buf(tp, bp, 0, size - 1); blk1->bp = bp; blk1->blkno = blkno; @@ -348,7 +335,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); if (error) return(error); - node = bp->data; + node = bp->b_addr; node->btree[0].hashval = cpu_to_be32(blk1->hashval); node->btree[0].before = cpu_to_be32(blk1->blkno); node->btree[1].hashval = cpu_to_be32(blk2->hashval); @@ -365,10 +352,9 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, #endif /* Header is already logged by xfs_da_node_create */ - xfs_da_log_buf(tp, bp, + xfs_trans_log_buf(tp, bp, XFS_DA_LOGRANGE(node, node->btree, sizeof(xfs_da_node_entry_t) * 2)); - xfs_da_buf_done(bp); return(0); } @@ -389,7 +375,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, trace_xfs_da_node_split(state->args); - node = oldblk->bp->data; + node = oldblk->bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); /* @@ -436,7 +422,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * * If we had double-split op below us, then add the extra block too. 
*/ - node = oldblk->bp->data; + node = oldblk->bp->b_addr; if (oldblk->index <= be16_to_cpu(node->hdr.count)) { oldblk->index++; xfs_da_node_add(state, oldblk, addblk); @@ -477,8 +463,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, trace_xfs_da_node_rebalance(state->args); - node1 = blk1->bp->data; - node2 = blk2->bp->data; + node1 = blk1->bp->b_addr; + node2 = blk2->bp->b_addr; /* * Figure out how many entries need to move, and in which direction. * Swap the nodes around if that makes it simpler. @@ -532,7 +518,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; memcpy(btree_d, btree_s, tmp); be16_add_cpu(&node1->hdr.count, count); - xfs_da_log_buf(tp, blk1->bp, + xfs_trans_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, btree_d, tmp)); /* @@ -549,9 +535,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, /* * Log header of node 1 and all current bits of node 2. */ - xfs_da_log_buf(tp, blk1->bp, + xfs_trans_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr))); - xfs_da_log_buf(tp, blk2->bp, + xfs_trans_log_buf(tp, blk2->bp, XFS_DA_LOGRANGE(node2, &node2->hdr, sizeof(node2->hdr) + sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count))); @@ -560,8 +546,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Record the last hashval from each block for upward propagation. 
* (note: don't use the swapped node pointers) */ - node1 = blk1->bp->data; - node2 = blk2->bp->data; + node1 = blk1->bp->b_addr; + node2 = blk2->bp->b_addr; blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval); blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval); @@ -587,7 +573,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, trace_xfs_da_node_add(state->args); - node = oldblk->bp->data; + node = oldblk->bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); @@ -606,10 +592,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, } btree->hashval = cpu_to_be32(newblk->hashval); btree->before = cpu_to_be32(newblk->blkno); - xfs_da_log_buf(state->args->trans, oldblk->bp, + xfs_trans_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); be16_add_cpu(&node->hdr.count, 1); - xfs_da_log_buf(state->args->trans, oldblk->bp, + xfs_trans_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); /* @@ -735,7 +721,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) xfs_da_intnode_t *oldroot; xfs_da_args_t *args; xfs_dablk_t child; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; trace_xfs_da_root_join(state->args); @@ -743,7 +729,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) args = state->args; ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); - oldroot = root_blk->bp->data; + oldroot = root_blk->bp->b_addr; ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT(!oldroot->hdr.info.forw); ASSERT(!oldroot->hdr.info.back); @@ -765,11 +751,11 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) if (error) return(error); ASSERT(bp != NULL); - 
xfs_da_blkinfo_onlychild_validate(bp->data, + xfs_da_blkinfo_onlychild_validate(bp->b_addr, be16_to_cpu(oldroot->hdr.level)); - memcpy(root_blk->bp->data, bp->data, state->blocksize); - xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); + memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); + xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); error = xfs_da_shrink_inode(args, child, bp); return(error); } @@ -791,7 +777,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) xfs_da_blkinfo_t *info; int count, forward, error, retval, i; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; /* * Check for the degenerate case of the block being over 50% full. @@ -799,7 +785,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) * to coalesce with a sibling. */ blk = &state->path.blk[ state->path.active-1 ]; - info = blk->bp->data; + info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); node = (xfs_da_intnode_t *)info; count = be16_to_cpu(node->hdr.count); @@ -859,10 +845,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) count = state->node_ents; count -= state->node_ents >> 2; count -= be16_to_cpu(node->hdr.count); - node = bp->data; + node = bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); count -= be16_to_cpu(node->hdr.count); - xfs_da_brelse(state->args->trans, bp); + xfs_trans_brelse(state->args->trans, bp); if (count >= 0) break; /* fits with at least 25% to spare */ } @@ -934,14 +920,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) break; } for (blk--, level--; level >= 0; blk--, level--) { - node = blk->bp->data; + node = blk->bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); btree = &node->btree[ blk->index ]; if (be32_to_cpu(btree->hashval) == lasthash) break; blk->hashval = lasthash; btree->hashval = cpu_to_be32(lasthash); - xfs_da_log_buf(state->args->trans, blk->bp, + 
xfs_trans_log_buf(state->args->trans, blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); @@ -960,7 +946,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) trace_xfs_da_node_remove(state->args); - node = drop_blk->bp->data; + node = drop_blk->bp->b_addr; ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); ASSERT(drop_blk->index >= 0); @@ -972,15 +958,15 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(btree, btree + 1, tmp); - xfs_da_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, tmp)); btree = &node->btree[be16_to_cpu(node->hdr.count)-1]; } memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); - xfs_da_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); be16_add_cpu(&node->hdr.count, -1); - xfs_da_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); /* @@ -1005,8 +991,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, trace_xfs_da_node_unbalance(state->args); - drop_node = drop_blk->bp->data; - save_node = save_blk->bp->data; + drop_node = drop_blk->bp->b_addr; + save_node = save_blk->bp->b_addr; ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); tp = state->args->trans; @@ -1023,13 +1009,13 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); memmove(btree, &save_node->btree[0], tmp); btree = &save_node->btree[0]; - xfs_da_log_buf(tp, 
save_blk->bp, + xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, btree, (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) * sizeof(xfs_da_node_entry_t))); } else { btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)]; - xfs_da_log_buf(tp, save_blk->bp, + xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, btree, be16_to_cpu(drop_node->hdr.count) * sizeof(xfs_da_node_entry_t))); @@ -1042,7 +1028,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, memcpy(btree, &drop_node->btree[0], tmp); be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); - xfs_da_log_buf(tp, save_blk->bp, + xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, sizeof(save_node->hdr))); @@ -1100,7 +1086,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) state->path.active--; return(error); } - curr = blk->bp->data; + curr = blk->bp->b_addr; blk->magic = be16_to_cpu(curr->magic); ASSERT(blk->magic == XFS_DA_NODE_MAGIC || blk->magic == XFS_DIR2_LEAFN_MAGIC || @@ -1110,7 +1096,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) * Search an intermediate node for a match. */ if (blk->magic == XFS_DA_NODE_MAGIC) { - node = blk->bp->data; + node = blk->bp->b_addr; max = be16_to_cpu(node->hdr.count); blk->hashval = be32_to_cpu(node->btree[max-1].hashval); @@ -1216,15 +1202,15 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, xfs_da_blkinfo_t *old_info, *new_info, *tmp_info; xfs_da_args_t *args; int before=0, error; - xfs_dabuf_t *bp; + struct xfs_buf *bp; /* * Set up environment. 
*/ args = state->args; ASSERT(args != NULL); - old_info = old_blk->bp->data; - new_info = new_blk->bp->data; + old_info = old_blk->bp->b_addr; + new_info = new_blk->bp->b_addr; ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || old_blk->magic == XFS_DIR2_LEAFN_MAGIC || old_blk->magic == XFS_ATTR_LEAF_MAGIC); @@ -1261,12 +1247,11 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic)); ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); tmp_info->forw = cpu_to_be32(new_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); - xfs_da_buf_done(bp); + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); } old_info->back = cpu_to_be32(new_blk->blkno); } else { @@ -1283,18 +1268,17 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(tmp_info->magic == old_info->magic); ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); tmp_info->back = cpu_to_be32(new_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); - xfs_da_buf_done(bp); + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); } old_info->forw = cpu_to_be32(new_blk->blkno); } - xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); - xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); + xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); + xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); return(0); } @@ -1302,12 +1286,14 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, * Compare two intermediate nodes for "order". 
*/ STATIC int -xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) +xfs_da_node_order( + struct xfs_buf *node1_bp, + struct xfs_buf *node2_bp) { xfs_da_intnode_t *node1, *node2; - node1 = node1_bp->data; - node2 = node2_bp->data; + node1 = node1_bp->b_addr; + node2 = node2_bp->b_addr; ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) && node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && @@ -1324,11 +1310,13 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) * Pick up the last hashvalue from an intermediate node. */ STATIC uint -xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) +xfs_da_node_lasthash( + struct xfs_buf *bp, + int *count) { xfs_da_intnode_t *node; - node = bp->data; + node = bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if (count) *count = be16_to_cpu(node->hdr.count); @@ -1346,7 +1334,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, { xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info; xfs_da_args_t *args; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; /* @@ -1354,8 +1342,8 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, */ args = state->args; ASSERT(args != NULL); - save_info = save_blk->bp->data; - drop_info = drop_blk->bp->data; + save_info = save_blk->bp->b_addr; + drop_info = drop_blk->bp->b_addr; ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || save_blk->magic == XFS_DIR2_LEAFN_MAGIC || save_blk->magic == XFS_ATTR_LEAF_MAGIC); @@ -1380,13 +1368,12 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); tmp_info->forw = cpu_to_be32(save_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, + xfs_trans_log_buf(args->trans, bp, 0, 
sizeof(*tmp_info) - 1); - xfs_da_buf_done(bp); } } else { trace_xfs_da_unlink_forward(args); @@ -1398,17 +1385,16 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); tmp_info->back = cpu_to_be32(save_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info) - 1); - xfs_da_buf_done(bp); } } - xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); + xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); return(0); } @@ -1443,7 +1429,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, level = (path->active-1) - 1; /* skip bottom layer in path */ for (blk = &path->blk[level]; level >= 0; blk--, level--) { ASSERT(blk->bp != NULL); - node = blk->bp->data; + node = blk->bp->b_addr; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { blk->index++; @@ -1471,7 +1457,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, * (if it's dirty, trans won't actually let go) */ if (release) - xfs_da_brelse(args->trans, blk->bp); + xfs_trans_brelse(args->trans, blk->bp); /* * Read the next child block. @@ -1482,7 +1468,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, if (error) return(error); ASSERT(blk->bp != NULL); - info = blk->bp->data; + info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); @@ -1702,11 +1688,13 @@ xfs_da_grow_inode( * a bmap btree split to do that. 
*/ STATIC int -xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, - xfs_dabuf_t **dead_bufp) +xfs_da_swap_lastblock( + xfs_da_args_t *args, + xfs_dablk_t *dead_blknop, + struct xfs_buf **dead_bufp) { xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno; - xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf; + struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf; xfs_fileoff_t lastoff; xfs_inode_t *ip; xfs_trans_t *tp; @@ -1744,9 +1732,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Copy the last block into the dead buffer and log it. */ - memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize); - xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); - dead_info = dead_buf->data; + memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize); + xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); + dead_info = dead_buf->b_addr; /* * Get values from the moved block. */ @@ -1767,7 +1755,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if ((sib_blkno = be32_to_cpu(dead_info->back))) { if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) goto done; - sib_info = sib_buf->data; + sib_info = sib_buf->b_addr; if (unlikely( be32_to_cpu(sib_info->forw) != last_blkno || sib_info->magic != dead_info->magic)) { @@ -1777,10 +1765,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, goto done; } sib_info->forw = cpu_to_be32(dead_blkno); - xfs_da_log_buf(tp, sib_buf, + xfs_trans_log_buf(tp, sib_buf, XFS_DA_LOGRANGE(sib_info, &sib_info->forw, sizeof(sib_info->forw))); - xfs_da_buf_done(sib_buf); sib_buf = NULL; } /* @@ -1789,7 +1776,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if ((sib_blkno = be32_to_cpu(dead_info->forw))) { if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) goto done; - sib_info = sib_buf->data; + sib_info = sib_buf->b_addr; if (unlikely( be32_to_cpu(sib_info->back) != last_blkno || sib_info->magic 
!= dead_info->magic)) { @@ -1799,10 +1786,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, goto done; } sib_info->back = cpu_to_be32(dead_blkno); - xfs_da_log_buf(tp, sib_buf, + xfs_trans_log_buf(tp, sib_buf, XFS_DA_LOGRANGE(sib_info, &sib_info->back, sizeof(sib_info->back))); - xfs_da_buf_done(sib_buf); sib_buf = NULL; } par_blkno = mp->m_dirleafblk; @@ -1813,7 +1799,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, for (;;) { if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) goto done; - par_node = par_buf->data; + par_node = par_buf->b_addr; if (unlikely(par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC) || (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { @@ -1837,7 +1823,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_blkno = be32_to_cpu(par_node->btree[entno].before); if (level == dead_level + 1) break; - xfs_da_brelse(tp, par_buf); + xfs_trans_brelse(tp, par_buf); par_buf = NULL; } /* @@ -1853,7 +1839,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if (entno < be16_to_cpu(par_node->hdr.count)) break; par_blkno = be32_to_cpu(par_node->hdr.info.forw); - xfs_da_brelse(tp, par_buf); + xfs_trans_brelse(tp, par_buf); par_buf = NULL; if (unlikely(par_blkno == 0)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", @@ -1863,7 +1849,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, } if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) goto done; - par_node = par_buf->data; + par_node = par_buf->b_addr; if (unlikely( be16_to_cpu(par_node->hdr.level) != level || par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) { @@ -1878,20 +1864,18 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, * Update the parent entry pointing to the moved block. 
*/ par_node->btree[entno].before = cpu_to_be32(dead_blkno); - xfs_da_log_buf(tp, par_buf, + xfs_trans_log_buf(tp, par_buf, XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, sizeof(par_node->btree[entno].before))); - xfs_da_buf_done(par_buf); - xfs_da_buf_done(dead_buf); *dead_blknop = last_blkno; *dead_bufp = last_buf; return 0; done: if (par_buf) - xfs_da_brelse(tp, par_buf); + xfs_trans_brelse(tp, par_buf); if (sib_buf) - xfs_da_brelse(tp, sib_buf); - xfs_da_brelse(tp, last_buf); + xfs_trans_brelse(tp, sib_buf); + xfs_trans_brelse(tp, last_buf); return error; } @@ -1899,8 +1883,10 @@ done: * Remove a btree block from a directory or attribute. */ int -xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, - xfs_dabuf_t *dead_buf) +xfs_da_shrink_inode( + xfs_da_args_t *args, + xfs_dablk_t dead_blkno, + struct xfs_buf *dead_buf) { xfs_inode_t *dp; int done, error, w, count; @@ -1935,7 +1921,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, break; } } - xfs_da_binval(tp, dead_buf); + xfs_trans_binval(tp, dead_buf); return error; } @@ -2099,7 +2085,7 @@ xfs_da_get_buf( struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, + struct xfs_buf **bpp, int whichfork) { struct xfs_buf *bp; @@ -2128,7 +2114,7 @@ xfs_da_get_buf( goto out_free; } - *bpp = xfs_da_buf_make(bp); + *bpp = bp; out_free: if (mapp != &map) @@ -2146,7 +2132,7 @@ xfs_da_read_buf( struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, + struct xfs_buf **bpp, int whichfork) { struct xfs_buf *bp; @@ -2178,16 +2164,14 @@ xfs_da_read_buf( else xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); - *bpp = xfs_da_buf_make(bp); - /* * This verification code will be moved to a CRC verification callback * function so just leave it here unchanged until then. 
*/ { - xfs_dir2_data_hdr_t *hdr = (*bpp)->data; - xfs_dir2_free_t *free = (*bpp)->data; - xfs_da_blkinfo_t *info = (*bpp)->data; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + xfs_dir2_free_t *free = bp->b_addr; + xfs_da_blkinfo_t *info = bp->b_addr; uint magic, magic1; struct xfs_mount *mp = dp->i_mount; @@ -2207,11 +2191,11 @@ xfs_da_read_buf( XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", XFS_ERRLEVEL_LOW, mp, info); error = XFS_ERROR(EFSCORRUPTED); - xfs_da_brelse(trans, *bpp); + xfs_trans_brelse(trans, bp); goto out_free; } } - + *bpp = bp; out_free: if (mapp != &map) kmem_free(mapp); @@ -2259,7 +2243,6 @@ out_free: } kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ -kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ /* * Allocate a dir-state structure. @@ -2279,13 +2262,8 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state) { int i; - for (i = 0; i < state->altpath.active; i++) { - if (state->altpath.blk[i].bp) { - if (state->altpath.blk[i].bp != state->path.blk[i].bp) - xfs_da_buf_done(state->altpath.blk[i].bp); - state->altpath.blk[i].bp = NULL; - } - } + for (i = 0; i < state->altpath.active; i++) + state->altpath.blk[i].bp = NULL; state->altpath.active = 0; } @@ -2295,88 +2273,9 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state) void xfs_da_state_free(xfs_da_state_t *state) { - int i; - xfs_da_state_kill_altpath(state); - for (i = 0; i < state->path.active; i++) { - if (state->path.blk[i].bp) - xfs_da_buf_done(state->path.blk[i].bp); - } - if (state->extravalid && state->extrablk.bp) - xfs_da_buf_done(state->extrablk.bp); #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ kmem_zone_free(xfs_da_state_zone, state); } - -/* - * Create a dabuf. - */ -/* ARGSUSED */ -STATIC xfs_dabuf_t * -xfs_da_buf_make(xfs_buf_t *bp) -{ - xfs_dabuf_t *dabuf; - - dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); - dabuf->bbcount = bp->b_length; - dabuf->data = bp->b_addr; - dabuf->bp = bp; - return dabuf; -} - -/* - * Release a dabuf. 
- */ -void -xfs_da_buf_done(xfs_dabuf_t *dabuf) -{ - ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); - kmem_zone_free(xfs_dabuf_zone, dabuf); -} - -/* - * Log transaction from a dabuf. - */ -void -xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) -{ - ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); - ASSERT(dabuf->data == dabuf->bp->b_addr); - xfs_trans_log_buf(tp, dabuf->bp, first, last); -} - -/* - * Release dabuf from a transaction. - * Have to free up the dabuf before the buffers are released, - * since the synchronization on the dabuf is really the lock on the buffer. - */ -void -xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf) -{ - ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); - xfs_trans_brelse(tp, dabuf->bp); - xfs_da_buf_done(dabuf); -} - -/* - * Invalidate dabuf from a transaction. - */ -void -xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf) -{ - ASSERT(dabuf->data && dabuf->bbcount && dabuf->bp); - xfs_da_buf_done(dabuf); - xfs_trans_binval(tp, dabuf->bp); -} - -/* - * Get the first daddr from a dabuf. - */ -xfs_daddr_t -xfs_da_blkno(xfs_dabuf_t *dabuf) -{ - ASSERT(dabuf->data); - return XFS_BUF_ADDR(dabuf->bp); -} diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 0b64c4a37afb..9f37aa03eb38 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -132,20 +132,6 @@ typedef struct xfs_da_args { { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ { XFS_DA_OP_CILOOKUP, "CILOOKUP" } -/* - * Structure to describe buffer(s) for a block. - * This is needed in the directory version 2 format case, when - * multiple non-contiguous fsblocks might be needed to cover one - * logical directory block. - * If the buffer count is 1 then the data pointer points to the - * same place as the b_addr field for the buffer, else to kmem_alloced memory. 
- */ -typedef struct xfs_dabuf { - short bbcount; /* how large is data in bbs */ - void *data; /* pointer for buffers' data */ - struct xfs_buf *bp; /* actually nbuf of these */ -} xfs_dabuf_t; - /* * Storage for holding state during Btree searches and split/join ops. * @@ -154,7 +140,7 @@ typedef struct xfs_dabuf { * which is slightly more than enough. */ typedef struct xfs_da_state_blk { - xfs_dabuf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ xfs_dablk_t blkno; /* filesystem blkno of buffer */ xfs_daddr_t disk_blkno; /* on-disk blkno (in BBs) of buffer */ int index; /* relevant index into block */ @@ -207,7 +193,7 @@ struct xfs_nameops { * Routines used for growing the Btree. */ int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, - xfs_dabuf_t **bpp, int whichfork); + struct xfs_buf **bpp, int whichfork); int xfs_da_split(xfs_da_state_t *state); /* @@ -237,14 +223,14 @@ int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, int count); int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, - xfs_dabuf_t **bp, int whichfork); + struct xfs_buf **bp, int whichfork); int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, int whichfork); + struct xfs_buf **bpp, int whichfork); xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, int whichfork); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, - xfs_dabuf_t *dead_buf); + struct xfs_buf *dead_buf); uint xfs_da_hashname(const __uint8_t *name_string, int name_length); enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, @@ -254,15 +240,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, xfs_da_state_t *xfs_da_state_alloc(void); void xfs_da_state_free(xfs_da_state_t *state); -void xfs_da_buf_done(xfs_dabuf_t *dabuf); -void xfs_da_log_buf(struct 
xfs_trans *tp, xfs_dabuf_t *dabuf, uint first, - uint last); -void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf); -void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf); -xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); - extern struct kmem_zone *xfs_da_state_zone; -extern struct kmem_zone *xfs_dabuf_zone; extern const struct xfs_nameops xfs_default_nameops; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 67a250c36d41..b26a50f9921d 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -592,7 +592,7 @@ int xfs_dir2_shrink_inode( xfs_da_args_t *args, xfs_dir2_db_t db, - xfs_dabuf_t *bp) + struct xfs_buf *bp) { xfs_fileoff_t bno; /* directory file offset */ xfs_dablk_t da; /* directory file offset */ @@ -634,7 +634,7 @@ xfs_dir2_shrink_inode( /* * Invalidate the buffer from the transaction. */ - xfs_da_binval(tp, bp); + xfs_trans_binval(tp, bp); /* * If it's not a data block, we're done. */ diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 586732f2d80d..e93ca8f054f4 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -37,10 +37,10 @@ /* * Local function prototypes. 
*/ -static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, xfs_dabuf_t *bp, int first, - int last); -static void xfs_dir2_block_log_tail(xfs_trans_t *tp, xfs_dabuf_t *bp); -static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp, +static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp, + int first, int last); +static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp); +static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp, int *entno); static int xfs_dir2_block_sort(const void *a, const void *b); @@ -66,7 +66,7 @@ xfs_dir2_block_addname( xfs_dir2_data_free_t *bf; /* bestfree table in block */ xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* buffer for block */ + struct xfs_buf *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ int compact; /* need to compact leaf ents */ xfs_dir2_data_entry_t *dep; /* block data entry */ @@ -102,14 +102,14 @@ xfs_dir2_block_addname( return error; } ASSERT(bp != NULL); - hdr = bp->data; + hdr = bp->b_addr; /* * Check the magic number, corrupted if wrong. */ if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", XFS_ERRLEVEL_LOW, mp, hdr); - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); } len = xfs_dir2_data_entsize(args->namelen); @@ -212,7 +212,7 @@ xfs_dir2_block_addname( * If this isn't a real add, we're done with the buffer. */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); /* * If we don't have space for the new entry & leaf ... */ @@ -228,7 +228,6 @@ xfs_dir2_block_addname( * Then add the new entry in that format. 
*/ error = xfs_dir2_block_to_leaf(args, bp); - xfs_da_buf_done(bp); if (error) return error; return xfs_dir2_leaf_addname(args); @@ -422,7 +421,6 @@ xfs_dir2_block_addname( xfs_dir2_block_log_tail(tp, bp); xfs_dir2_data_log_entry(tp, bp, dep); xfs_dir2_data_check(dp, bp); - xfs_da_buf_done(bp); return 0; } @@ -437,7 +435,7 @@ xfs_dir2_block_getdents( filldir_t filldir) { xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dabuf_t *bp; /* buffer for block */ + struct xfs_buf *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_dir2_data_unused_t *dup; /* block unused entry */ @@ -469,7 +467,7 @@ xfs_dir2_block_getdents( * We'll skip entries before this. */ wantoff = xfs_dir2_dataptr_to_off(mp, *offset); - hdr = bp->data; + hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); /* * Set up values for the loop. @@ -514,7 +512,7 @@ xfs_dir2_block_getdents( cook & 0x7fffffff, be64_to_cpu(dep->inumber), DT_UNKNOWN)) { *offset = cook & 0x7fffffff; - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return 0; } } @@ -525,7 +523,7 @@ xfs_dir2_block_getdents( */ *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & 0x7fffffff; - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return 0; } @@ -535,17 +533,17 @@ xfs_dir2_block_getdents( static void xfs_dir2_block_log_leaf( xfs_trans_t *tp, /* transaction structure */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_buf *bp, /* block buffer */ int first, /* index of first logged leaf */ int last) /* index of last logged leaf */ { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; xfs_dir2_leaf_entry_t *blp; xfs_dir2_block_tail_t *btp; btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); blp = xfs_dir2_block_leaf_p(btp); - xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), + xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); } @@ 
-555,13 +553,13 @@ xfs_dir2_block_log_leaf( static void xfs_dir2_block_log_tail( xfs_trans_t *tp, /* transaction structure */ - xfs_dabuf_t *bp) /* block buffer */ + struct xfs_buf *bp) /* block buffer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; xfs_dir2_block_tail_t *btp; btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); - xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), + xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), (uint)((char *)(btp + 1) - (char *)hdr - 1)); } @@ -575,7 +573,7 @@ xfs_dir2_block_lookup( { xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -593,7 +591,7 @@ xfs_dir2_block_lookup( return error; dp = args->dp; mp = dp->i_mount; - hdr = bp->data; + hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); @@ -607,7 +605,7 @@ xfs_dir2_block_lookup( */ args->inumber = be64_to_cpu(dep->inumber); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return XFS_ERROR(error); } @@ -617,13 +615,13 @@ xfs_dir2_block_lookup( static int /* error */ xfs_dir2_block_lookup_int( xfs_da_args_t *args, /* dir lookup arguments */ - xfs_dabuf_t **bpp, /* returned block buffer */ + struct xfs_buf **bpp, /* returned block buffer */ int *entno) /* returned entry number */ { xfs_dir2_dataptr_t addr; /* data entry address */ xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* 
incore inode */ @@ -647,7 +645,7 @@ xfs_dir2_block_lookup_int( return error; } ASSERT(bp != NULL); - hdr = bp->data; + hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); @@ -666,7 +664,7 @@ xfs_dir2_block_lookup_int( high = mid - 1; if (low > high) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return XFS_ERROR(ENOENT); } } @@ -714,7 +712,7 @@ xfs_dir2_block_lookup_int( /* * No match, release the buffer and return ENOENT. */ - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return XFS_ERROR(ENOENT); } @@ -728,7 +726,7 @@ xfs_dir2_block_removename( { xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -753,7 +751,7 @@ xfs_dir2_block_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - hdr = bp->data; + hdr = bp->b_addr; btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -790,10 +788,9 @@ xfs_dir2_block_removename( * See if the size as a shortform is good enough. */ size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) { - xfs_da_buf_done(bp); + if (size > XFS_IFORK_DSIZE(dp)) return 0; - } + /* * If it works, do the conversion. 
*/ @@ -810,7 +807,7 @@ xfs_dir2_block_replace( { xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -829,7 +826,7 @@ xfs_dir2_block_replace( } dp = args->dp; mp = dp->i_mount; - hdr = bp->data; + hdr = bp->b_addr; btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -844,7 +841,6 @@ xfs_dir2_block_replace( dep->inumber = cpu_to_be64(args->inumber); xfs_dir2_data_log_entry(args->trans, bp, dep); xfs_dir2_data_check(dp, bp); - xfs_da_buf_done(bp); return 0; } @@ -871,8 +867,8 @@ xfs_dir2_block_sort( int /* error */ xfs_dir2_leaf_to_block( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp, /* leaf buffer */ - xfs_dabuf_t *dbp) /* data buffer */ + struct xfs_buf *lbp, /* leaf buffer */ + struct xfs_buf *dbp) /* data buffer */ { __be16 *bestsp; /* leaf bests table */ xfs_dir2_data_hdr_t *hdr; /* block header */ @@ -898,7 +894,7 @@ xfs_dir2_leaf_to_block( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = lbp->data; + leaf = lbp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* @@ -914,11 +910,9 @@ xfs_dir2_leaf_to_block( if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) - goto out; - } else { - error = 0; - goto out; - } + return error; + } else + return 0; } /* * Read the data block if we don't already have it, give up if it fails. @@ -926,9 +920,9 @@ xfs_dir2_leaf_to_block( if (dbp == NULL && (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp, XFS_DATA_FORK))) { - goto out; + return error; } - hdr = dbp->data; + hdr = dbp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); /* * Size of the "leaf" area in the block. 
@@ -944,10 +938,9 @@ xfs_dir2_leaf_to_block( * If it's not free or is too short we can't do it. */ if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG || - be16_to_cpu(dup->length) < size) { - error = 0; - goto out; - } + be16_to_cpu(dup->length) < size) + return 0; + /* * Start converting it to block form. */ @@ -989,25 +982,17 @@ xfs_dir2_leaf_to_block( * Pitch the old leaf block. */ error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp); - lbp = NULL; - if (error) { - goto out; - } + if (error) + return error; + /* * Now see if the resulting block can be shrunken to shortform. */ size = xfs_dir2_block_sfsize(dp, hdr, &sfh); - if (size > XFS_IFORK_DSIZE(dp)) { - error = 0; - goto out; - } + if (size > XFS_IFORK_DSIZE(dp)) + return 0; + return xfs_dir2_block_to_sf(args, dbp, size, &sfh); -out: - if (lbp) - xfs_da_buf_done(lbp); - if (dbp) - xfs_da_buf_done(dbp); - return error; } /* @@ -1020,7 +1005,7 @@ xfs_dir2_sf_to_block( xfs_dir2_db_t blkno; /* dir-relative block # (0) */ xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ @@ -1088,7 +1073,7 @@ xfs_dir2_sf_to_block( kmem_free(sfp); return error; } - hdr = bp->data; + hdr = bp->b_addr; hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* * Compute size of block "tail" area. 
@@ -1217,6 +1202,5 @@ xfs_dir2_sf_to_block( xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); xfs_dir2_block_log_tail(tp, bp); xfs_dir2_data_check(dp, bp); - xfs_da_buf_done(bp); return 0; } diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 2046988e9eb2..44ffd4d6bc91 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -42,8 +42,8 @@ xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); */ void xfs_dir2_data_check( - xfs_inode_t *dp, /* incore inode pointer */ - xfs_dabuf_t *bp) /* data block's buffer */ + struct xfs_inode *dp, /* incore inode pointer */ + struct xfs_buf *bp) /* data block's buffer */ { xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ xfs_dir2_data_free_t *bf; /* bestfree table */ @@ -65,7 +65,7 @@ xfs_dir2_data_check( struct xfs_name name; mp = dp->i_mount; - hdr = bp->data; + hdr = bp->b_addr; bf = hdr->bestfree; p = (char *)(hdr + 1); @@ -389,9 +389,9 @@ int /* error */ xfs_dir2_data_init( xfs_da_args_t *args, /* directory operation args */ xfs_dir2_db_t blkno, /* logical dir block number */ - xfs_dabuf_t **bpp) /* output block buffer */ + struct xfs_buf **bpp) /* output block buffer */ { - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused entry pointer */ @@ -417,7 +417,7 @@ xfs_dir2_data_init( /* * Initialize the header. 
*/ - hdr = bp->data; + hdr = bp->b_addr; hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { @@ -449,16 +449,16 @@ xfs_dir2_data_init( */ void xfs_dir2_data_log_entry( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), + xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - (char *)hdr - 1)); } @@ -468,15 +468,15 @@ xfs_dir2_data_log_entry( */ void xfs_dir2_data_log_header( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1); + xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1); } /* @@ -484,11 +484,11 @@ xfs_dir2_data_log_header( */ void xfs_dir2_data_log_unused( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_unused_t *dup) /* data unused pointer */ { - xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); @@ -496,13 +496,13 @@ xfs_dir2_data_log_unused( /* * Log the first part of the unused entry. 
*/ - xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), + xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ - xfs_da_log_buf(tp, bp, + xfs_trans_log_buf(tp, bp, (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); @@ -514,8 +514,8 @@ xfs_dir2_data_log_unused( */ void xfs_dir2_data_make_free( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ int *needlogp, /* out: log header */ @@ -531,7 +531,7 @@ xfs_dir2_data_make_free( xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ mp = tp->t_mountp; - hdr = bp->data; + hdr = bp->b_addr; /* * Figure out where the end of the data area is. @@ -696,8 +696,8 @@ xfs_dir2_data_make_free( */ void xfs_dir2_data_use_free( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* data block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ xfs_dir2_data_aoff_t len, /* length to use */ @@ -713,7 +713,7 @@ xfs_dir2_data_use_free( xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - hdr = bp->data; + hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 397ffbcbab1d..69accf6cbc46 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -38,15 +38,15 @@ * Local function declarations. 
*/ #ifdef DEBUG -static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp); +static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir2_leaf_check(dp, bp) #endif -static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp, - int *indexp, xfs_dabuf_t **dbpp); -static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, +static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, + int *indexp, struct xfs_buf **dbpp); +static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, int first, int last); -static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); +static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); /* @@ -55,7 +55,7 @@ static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); int /* error */ xfs_dir2_block_to_leaf( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *dbp) /* input block's buffer */ + struct xfs_buf *dbp) /* input block's buffer */ { __be16 *bestsp; /* leaf's bestsp entries */ xfs_dablk_t blkno; /* leaf block's bno */ @@ -64,7 +64,7 @@ xfs_dir2_block_to_leaf( xfs_dir2_block_tail_t *btp; /* block's tail */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ - xfs_dabuf_t *lbp; /* leaf block's buffer */ + struct xfs_buf *lbp; /* leaf block's buffer */ xfs_dir2_db_t ldb; /* leaf block's bno */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ @@ -95,8 +95,8 @@ xfs_dir2_block_to_leaf( return error; } ASSERT(lbp != NULL); - leaf = lbp->data; - hdr = dbp->data; + leaf = lbp->b_addr; + hdr = dbp->b_addr; xfs_dir2_data_check(dp, dbp); btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); @@ -143,7 +143,6 @@ xfs_dir2_block_to_leaf( xfs_dir2_leaf_check(dp, lbp); xfs_dir2_data_check(dp, dbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, 0); - xfs_da_buf_done(lbp); return 0; } @@ 
-282,7 +281,7 @@ xfs_dir2_leaf_addname( __be16 *bestsp; /* freespace table in leaf */ int compact; /* need to compact leaves */ xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* data unused entry */ @@ -291,7 +290,7 @@ xfs_dir2_leaf_addname( int highstale; /* index of next stale leaf */ int i; /* temporary, index */ int index; /* leaf table position */ - xfs_dabuf_t *lbp; /* leaf's buffer */ + struct xfs_buf *lbp; /* leaf's buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ int length; /* length of new entry */ xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ @@ -328,7 +327,7 @@ xfs_dir2_leaf_addname( * But if there are dup hash values the index is of the first of those. */ index = xfs_dir2_leaf_search_hash(args, lbp); - leaf = lbp->data; + leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); length = xfs_dir2_data_entsize(args->namelen); @@ -402,14 +401,13 @@ xfs_dir2_leaf_addname( */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(ENOSPC); } /* * Convert to node form. */ error = xfs_dir2_leaf_to_node(args, lbp); - xfs_da_buf_done(lbp); if (error) return error; /* @@ -427,7 +425,7 @@ xfs_dir2_leaf_addname( * a new data block. */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; } /* @@ -435,7 +433,7 @@ xfs_dir2_leaf_addname( * changed anything. 
*/ if (args->total == 0 && use_block == -1) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(ENOSPC); } /* @@ -466,14 +464,14 @@ xfs_dir2_leaf_addname( */ if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &use_block))) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } /* * Initialize the block. */ if ((error = xfs_dir2_data_init(args, use_block, &dbp))) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } /* @@ -493,7 +491,7 @@ xfs_dir2_leaf_addname( */ else xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); - hdr = dbp->data; + hdr = dbp->b_addr; bestsp[use_block] = hdr->bestfree[0].length; grown = 1; } @@ -505,10 +503,10 @@ xfs_dir2_leaf_addname( if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block), -1, &dbp, XFS_DATA_FORK))) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } - hdr = dbp->data; + hdr = dbp->b_addr; grown = 0; } xfs_dir2_data_check(dp, dbp); @@ -570,9 +568,7 @@ xfs_dir2_leaf_addname( xfs_dir2_leaf_log_header(tp, lbp); xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); xfs_dir2_leaf_check(dp, lbp); - xfs_da_buf_done(lbp); xfs_dir2_data_check(dp, dbp); - xfs_da_buf_done(dbp); return 0; } @@ -583,8 +579,8 @@ xfs_dir2_leaf_addname( */ STATIC void xfs_dir2_leaf_check( - xfs_inode_t *dp, /* incore directory inode */ - xfs_dabuf_t *bp) /* leaf's buffer */ + struct xfs_inode *dp, /* incore directory inode */ + struct xfs_buf *bp) /* leaf's buffer */ { int i; /* leaf index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -592,7 +588,7 @@ xfs_dir2_leaf_check( xfs_mount_t *mp; /* filesystem mount point */ int stale; /* count of stale leaves */ - leaf = bp->data; + leaf = bp->b_addr; mp = dp->i_mount; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); /* @@ -628,14 +624,14 @@ xfs_dir2_leaf_check( void xfs_dir2_leaf_compact( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct 
xfs_buf *bp) /* leaf buffer */ { int from; /* source leaf index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ int loglow; /* first leaf entry to log */ int to; /* target leaf index */ - leaf = bp->data; + leaf = bp->b_addr; if (!leaf->hdr.stale) { return; } @@ -677,7 +673,7 @@ xfs_dir2_leaf_compact( */ void xfs_dir2_leaf_compact_x1( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int *indexp, /* insertion index */ int *lowstalep, /* out: stale entry before us */ int *highstalep, /* out: stale entry after us */ @@ -693,7 +689,7 @@ xfs_dir2_leaf_compact_x1( int newindex=0; /* new insertion index */ int to; /* destination copy index */ - leaf = bp->data; + leaf = bp->b_addr; ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); index = *indexp; @@ -775,7 +771,7 @@ xfs_dir2_leaf_getdents( xfs_off_t *offset, filldir_t filldir) { - xfs_dabuf_t *bp; /* data block buffer */ + struct xfs_buf *bp; /* data block buffer */ int byteoff; /* offset in current block */ xfs_dir2_db_t curdb; /* db for current block */ xfs_dir2_off_t curoff; /* current overall offset */ @@ -839,13 +835,13 @@ xfs_dir2_leaf_getdents( * If we have no buffer, or we're off the end of the * current buffer, need to get another one. */ - if (!bp || ptr >= (char *)bp->data + mp->m_dirblksize) { + if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) { /* * If we have a buffer, we need to release it and * take it out of the mapping. */ if (bp) { - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); bp = NULL; map_blocks -= mp->m_dirblkfsbs; /* @@ -1035,7 +1031,7 @@ xfs_dir2_leaf_getdents( else if (curoff > newoff) ASSERT(xfs_dir2_byte_to_db(mp, curoff) == curdb); - hdr = bp->data; + hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); /* * Find our position in the block. 
@@ -1119,7 +1115,7 @@ xfs_dir2_leaf_getdents( *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; kmem_free(map); if (bp) - xfs_da_brelse(NULL, bp); + xfs_trans_brelse(NULL, bp); return error; } @@ -1130,10 +1126,10 @@ int xfs_dir2_leaf_init( xfs_da_args_t *args, /* operation arguments */ xfs_dir2_db_t bno, /* directory block number */ - xfs_dabuf_t **bpp, /* out: leaf buffer */ + struct xfs_buf **bpp, /* out: leaf buffer */ int magic) /* magic number for block */ { - xfs_dabuf_t *bp; /* leaf buffer */ + struct xfs_buf *bp; /* leaf buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -1156,7 +1152,7 @@ xfs_dir2_leaf_init( return error; } ASSERT(bp != NULL); - leaf = bp->data; + leaf = bp->b_addr; /* * Initialize the header. */ @@ -1186,7 +1182,7 @@ xfs_dir2_leaf_init( static void xfs_dir2_leaf_log_bests( xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int first, /* first entry to log */ int last) /* last entry to log */ { @@ -1195,12 +1191,12 @@ xfs_dir2_leaf_log_bests( xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; - xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); } @@ -1210,7 +1206,7 @@ xfs_dir2_leaf_log_bests( void xfs_dir2_leaf_log_ents( xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int first, /* first entry to log */ int last) /* last entry to log */ { @@ -1218,12 +1214,12 @@ xfs_dir2_leaf_log_ents( 
xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ xfs_dir2_leaf_t *leaf; /* leaf structure */ - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); firstlep = &leaf->ents[first]; lastlep = &leaf->ents[last]; - xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); } @@ -1232,15 +1228,15 @@ xfs_dir2_leaf_log_ents( */ void xfs_dir2_leaf_log_header( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { xfs_dir2_leaf_t *leaf; /* leaf structure */ - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); - xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1)); } @@ -1249,18 +1245,18 @@ xfs_dir2_leaf_log_header( */ STATIC void xfs_dir2_leaf_log_tail( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ xfs_mount_t *mp; /* filesystem mount point */ mp = tp->t_mountp; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(mp, leaf); - xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); } @@ -1273,12 +1269,12 @@ int xfs_dir2_leaf_lookup( xfs_da_args_t *args) /* operation arguments */ { - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block 
buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ int index; /* found entry index */ - xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ @@ -1294,7 +1290,7 @@ xfs_dir2_leaf_lookup( tp = args->trans; dp = args->dp; xfs_dir2_leaf_check(dp, lbp); - leaf = lbp->data; + leaf = lbp->b_addr; /* * Get to the leaf entry and contained data entry address. */ @@ -1303,15 +1299,15 @@ xfs_dir2_leaf_lookup( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->data + + ((char *)dbp->b_addr + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); /* * Return the found inode number & CI name if appropriate */ args->inumber = be64_to_cpu(dep->inumber); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - xfs_da_brelse(tp, dbp); - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, dbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(error); } @@ -1324,17 +1320,17 @@ xfs_dir2_leaf_lookup( static int /* error */ xfs_dir2_leaf_lookup_int( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t **lbpp, /* out: leaf buffer */ + struct xfs_buf **lbpp, /* out: leaf buffer */ int *indexp, /* out: index in leaf block */ - xfs_dabuf_t **dbpp) /* out: data buffer */ + struct xfs_buf **dbpp) /* out: data buffer */ { xfs_dir2_db_t curdb = -1; /* current data block number */ - xfs_dabuf_t *dbp = NULL; /* data buffer */ + struct xfs_buf *dbp = NULL; /* data buffer */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ int index; /* index in leaf block */ - xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_mount_t 
*mp; /* filesystem mount point */ @@ -1354,7 +1350,7 @@ xfs_dir2_leaf_lookup_int( if (error) return error; *lbpp = lbp; - leaf = lbp->data; + leaf = lbp->b_addr; xfs_dir2_leaf_check(dp, lbp); /* * Look for the first leaf entry with our hash value. @@ -1382,12 +1378,12 @@ xfs_dir2_leaf_lookup_int( */ if (newdb != curdb) { if (dbp) - xfs_da_brelse(tp, dbp); + xfs_trans_brelse(tp, dbp); error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, newdb), -1, &dbp, XFS_DATA_FORK); if (error) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } xfs_dir2_data_check(dp, dbp); @@ -1396,7 +1392,7 @@ xfs_dir2_leaf_lookup_int( /* * Point to the data entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)dbp->data + + dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* * Compare name and if it's an exact match, return the index @@ -1424,12 +1420,12 @@ xfs_dir2_leaf_lookup_int( if (args->cmpresult == XFS_CMP_CASE) { ASSERT(cidb != -1); if (cidb != curdb) { - xfs_da_brelse(tp, dbp); + xfs_trans_brelse(tp, dbp); error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, cidb), -1, &dbp, XFS_DATA_FORK); if (error) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } } @@ -1441,8 +1437,8 @@ xfs_dir2_leaf_lookup_int( */ ASSERT(cidb == -1); if (dbp) - xfs_da_brelse(tp, dbp); - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, dbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(ENOENT); } @@ -1456,13 +1452,13 @@ xfs_dir2_leaf_removename( __be16 *bestsp; /* leaf block best freespace */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry structure */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_db_t i; /* temporary data block # */ int index; /* index into leaf entries */ - xfs_dabuf_t *lbp; 
/* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ @@ -1483,8 +1479,8 @@ xfs_dir2_leaf_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = lbp->data; - hdr = dbp->data; + leaf = lbp->b_addr; + hdr = dbp->b_addr; xfs_dir2_data_check(dp, dbp); /* * Point to the leaf entry, use that to point to the data entry. @@ -1541,12 +1537,9 @@ xfs_dir2_leaf_removename( * Just go on, returning success, leaving the * empty block in place. */ - if (error == ENOSPC && args->total == 0) { - xfs_da_buf_done(dbp); + if (error == ENOSPC && args->total == 0) error = 0; - } xfs_dir2_leaf_check(dp, lbp); - xfs_da_buf_done(lbp); return error; } dbp = NULL; @@ -1577,10 +1570,9 @@ xfs_dir2_leaf_removename( /* * If the data block was not the first one, drop it. */ - else if (db != mp->m_dirdatablk && dbp != NULL) { - xfs_da_buf_done(dbp); + else if (db != mp->m_dirdatablk) dbp = NULL; - } + xfs_dir2_leaf_check(dp, lbp); /* * See if we can convert to block form. @@ -1595,12 +1587,12 @@ int /* error */ xfs_dir2_leaf_replace( xfs_da_args_t *args) /* operation arguments */ { - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ int index; /* index of leaf entry */ - xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ @@ -1614,7 +1606,7 @@ xfs_dir2_leaf_replace( return error; } dp = args->dp; - leaf = lbp->data; + leaf = lbp->b_addr; /* * Point to the leaf entry, get data address from it. */ @@ -1623,7 +1615,7 @@ xfs_dir2_leaf_replace( * Point to the data entry. 
*/ dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->data + + ((char *)dbp->b_addr + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); ASSERT(args->inumber != be64_to_cpu(dep->inumber)); /* @@ -1632,9 +1624,8 @@ xfs_dir2_leaf_replace( dep->inumber = cpu_to_be64(args->inumber); tp = args->trans; xfs_dir2_data_log_entry(tp, dbp, dep); - xfs_da_buf_done(dbp); xfs_dir2_leaf_check(dp, lbp); - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return 0; } @@ -1646,7 +1637,7 @@ xfs_dir2_leaf_replace( int /* index value */ xfs_dir2_leaf_search_hash( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp) /* leaf buffer */ + struct xfs_buf *lbp) /* leaf buffer */ { xfs_dahash_t hash=0; /* hash from this entry */ xfs_dahash_t hashwant; /* hash value looking for */ @@ -1656,7 +1647,7 @@ xfs_dir2_leaf_search_hash( xfs_dir2_leaf_entry_t *lep; /* leaf entry */ int mid=0; /* current leaf index */ - leaf = lbp->data; + leaf = lbp->b_addr; #ifndef __KERNEL__ if (!leaf->hdr.count) return 0; @@ -1699,11 +1690,11 @@ xfs_dir2_leaf_search_hash( int /* error */ xfs_dir2_leaf_trim_data( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp, /* leaf buffer */ + struct xfs_buf *lbp, /* leaf buffer */ xfs_dir2_db_t db) /* data block number */ { __be16 *bestsp; /* leaf bests table */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -1722,12 +1713,12 @@ xfs_dir2_leaf_trim_data( return error; } - leaf = lbp->data; + leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); #ifdef DEBUG { - struct xfs_dir2_data_hdr *hdr = dbp->data; + struct xfs_dir2_data_hdr *hdr = dbp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); ASSERT(be16_to_cpu(hdr->bestfree[0].length) == @@ -1741,7 +1732,7 @@ xfs_dir2_leaf_trim_data( */ if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { 
ASSERT(error != ENOSPC); - xfs_da_brelse(tp, dbp); + xfs_trans_brelse(tp, dbp); return error; } /* @@ -1781,10 +1772,10 @@ xfs_dir2_node_to_leaf( xfs_da_args_t *args; /* operation arguments */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ - xfs_dabuf_t *fbp; /* buffer for freespace block */ + struct xfs_buf *fbp; /* buffer for freespace block */ xfs_fileoff_t fo; /* freespace file offset */ xfs_dir2_free_t *free; /* freespace structure */ - xfs_dabuf_t *lbp; /* buffer for leaf block */ + struct xfs_buf *lbp; /* buffer for leaf block */ xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_mount_t *mp; /* filesystem mount point */ @@ -1838,7 +1829,7 @@ xfs_dir2_node_to_leaf( if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize) return 0; lbp = state->path.blk[0].bp; - leaf = lbp->data; + leaf = lbp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * Read the freespace block. @@ -1847,7 +1838,7 @@ xfs_dir2_node_to_leaf( XFS_DATA_FORK))) { return error; } - free = fbp->data; + free = fbp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); ASSERT(!free->hdr.firstdb); @@ -1857,7 +1848,7 @@ xfs_dir2_node_to_leaf( */ if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > mp->m_dirblksize) { - xfs_da_brelse(tp, fbp); + xfs_trans_brelse(tp, fbp); return 0; } diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index b0f26780449d..6c7052406605 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -36,20 +36,20 @@ /* * Function declarations. 
*/ -static void xfs_dir2_free_log_header(xfs_trans_t *tp, xfs_dabuf_t *bp); -static int xfs_dir2_leafn_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index); +static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, + int index); #ifdef DEBUG -static void xfs_dir2_leafn_check(xfs_inode_t *dp, xfs_dabuf_t *bp); +static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir2_leafn_check(dp, bp) #endif -static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, xfs_dabuf_t *bp_s, - int start_s, xfs_dabuf_t *bp_d, int start_d, - int count); +static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s, + int start_s, struct xfs_buf *bp_d, + int start_d, int count); static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_state_blk_t *blk2); -static int xfs_dir2_leafn_remove(xfs_da_args_t *args, xfs_dabuf_t *bp, +static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, int index, xfs_da_state_blk_t *dblk, int *rval); static int xfs_dir2_node_addname_int(xfs_da_args_t *args, @@ -60,16 +60,16 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, */ STATIC void xfs_dir2_free_log_bests( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* freespace buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ { xfs_dir2_free_t *free; /* freespace structure */ - free = bp->data; + free = bp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); - xfs_da_log_buf(tp, bp, + xfs_trans_log_buf(tp, bp, (uint)((char *)&free->bests[first] - (char *)free), (uint)((char *)&free->bests[last] - (char *)free + sizeof(free->bests[0]) - 1)); @@ -80,14 +80,14 @@ xfs_dir2_free_log_bests( */ static void xfs_dir2_free_log_header( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* freespace buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { xfs_dir2_free_t 
*free; /* freespace structure */ - free = bp->data; + free = bp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); - xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), + xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); } @@ -99,11 +99,11 @@ xfs_dir2_free_log_header( int /* error */ xfs_dir2_leaf_to_node( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp) /* leaf buffer */ + struct xfs_buf *lbp) /* leaf buffer */ { xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ - xfs_dabuf_t *fbp; /* freespace buffer */ + struct xfs_buf *fbp; /* freespace buffer */ xfs_dir2_db_t fdb; /* freespace block number */ xfs_dir2_free_t *free; /* freespace structure */ __be16 *from; /* pointer to freespace entry */ @@ -136,8 +136,8 @@ xfs_dir2_leaf_to_node( return error; } ASSERT(fbp != NULL); - free = fbp->data; - leaf = lbp->data; + free = fbp->b_addr; + leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * Initialize the freespace block header. 
@@ -164,7 +164,6 @@ xfs_dir2_leaf_to_node( xfs_dir2_leaf_log_header(tp, lbp); xfs_dir2_free_log_header(tp, fbp); xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1); - xfs_da_buf_done(fbp); xfs_dir2_leafn_check(dp, lbp); return 0; } @@ -175,7 +174,7 @@ xfs_dir2_leaf_to_node( */ static int /* error */ xfs_dir2_leafn_add( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int index) /* insertion pt for new entry */ { @@ -195,7 +194,7 @@ xfs_dir2_leafn_add( dp = args->dp; mp = dp->i_mount; tp = args->trans; - leaf = bp->data; + leaf = bp->b_addr; /* * Quick check just to make sure we are not going to index @@ -261,15 +260,15 @@ xfs_dir2_leafn_add( */ void xfs_dir2_leafn_check( - xfs_inode_t *dp, /* incore directory inode */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_inode *dp, + struct xfs_buf *bp) { int i; /* leaf index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_mount_t *mp; /* filesystem mount point */ int stale; /* count of stale leaves */ - leaf = bp->data; + leaf = bp->b_addr; mp = dp->i_mount; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); @@ -291,12 +290,12 @@ xfs_dir2_leafn_check( */ xfs_dahash_t /* hash value */ xfs_dir2_leafn_lasthash( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int *count) /* count of entries in leaf */ { xfs_dir2_leaf_t *leaf; /* leaf structure */ - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); if (count) *count = be16_to_cpu(leaf->hdr.count); @@ -311,12 +310,12 @@ xfs_dir2_leafn_lasthash( */ STATIC int xfs_dir2_leafn_lookup_for_addname( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ { - 
xfs_dabuf_t *curbp = NULL; /* current data/free buffer */ + struct xfs_buf *curbp = NULL; /* current data/free buffer */ xfs_dir2_db_t curdb = -1; /* current data block number */ xfs_dir2_db_t curfdb = -1; /* current free block number */ xfs_inode_t *dp; /* incore directory inode */ @@ -335,7 +334,7 @@ xfs_dir2_leafn_lookup_for_addname( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); @@ -352,7 +351,7 @@ xfs_dir2_leafn_lookup_for_addname( /* If so, it's a free block buffer, get the block number. */ curbp = state->extrablk.bp; curfdb = state->extrablk.blkno; - free = curbp->data; + free = curbp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); } length = xfs_dir2_data_entsize(args->namelen); @@ -394,7 +393,7 @@ xfs_dir2_leafn_lookup_for_addname( * If we had one before, drop it. */ if (curbp) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); /* * Read the free block. 
*/ @@ -403,7 +402,7 @@ xfs_dir2_leafn_lookup_for_addname( -1, &curbp, XFS_DATA_FORK); if (error) return error; - free = curbp->data; + free = curbp->b_addr; ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT((be32_to_cpu(free->hdr.firstdb) % @@ -424,7 +423,7 @@ xfs_dir2_leafn_lookup_for_addname( XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); return XFS_ERROR(EFSCORRUPTED); } curfdb = newfdb; @@ -459,12 +458,12 @@ out: */ STATIC int xfs_dir2_leafn_lookup_for_entry( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ { - xfs_dabuf_t *curbp = NULL; /* current data/free buffer */ + struct xfs_buf *curbp = NULL; /* current data/free buffer */ xfs_dir2_db_t curdb = -1; /* current data block number */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ @@ -480,7 +479,7 @@ xfs_dir2_leafn_lookup_for_entry( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); @@ -525,7 +524,7 @@ xfs_dir2_leafn_lookup_for_entry( */ if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT || curdb != state->extrablk.blkno)) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); /* * If needing the block that is saved with a CI match, * use it otherwise read in the new data block. @@ -547,7 +546,7 @@ xfs_dir2_leafn_lookup_for_entry( /* * Point to the data entry. 
*/ - dep = (xfs_dir2_data_entry_t *)((char *)curbp->data + + dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* * Compare the entry and if it's an exact match, return @@ -559,7 +558,7 @@ xfs_dir2_leafn_lookup_for_entry( /* If there is a CI match block, drop it */ if (args->cmpresult != XFS_CMP_DIFFERENT && curdb != state->extrablk.blkno) - xfs_da_brelse(tp, state->extrablk.bp); + xfs_trans_brelse(tp, state->extrablk.bp); args->cmpresult = cmp; args->inumber = be64_to_cpu(dep->inumber); *indexp = index; @@ -567,7 +566,7 @@ xfs_dir2_leafn_lookup_for_entry( state->extrablk.bp = curbp; state->extrablk.blkno = curdb; state->extrablk.index = (int)((char *)dep - - (char *)curbp->data); + (char *)curbp->b_addr); state->extrablk.magic = XFS_DIR2_DATA_MAGIC; if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); @@ -586,7 +585,7 @@ xfs_dir2_leafn_lookup_for_entry( } else { /* If the curbp is not the CI match block, drop it */ if (state->extrablk.bp != curbp) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); } } else { state->extravalid = 0; @@ -602,7 +601,7 @@ xfs_dir2_leafn_lookup_for_entry( */ int xfs_dir2_leafn_lookup_int( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ @@ -620,9 +619,9 @@ xfs_dir2_leafn_lookup_int( static void xfs_dir2_leafn_moveents( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp_s, /* source leaf buffer */ + struct xfs_buf *bp_s, /* source leaf buffer */ int start_s, /* source leaf index */ - xfs_dabuf_t *bp_d, /* destination leaf buffer */ + struct xfs_buf *bp_d, /* destination leaf buffer */ int start_d, /* destination leaf index */ int count) /* count of leaves to copy */ { @@ -640,8 +639,8 @@ xfs_dir2_leafn_moveents( return; } tp = args->trans; - leaf_s = bp_s->data; - leaf_d = bp_d->data; + leaf_s = 
bp_s->b_addr; + leaf_d = bp_d->b_addr; /* * If the destination index is not the end of the current * destination leaf entries, open up a hole in the destination @@ -702,14 +701,14 @@ xfs_dir2_leafn_moveents( */ int /* sort order */ xfs_dir2_leafn_order( - xfs_dabuf_t *leaf1_bp, /* leaf1 buffer */ - xfs_dabuf_t *leaf2_bp) /* leaf2 buffer */ + struct xfs_buf *leaf1_bp, /* leaf1 buffer */ + struct xfs_buf *leaf2_bp) /* leaf2 buffer */ { xfs_dir2_leaf_t *leaf1; /* leaf1 structure */ xfs_dir2_leaf_t *leaf2; /* leaf2 structure */ - leaf1 = leaf1_bp->data; - leaf2 = leaf2_bp->data; + leaf1 = leaf1_bp->b_addr; + leaf2 = leaf2_bp->b_addr; ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); if (be16_to_cpu(leaf1->hdr.count) > 0 && @@ -757,8 +756,8 @@ xfs_dir2_leafn_rebalance( blk1 = blk2; blk2 = tmp; } - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count); #ifdef DEBUG oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale); @@ -834,14 +833,14 @@ xfs_dir2_leafn_rebalance( static int /* error */ xfs_dir2_leafn_remove( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int index, /* leaf entry index */ xfs_da_state_blk_t *dblk, /* data block */ int *rval) /* resulting block needs join */ { xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -858,7 +857,7 @@ xfs_dir2_leafn_remove( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == 
cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * Point to the entry we're removing. @@ -884,7 +883,7 @@ xfs_dir2_leafn_remove( * in the data block in case it changes. */ dbp = dblk->bp; - hdr = dbp->data; + hdr = dbp->b_addr; dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); longest = be16_to_cpu(hdr->bestfree[0].length); needlog = needscan = 0; @@ -905,7 +904,7 @@ xfs_dir2_leafn_remove( */ if (longest < be16_to_cpu(hdr->bestfree[0].length)) { int error; /* error return value */ - xfs_dabuf_t *fbp; /* freeblock buffer */ + struct xfs_buf *fbp; /* freeblock buffer */ xfs_dir2_db_t fdb; /* freeblock block number */ int findex; /* index in freeblock entries */ xfs_dir2_free_t *free; /* freeblock structure */ @@ -920,7 +919,7 @@ xfs_dir2_leafn_remove( -1, &fbp, XFS_DATA_FORK))) { return error; } - free = fbp->data; + free = fbp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); ASSERT(be32_to_cpu(free->hdr.firstdb) == xfs_dir2_free_max_bests(mp) * @@ -948,9 +947,7 @@ xfs_dir2_leafn_remove( * In this case just drop the buffer and some one else * will eventually get rid of the empty block. */ - else if (error == ENOSPC && args->total == 0) - xfs_da_buf_done(dbp); - else + else if (!(error == ENOSPC && args->total == 0)) return error; } /* @@ -1018,11 +1015,6 @@ xfs_dir2_leafn_remove( */ if (logfree) xfs_dir2_free_log_bests(tp, fbp, findex, findex); - /* - * Drop the buffer if we still have it. - */ - if (fbp) - xfs_da_buf_done(fbp); } xfs_dir2_leafn_check(dp, bp); /* @@ -1114,7 +1106,7 @@ xfs_dir2_leafn_toosmall( { xfs_da_state_blk_t *blk; /* leaf block */ xfs_dablk_t blkno; /* leaf block number */ - xfs_dabuf_t *bp; /* leaf buffer */ + struct xfs_buf *bp; /* leaf buffer */ int bytes; /* bytes in use */ int count; /* leaf live entry count */ int error; /* error return value */ @@ -1130,7 +1122,7 @@ xfs_dir2_leafn_toosmall( * to coalesce with a sibling. 
*/ blk = &state->path.blk[state->path.active - 1]; - info = blk->bp->data; + info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); @@ -1189,7 +1181,7 @@ xfs_dir2_leafn_toosmall( leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); - leaf = bp->data; + leaf = bp->b_addr; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); @@ -1198,7 +1190,7 @@ xfs_dir2_leafn_toosmall( */ if (bytes >= 0) break; - xfs_da_brelse(state->args->trans, bp); + xfs_trans_brelse(state->args->trans, bp); } /* * Didn't like either block, give up. @@ -1207,11 +1199,7 @@ xfs_dir2_leafn_toosmall( *action = 0; return 0; } - /* - * Done with the sibling leaf block here, drop the dabuf - * so path_shift can get it. - */ - xfs_da_buf_done(bp); + /* * Make altpath point to the block we want to keep (the lower * numbered block) and path point to the block we want to drop. 
@@ -1247,8 +1235,8 @@ xfs_dir2_leafn_unbalance( args = state->args; ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); - drop_leaf = drop_blk->bp->data; - save_leaf = save_blk->bp->data; + drop_leaf = drop_blk->bp->b_addr; + save_leaf = save_blk->bp->b_addr; ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* @@ -1356,13 +1344,13 @@ xfs_dir2_node_addname_int( { xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t dbno; /* data block number */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ int error; /* error return value */ xfs_dir2_db_t fbno; /* freespace block number */ - xfs_dabuf_t *fbp; /* freespace buffer */ + struct xfs_buf *fbp; /* freespace buffer */ int findex; /* freespace entry index */ xfs_dir2_free_t *free=NULL; /* freespace block structure */ xfs_dir2_db_t ifbno; /* initial freespace block no */ @@ -1390,7 +1378,7 @@ xfs_dir2_node_addname_int( * Remember initial freespace block number. */ ifbno = fblk->blkno; - free = fbp->data; + free = fbp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); findex = fblk->index; /* @@ -1474,7 +1462,7 @@ xfs_dir2_node_addname_int( if (unlikely(fbp == NULL)) { continue; } - free = fbp->data; + free = fbp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); findex = 0; } @@ -1492,7 +1480,7 @@ xfs_dir2_node_addname_int( /* * Drop the block. */ - xfs_da_brelse(tp, fbp); + xfs_trans_brelse(tp, fbp); fbp = NULL; if (fblk && fblk->bp) fblk->bp = NULL; @@ -1507,36 +1495,23 @@ xfs_dir2_node_addname_int( /* * Not allowed to allocate, return failure. 
*/ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || - args->total == 0) { - /* - * Drop the freespace buffer unless it came from our - * caller. - */ - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) return XFS_ERROR(ENOSPC); - } + /* * Allocate and initialize the new data block. */ if (unlikely((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &dbno)) || - (error = xfs_dir2_data_init(args, dbno, &dbp)))) { - /* - * Drop the freespace buffer unless it came from our - * caller. - */ - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + (error = xfs_dir2_data_init(args, dbno, &dbp)))) return error; - } + /* * If (somehow) we have a freespace block, get rid of it. */ if (fbp) - xfs_da_brelse(tp, fbp); + xfs_trans_brelse(tp, fbp); if (fblk && fblk->bp) fblk->bp = NULL; @@ -1547,10 +1522,9 @@ xfs_dir2_node_addname_int( fbno = xfs_dir2_db_to_fdb(mp, dbno); if (unlikely(error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno), -2, &fbp, - XFS_DATA_FORK))) { - xfs_da_buf_done(dbp); + XFS_DATA_FORK))) return error; - } + /* * If there wasn't a freespace block, the read will * return a NULL fbp. Allocate and initialize a new one. @@ -1598,7 +1572,7 @@ xfs_dir2_node_addname_int( * Initialize the new block to be empty, and remember * its first slot as our empty slot. */ - free = fbp->data; + free = fbp->b_addr; free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = cpu_to_be32( (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * @@ -1606,7 +1580,7 @@ xfs_dir2_node_addname_int( free->hdr.nvalid = 0; free->hdr.nused = 0; } else { - free = fbp->data; + free = fbp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); } @@ -1639,7 +1613,7 @@ xfs_dir2_node_addname_int( * We haven't allocated the data entry yet so this will * change again. 
*/ - hdr = dbp->data; + hdr = dbp->b_addr; free->bests[findex] = hdr->bestfree[0].length; logfree = 1; } @@ -1650,22 +1624,17 @@ xfs_dir2_node_addname_int( /* * If just checking, we succeeded. */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) { - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; - } + /* * Read the data block in. */ - if (unlikely( - error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno), - -1, &dbp, XFS_DATA_FORK))) { - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno), + -1, &dbp, XFS_DATA_FORK); + if (error) return error; - } - hdr = dbp->data; + hdr = dbp->b_addr; logfree = 0; } ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); @@ -1713,17 +1682,11 @@ xfs_dir2_node_addname_int( */ if (logfree) xfs_dir2_free_log_bests(tp, fbp, findex, findex); - /* - * If the caller didn't hand us the freespace block, drop it. - */ - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); /* * Return the data block and offset in args, then drop the data block. */ args->blkno = (xfs_dablk_t)dbno; args->index = be16_to_cpu(*tagp); - xfs_da_buf_done(dbp); return 0; } @@ -1761,22 +1724,23 @@ xfs_dir2_node_lookup( /* If a CI match, dup the actual name and return EEXIST */ xfs_dir2_data_entry_t *dep; - dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp-> - data + state->extrablk.index); + dep = (xfs_dir2_data_entry_t *) + ((char *)state->extrablk.bp->b_addr + + state->extrablk.index); rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen); } /* * Release the btree blocks and leaf block. */ for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); + xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } /* * Release the data block if we have it. 
*/ if (state->extravalid && state->extrablk.bp) { - xfs_da_brelse(args->trans, state->extrablk.bp); + xfs_trans_brelse(args->trans, state->extrablk.bp); state->extrablk.bp = NULL; } xfs_da_state_free(state); @@ -1893,13 +1857,13 @@ xfs_dir2_node_replace( */ blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - leaf = blk->bp->data; + leaf = blk->bp->b_addr; lep = &leaf->ents[blk->index]; ASSERT(state->extravalid); /* * Point to the data entry. */ - hdr = state->extrablk.bp->data; + hdr = state->extrablk.bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); dep = (xfs_dir2_data_entry_t *) ((char *)hdr + @@ -1916,14 +1880,14 @@ xfs_dir2_node_replace( * Didn't find it, and we're holding a data block. Drop it. */ else if (state->extravalid) { - xfs_da_brelse(args->trans, state->extrablk.bp); + xfs_trans_brelse(args->trans, state->extrablk.bp); state->extrablk.bp = NULL; } /* * Release all the buffers in the cursor. */ for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); + xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } xfs_da_state_free(state); @@ -1940,7 +1904,7 @@ xfs_dir2_node_trim_free( xfs_fileoff_t fo, /* free block number */ int *rvalp) /* out: did something */ { - xfs_dabuf_t *bp; /* freespace buffer */ + struct xfs_buf *bp; /* freespace buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_free_t *free; /* freespace structure */ @@ -1965,13 +1929,13 @@ xfs_dir2_node_trim_free( if (bp == NULL) { return 0; } - free = bp->data; + free = bp->b_addr; ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); /* * If there are used entries, there's nothing to do. */ if (be32_to_cpu(free->hdr.nused) > 0) { - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); *rvalp = 0; return 0; } @@ -1987,7 +1951,7 @@ xfs_dir2_node_trim_free( * pieces. This is the last block of an extent. 
*/ ASSERT(error != ENOSPC); - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return error; } /* diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 067f403ecf8a..3523d3e15aa8 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h @@ -25,7 +25,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, xfs_dir2_db_t *dbp); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, - struct xfs_dabuf *bp); + struct xfs_buf *bp); extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const unsigned char *name, int len); @@ -37,11 +37,11 @@ extern int xfs_dir2_block_lookup(struct xfs_da_args *args); extern int xfs_dir2_block_removename(struct xfs_da_args *args); extern int xfs_dir2_block_replace(struct xfs_da_args *args); extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, - struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); + struct xfs_buf *lbp, struct xfs_buf *dbp); /* xfs_dir2_data.c */ #ifdef DEBUG -extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); +extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir2_data_check(dp,bp) #endif @@ -51,43 +51,43 @@ xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, extern void xfs_dir2_data_freescan(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr, int *loghead); extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, - struct xfs_dabuf **bpp); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, + struct xfs_buf **bpp); +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); extern void xfs_dir2_data_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, + struct xfs_buf *bp); +extern void 
xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); /* xfs_dir2_leaf.c */ extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, - struct xfs_dabuf *dbp); + struct xfs_buf *dbp); extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, - struct xfs_dabuf *bp); -extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, + struct xfs_buf *bp); +extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp, int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_dabuf **bpp, int magic); -extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, + struct xfs_buf **bpp, int magic); +extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, int first, int last); extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); + struct xfs_buf *bp); extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, - struct xfs_dabuf *lbp); + struct xfs_buf *lbp); extern 
int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, - struct xfs_dabuf *lbp, xfs_dir2_db_t db); + struct xfs_buf *lbp, xfs_dir2_db_t db); extern struct xfs_dir2_leaf_entry * xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, int lowstale, int highstale, @@ -96,13 +96,13 @@ extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); -extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, + struct xfs_buf *lbp); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count); +extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, struct xfs_da_args *args, int *indexp, struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); +extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp); extern int xfs_dir2_leafn_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); @@ -122,7 +122,7 @@ extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep); extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); -extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, +extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, int size, xfs_dir2_sf_hdr_t *sfhp); extern int xfs_dir2_sf_addname(struct xfs_da_args *args); extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 19bf0c5e38f4..1b9fc3ec7e4b 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -222,7 +222,7 @@ xfs_dir2_block_sfsize( int /* error */ 
xfs_dir2_block_to_sf( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_buf *bp, int size, /* shortform directory size */ xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { @@ -249,7 +249,7 @@ xfs_dir2_block_to_sf( * and add local data. */ hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(hdr, bp->data, mp->m_dirblksize); + memcpy(hdr, bp->b_addr, mp->m_dirblksize); logflags = XFS_ILOG_CORE; if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { ASSERT(error != ENOSPC); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 425f6e9d4c0c..07f70e17c745 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1491,13 +1491,9 @@ xfs_init_zones(void) if (!xfs_da_state_zone) goto out_destroy_btree_cur_zone; - xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); - if (!xfs_dabuf_zone) - goto out_destroy_da_state_zone; - xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); if (!xfs_ifork_zone) - goto out_destroy_dabuf_zone; + goto out_destroy_da_state_zone; xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); if (!xfs_trans_zone) @@ -1560,8 +1556,6 @@ xfs_init_zones(void) kmem_zone_destroy(xfs_trans_zone); out_destroy_ifork_zone: kmem_zone_destroy(xfs_ifork_zone); - out_destroy_dabuf_zone: - kmem_zone_destroy(xfs_dabuf_zone); out_destroy_da_state_zone: kmem_zone_destroy(xfs_da_state_zone); out_destroy_btree_cur_zone: @@ -1589,7 +1583,6 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_log_item_desc_zone); kmem_zone_destroy(xfs_trans_zone); kmem_zone_destroy(xfs_ifork_zone); - kmem_zone_destroy(xfs_dabuf_zone); kmem_zone_destroy(xfs_da_state_zone); kmem_zone_destroy(xfs_btree_cur_zone); kmem_zone_destroy(xfs_bmap_free_item_zone); -- cgit v1.2.3 From 9b73bd7b61f320ffe7bda0126592ccf836d7ef90 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Jun 2012 18:50:15 +1000 Subject: xfs: factor buffer reading from xfs_dir2_leaf_getdents The buffer reading code 
in xfs_dir2_leaf_getdents is complex and difficult to follow due to the readahead and all the context it carries. It is also badly indented and so difficult to read. Factor it out into a separate function to make it easier to understand and optimise in future patches. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_dir2_leaf.c | 434 ++++++++++++++++++++++++++----------------------- 1 file changed, 232 insertions(+), 202 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 69accf6cbc46..0b296253bd01 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -759,6 +759,218 @@ xfs_dir2_leaf_compact_x1( *highstalep = highstale; } +struct xfs_dir2_leaf_map_info { + xfs_extlen_t map_blocks; /* number of fsbs in map */ + xfs_dablk_t map_off; /* last mapped file offset */ + int map_size; /* total entries in *map */ + int map_valid; /* valid entries in *map */ + int nmap; /* mappings to ask xfs_bmapi */ + xfs_dir2_db_t curdb; /* db for current block */ + int ra_current; /* number of read-ahead blks */ + int ra_index; /* *map index for read-ahead */ + int ra_offset; /* map entry offset for ra */ + int ra_want; /* readahead count wanted */ + struct xfs_bmbt_irec map[]; /* map vector for blocks */ +}; + +STATIC int +xfs_dir2_leaf_readbuf( + struct xfs_inode *dp, + size_t bufsize, + struct xfs_dir2_leaf_map_info *mip, + xfs_dir2_off_t *curoff, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *bp = *bpp; + struct xfs_bmbt_irec *map = mip->map; + int error = 0; + int length; + int i; + int j; + + /* + * If we have a buffer, we need to release it and + * take it out of the mapping. + */ + + if (bp) { + xfs_trans_brelse(NULL, bp); + bp = NULL; + mip->map_blocks -= mp->m_dirblkfsbs; + /* + * Loop to get rid of the extents for the + * directory block.
+ */ + for (i = mp->m_dirblkfsbs; i > 0; ) { + j = min_t(int, map->br_blockcount, i); + map->br_blockcount -= j; + map->br_startblock += j; + map->br_startoff += j; + /* + * If mapping is done, pitch it from + * the table. + */ + if (!map->br_blockcount && --mip->map_valid) + memmove(&map[0], &map[1], + sizeof(map[0]) * mip->map_valid); + i -= j; + } + } + + /* + * Recalculate the readahead blocks wanted. + */ + mip->ra_want = howmany(bufsize + mp->m_dirblksize, + mp->m_sb.sb_blocksize) - 1; + ASSERT(mip->ra_want >= 0); + + /* + * If we don't have as many as we want, and we haven't + * run out of data blocks, get some more mappings. + */ + if (1 + mip->ra_want > mip->map_blocks && + mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) { + /* + * Get more bmaps, fill in after the ones + * we already have in the table. + */ + mip->nmap = mip->map_size - mip->map_valid; + error = xfs_bmapi_read(dp, mip->map_off, + xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) - + mip->map_off, + &map[mip->map_valid], &mip->nmap, 0); + + /* + * Don't know if we should ignore this or try to return an + * error. The trouble with returning errors is that readdir + * will just stop without actually passing the error through. + */ + if (error) + goto out; /* XXX */ + + /* + * If we got all the mappings we asked for, set the final map + * offset based on the last bmap value received. Otherwise, + * we've reached the end. + */ + if (mip->nmap == mip->map_size - mip->map_valid) { + i = mip->map_valid + mip->nmap - 1; + mip->map_off = map[i].br_startoff + map[i].br_blockcount; + } else + mip->map_off = xfs_dir2_byte_to_da(mp, + XFS_DIR2_LEAF_OFFSET); + + /* + * Look for holes in the mapping, and eliminate them. Count up + * the valid blocks. 
+ */ + for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) { + if (map[i].br_startblock == HOLESTARTBLOCK) { + mip->nmap--; + length = mip->map_valid + mip->nmap - i; + if (length) + memmove(&map[i], &map[i + 1], + sizeof(map[i]) * length); + } else { + mip->map_blocks += map[i].br_blockcount; + i++; + } + } + mip->map_valid += mip->nmap; + } + + /* + * No valid mappings, so no more data blocks. + */ + if (!mip->map_valid) { + *curoff = xfs_dir2_da_to_byte(mp, mip->map_off); + goto out; + } + + /* + * Read the directory block starting at the first mapping. + */ + mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); + error = xfs_da_read_buf(NULL, dp, map->br_startoff, + map->br_blockcount >= mp->m_dirblkfsbs ? + XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, + &bp, XFS_DATA_FORK); + + /* + * Should just skip over the data block instead of giving up. + */ + if (error) + goto out; /* XXX */ + + /* + * Adjust the current amount of read-ahead: we just read a block that + * was previously ra. + */ + if (mip->ra_current) + mip->ra_current -= mp->m_dirblkfsbs; + + /* + * Do we need more readahead? + */ + for (mip->ra_index = mip->ra_offset = i = 0; + mip->ra_want > mip->ra_current && i < mip->map_blocks; + i += mp->m_dirblkfsbs) { + ASSERT(mip->ra_index < mip->map_valid); + /* + * Read-ahead a contiguous directory block. + */ + if (i > mip->ra_current && + map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { + xfs_buf_readahead(mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, + map[mip->ra_index].br_startblock + + mip->ra_offset), + (int)BTOBB(mp->m_dirblksize)); + mip->ra_current = i; + } + + /* + * Read-ahead a non-contiguous directory block. This doesn't + * use our mapping, but this is a very rare case. + */ + else if (i > mip->ra_current) { + xfs_da_reada_buf(NULL, dp, + map[mip->ra_index].br_startoff + + mip->ra_offset, + XFS_DATA_FORK); + mip->ra_current = i; + } + + /* + * Advance offset through the mapping table. 
+ */ + for (j = 0; j < mp->m_dirblkfsbs; j++) { + /* + * The rest of this extent but not more than a dir + * block. + */ + length = min_t(int, mp->m_dirblkfsbs, + map[mip->ra_index].br_blockcount - + mip->ra_offset); + j += length; + mip->ra_offset += length; + + /* + * Advance to the next mapping if this one is used up. + */ + if (mip->ra_offset == map[mip->ra_index].br_blockcount) { + mip->ra_offset = 0; + mip->ra_index++; + } + } + } + +out: + *bpp = bp; + return error; +} + /* * Getdents (readdir) for leaf and node directories. * This reads the data blocks only, so is the same for both forms. @@ -771,30 +983,18 @@ xfs_dir2_leaf_getdents( xfs_off_t *offset, filldir_t filldir) { - struct xfs_buf *bp; /* data block buffer */ - int byteoff; /* offset in current block */ - xfs_dir2_db_t curdb; /* db for current block */ - xfs_dir2_off_t curoff; /* current overall offset */ + struct xfs_buf *bp = NULL; /* data block buffer */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int error = 0; /* error return value */ - int i; /* temporary loop index */ - int j; /* temporary loop index */ int length; /* temporary length value */ - xfs_bmbt_irec_t *map; /* map vector for blocks */ - xfs_extlen_t map_blocks; /* number of fsbs in map */ - xfs_dablk_t map_off; /* last mapped file offset */ - int map_size; /* total entries in *map */ - int map_valid; /* valid entries in *map */ xfs_mount_t *mp; /* filesystem mount point */ + int byteoff; /* offset in current block */ + xfs_dir2_off_t curoff; /* current overall offset */ xfs_dir2_off_t newoff; /* new curoff after new blk */ - int nmap; /* mappings to ask xfs_bmapi */ char *ptr = NULL; /* pointer to current data */ - int ra_current; /* number of read-ahead blks */ - int ra_index; /* *map index for read-ahead */ - int ra_offset; /* map entry offset for ra */ - int ra_want; /* readahead count wanted */ + struct xfs_dir2_leaf_map_info 
*map_info; /* * If the offset is at or past the largest allowed value, @@ -810,10 +1010,12 @@ xfs_dir2_leaf_getdents( * buffer size, the directory block size, and the filesystem * block size. */ - map_size = howmany(bufsize + mp->m_dirblksize, mp->m_sb.sb_blocksize); - map = kmem_alloc(map_size * sizeof(*map), KM_SLEEP); - map_valid = ra_index = ra_offset = ra_current = map_blocks = 0; - bp = NULL; + length = howmany(bufsize + mp->m_dirblksize, + mp->m_sb.sb_blocksize); + map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + + (length * sizeof(struct xfs_bmbt_irec)), + KM_SLEEP); + map_info->map_size = length; /* * Inside the loop we keep the main offset value as a byte offset @@ -825,7 +1027,9 @@ xfs_dir2_leaf_getdents( * Force this conversion through db so we truncate the offset * down to get the start of the data block. */ - map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff)); + map_info->map_off = xfs_dir2_db_to_da(mp, + xfs_dir2_byte_to_db(mp, curoff)); + /* * Loop over directory entries until we reach the end offset. * Get more blocks and readahead as necessary. @@ -836,190 +1040,16 @@ xfs_dir2_leaf_getdents( * current buffer, need to get another one. */ if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) { - /* - * If we have a buffer, we need to release it and - * take it out of the mapping. - */ - if (bp) { - xfs_trans_brelse(NULL, bp); - bp = NULL; - map_blocks -= mp->m_dirblkfsbs; - /* - * Loop to get rid of the extents for the - * directory block. - */ - for (i = mp->m_dirblkfsbs; i > 0; ) { - j = MIN((int)map->br_blockcount, i); - map->br_blockcount -= j; - map->br_startblock += j; - map->br_startoff += j; - /* - * If mapping is done, pitch it from - * the table. - */ - if (!map->br_blockcount && --map_valid) - memmove(&map[0], &map[1], - sizeof(map[0]) * - map_valid); - i -= j; - } - } - /* - * Recalculate the readahead blocks wanted. 
- */ - ra_want = howmany(bufsize + mp->m_dirblksize, - mp->m_sb.sb_blocksize) - 1; - ASSERT(ra_want >= 0); - /* - * If we don't have as many as we want, and we haven't - * run out of data blocks, get some more mappings. - */ - if (1 + ra_want > map_blocks && - map_off < - xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) { - /* - * Get more bmaps, fill in after the ones - * we already have in the table. - */ - nmap = map_size - map_valid; - error = xfs_bmapi_read(dp, map_off, - xfs_dir2_byte_to_da(mp, - XFS_DIR2_LEAF_OFFSET) - map_off, - &map[map_valid], &nmap, 0); - /* - * Don't know if we should ignore this or - * try to return an error. - * The trouble with returning errors - * is that readdir will just stop without - * actually passing the error through. - */ - if (error) - break; /* XXX */ - /* - * If we got all the mappings we asked for, - * set the final map offset based on the - * last bmap value received. - * Otherwise, we've reached the end. - */ - if (nmap == map_size - map_valid) - map_off = - map[map_valid + nmap - 1].br_startoff + - map[map_valid + nmap - 1].br_blockcount; - else - map_off = - xfs_dir2_byte_to_da(mp, - XFS_DIR2_LEAF_OFFSET); - /* - * Look for holes in the mapping, and - * eliminate them. Count up the valid blocks. - */ - for (i = map_valid; i < map_valid + nmap; ) { - if (map[i].br_startblock == - HOLESTARTBLOCK) { - nmap--; - length = map_valid + nmap - i; - if (length) - memmove(&map[i], - &map[i + 1], - sizeof(map[i]) * - length); - } else { - map_blocks += - map[i].br_blockcount; - i++; - } - } - map_valid += nmap; - } - /* - * No valid mappings, so no more data blocks. - */ - if (!map_valid) { - curoff = xfs_dir2_da_to_byte(mp, map_off); + error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info, + &curoff, &bp); + if (error || !map_info->map_valid) break; - } - /* - * Read the directory block starting at the first - * mapping. 
- */ - curdb = xfs_dir2_da_to_db(mp, map->br_startoff); - error = xfs_da_read_buf(NULL, dp, map->br_startoff, - map->br_blockcount >= mp->m_dirblkfsbs ? - XFS_FSB_TO_DADDR(mp, map->br_startblock) : - -1, - &bp, XFS_DATA_FORK); - /* - * Should just skip over the data block instead - * of giving up. - */ - if (error) - break; /* XXX */ - /* - * Adjust the current amount of read-ahead: we just - * read a block that was previously ra. - */ - if (ra_current) - ra_current -= mp->m_dirblkfsbs; - /* - * Do we need more readahead? - */ - for (ra_index = ra_offset = i = 0; - ra_want > ra_current && i < map_blocks; - i += mp->m_dirblkfsbs) { - ASSERT(ra_index < map_valid); - /* - * Read-ahead a contiguous directory block. - */ - if (i > ra_current && - map[ra_index].br_blockcount >= - mp->m_dirblkfsbs) { - xfs_buf_readahead(mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, - map[ra_index].br_startblock + - ra_offset), - (int)BTOBB(mp->m_dirblksize)); - ra_current = i; - } - /* - * Read-ahead a non-contiguous directory block. - * This doesn't use our mapping, but this - * is a very rare case. - */ - else if (i > ra_current) { - (void)xfs_da_reada_buf(NULL, dp, - map[ra_index].br_startoff + - ra_offset, XFS_DATA_FORK); - ra_current = i; - } - /* - * Advance offset through the mapping table. - */ - for (j = 0; j < mp->m_dirblkfsbs; j++) { - /* - * The rest of this extent but not - * more than a dir block. - */ - length = MIN(mp->m_dirblkfsbs, - (int)(map[ra_index].br_blockcount - - ra_offset)); - j += length; - ra_offset += length; - /* - * Advance to the next mapping if - * this one is used up. - */ - if (ra_offset == - map[ra_index].br_blockcount) { - ra_offset = 0; - ra_index++; - } - } - } + /* * Having done a read, we need to set a new offset. */ - newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0); + newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0); /* * Start of the current block. 
*/ @@ -1030,7 +1060,7 @@ xfs_dir2_leaf_getdents( */ else if (curoff > newoff) ASSERT(xfs_dir2_byte_to_db(mp, curoff) == - curdb); + map_info->curdb); hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); /* @@ -1113,7 +1143,7 @@ xfs_dir2_leaf_getdents( *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; else *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; - kmem_free(map); + kmem_free(map_info); if (bp) xfs_trans_brelse(NULL, bp); return error; -- cgit v1.2.3 From b7a9e5dd40f17a48a72f249b8bbc989b63bae5fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 27 Jun 2012 12:24:08 -0700 Subject: libceph: set peer name on con_open, not init The peer name may change on each open attempt, even when the connection is reused. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 7 ++++--- include/linux/ceph/messenger.h | 4 ++-- net/ceph/messenger.c | 12 +++++++----- net/ceph/mon_client.c | 4 ++-- net/ceph/osd_client.c | 10 ++++++---- 5 files changed, 21 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ecd7f15741c1..5ac6434185ae 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -394,8 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, - CEPH_ENTITY_TYPE_MDS, mds); + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; @@ -437,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, mdsc->sessions[mds] = s; atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */ - ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); + ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, + ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); return s; @@ -2529,6 +2529,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_seq = 0; 
ceph_con_open(&session->s_con, + CEPH_ENTITY_TYPE_MDS, mds, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); /* replay unsafe requests */ diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index dd4ef1f8ec93..478f814f2100 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -232,9 +232,9 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, - struct ceph_messenger *msgr, __u8 entity_type, - __u64 entity_num); + struct ceph_messenger *msgr); extern void ceph_con_open(struct ceph_connection *con, + __u8 entity_type, __u64 entity_num, struct ceph_entity_addr *addr); extern bool ceph_con_opened(struct ceph_connection *con); extern void ceph_con_close(struct ceph_connection *con); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index dcc50e4cd5cd..ae082d95fc72 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -523,12 +523,17 @@ EXPORT_SYMBOL(ceph_con_close); /* * Reopen a closed connection, with a new peer address. 
*/ -void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) +void ceph_con_open(struct ceph_connection *con, + __u8 entity_type, __u64 entity_num, + struct ceph_entity_addr *addr) { dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); set_bit(OPENING, &con->state); WARN_ON(!test_and_clear_bit(CLOSED, &con->state)); + con->peer_name.type = (__u8) entity_type; + con->peer_name.num = cpu_to_le64(entity_num); + memcpy(&con->peer_addr, addr, sizeof(*addr)); con->delay = 0; /* reset backoff memory */ queue_con(con); @@ -548,7 +553,7 @@ bool ceph_con_opened(struct ceph_connection *con) */ void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, - struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num) + struct ceph_messenger *msgr) { dout("con_init %p\n", con); memset(con, 0, sizeof(*con)); @@ -558,9 +563,6 @@ void ceph_con_init(struct ceph_connection *con, void *private, con_sock_state_init(con); - con->peer_name.type = (__u8) entity_type; - con->peer_name.num = cpu_to_le64(entity_num); - mutex_init(&con->mutex); INIT_LIST_HEAD(&con->out_queue); INIT_LIST_HEAD(&con->out_sent); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index e9db3de20b2e..bcc80a0e2a98 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -143,11 +143,11 @@ static int __open_session(struct ceph_mon_client *monc) monc->want_next_osdmap = !!monc->want_next_osdmap; ceph_con_init(&monc->con, monc, &mon_con_ops, - &monc->client->msgr, - CEPH_ENTITY_TYPE_MON, monc->cur_mon); + &monc->client->msgr); dout("open_session mon%d opening\n", monc->cur_mon); ceph_con_open(&monc->con, + CEPH_ENTITY_TYPE_MON, monc->cur_mon, &monc->monmap->mon_inst[monc->cur_mon].addr); /* initiatiate authentication handshake */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index db2da54f7336..c2527113d2ae 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -639,8 +639,7 @@ static struct 
ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; - ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr, - CEPH_ENTITY_TYPE_OSD, onum); + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); INIT_LIST_HEAD(&osd->o_keepalive_item); return osd; @@ -750,7 +749,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) ret = -EAGAIN; } else { ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, + &osdc->osdmap->osd_addr[osd->o_osd]); osd->o_incarnation++; } return ret; @@ -1005,7 +1005,9 @@ static int __map_request(struct ceph_osd_client *osdc, dout("map_request osd %p is osd%d\n", req->r_osd, o); __insert_osd(osdc, req->r_osd); - ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); + ceph_con_open(&req->r_osd->o_con, + CEPH_ENTITY_TYPE_OSD, o, + &osdc->osdmap->osd_addr[o]); } if (req->r_osd) { -- cgit v1.2.3 From 069ddcda37b2cf5bb4b6031a944c0e9359213262 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 11 Jun 2012 15:42:32 -0700 Subject: eCryptfs: Copy up POSIX ACL and read-only flags from lower mount When the eCryptfs mount options do not include '-o acl', but the lower filesystem's mount options do include 'acl', the MS_POSIXACL flag is not flipped on in the eCryptfs super block flags. This flag is what the VFS checks in do_last() when deciding if the current umask should be applied to a newly created inode's mode or not. When a default POSIX ACL mask is set on a directory, the current umask is incorrectly applied to new inodes created in the directory. This patch ignores the MS_POSIXACL flag passed into ecryptfs_mount() and sets the flag on the eCryptfs super block depending on the flag's presence on the lower super block. Additionally, it is incorrect to allow a writeable eCryptfs mount on top of a read-only lower mount. 
This missing check did not allow writes to the read-only lower mount because permissions checks are still performed on the lower filesystem's objects but it is best to simply not allow a rw mount on top of ro mount. However, a ro eCryptfs mount on top of a rw mount is valid and still allowed. https://launchpad.net/bugs/1009207 Signed-off-by: Tyler Hicks Reported-by: Stefan Beller Cc: John Johansen --- fs/ecryptfs/main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 68954937a071..df217dc9f1d9 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -505,7 +505,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - s->s_flags = flags; rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); if (rc) goto out1; @@ -541,6 +540,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags } ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + /** + * Set the POSIX ACL flag based on whether they're enabled in the lower + * mount. Force a read-only eCryptfs mount if the lower mount is ro. + * Allow a ro eCryptfs mount even when the lower mount is rw. + */ + s->s_flags = flags & ~MS_POSIXACL; + s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; s->s_magic = ECRYPTFS_SUPER_MAGIC; -- cgit v1.2.3 From 566968866555a19d0a78e0bfa845cd249a7eeae2 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 11 Jun 2012 09:39:54 -0700 Subject: eCryptfs: Remove unused messaging declarations and function These are no longer needed. 
Signed-off-by: Tyler Hicks Cc: Sasha Levin --- fs/ecryptfs/ecryptfs_kernel.h | 6 ------ fs/ecryptfs/messaging.c | 31 ------------------------------- 2 files changed, 37 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 867b64c5d84f..01a1f8575950 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -385,8 +385,6 @@ struct ecryptfs_msg_ctx { struct mutex mux; }; -struct ecryptfs_daemon; - struct ecryptfs_daemon { #define ECRYPTFS_DAEMON_IN_READ 0x00000001 #define ECRYPTFS_DAEMON_IN_POLL 0x00000002 @@ -621,10 +619,6 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, - struct pid *pid); -int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, - struct pid *pid); int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, struct user_namespace *user_ns, struct pid *pid, u32 seq); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index a750f957b145..c11911decdc1 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -215,37 +215,6 @@ out: return rc; } -/** - * ecryptfs_process_quit - * @euid: The user ID owner of the message - * @user_ns: The namespace in which @euid applies - * @pid: The process ID for the userspace program that sent the - * message - * - * Deletes the corresponding daemon for the given euid and pid, if - * it is the registered that is requesting the deletion. Returns zero - * after deleting the desired daemon; non-zero otherwise. 
- */ -int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, - struct pid *pid) -{ - struct ecryptfs_daemon *daemon; - int rc; - - mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns); - if (rc || !daemon) { - rc = -EINVAL; - printk(KERN_ERR "Received request from user [%d] to " - "unregister unrecognized daemon [0x%p]\n", euid, pid); - goto out_unlock; - } - rc = ecryptfs_exorcise_daemon(daemon); -out_unlock: - mutex_unlock(&ecryptfs_daemon_hash_mux); - return rc; -} - /** * ecryptfs_process_reponse * @msg: The ecryptfs message received; the caller should sanity check -- cgit v1.2.3 From 2ecaf55db6dcf4dd25e1ef8d5eb6068e3286a20f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 11 Jun 2012 09:47:47 -0700 Subject: eCryptfs: Make all miscdev functions use daemon ptr in file private_data Now that a pointer to a valid struct ecryptfs_daemon is stored in the private_data of an opened /dev/ecryptfs file, the remaining miscdev functions can utilize the pointer rather than looking up the ecryptfs_daemon at the beginning of each operation. The security model of /dev/ecryptfs is simplified a little bit with this patch. Upon opening /dev/ecryptfs, a per-user ecryptfs_daemon is registered. Another daemon cannot be registered for that user until the last file reference is released. During the lifetime of the ecryptfs_daemon, access checks are not performed on the /dev/ecryptfs operations because it is assumed that the application securely handles the opened file descriptor and does not unintentionally leak it to processes that are not trusted. 
Signed-off-by: Tyler Hicks Cc: Sasha Levin --- fs/ecryptfs/ecryptfs_kernel.h | 16 ++----- fs/ecryptfs/messaging.c | 105 +++++++----------------------------------- fs/ecryptfs/miscdev.c | 98 +++++++++++---------------------------- 3 files changed, 47 insertions(+), 172 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 01a1f8575950..0deb4f24957a 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -392,10 +392,7 @@ struct ecryptfs_daemon { #define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008 u32 flags; u32 num_queued_msg_ctx; - struct pid *pid; - uid_t euid; - struct user_namespace *user_ns; - struct task_struct *task; + struct file *file; struct mutex mux; struct list_head msg_ctx_out_queue; wait_queue_head_t wait; @@ -619,9 +616,8 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, - struct user_namespace *user_ns, struct pid *pid, - u32 seq); +int ecryptfs_process_response(struct ecryptfs_daemon *daemon, + struct ecryptfs_message *msg, u32 seq); int ecryptfs_send_message(char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx); int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, @@ -666,8 +662,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, size_t *length_size); int ecryptfs_write_packet_length(char *dest, size_t size, @@ 
-679,8 +674,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, u16 msg_flags, struct ecryptfs_daemon *daemon); void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int -ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns, struct pid *pid); +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file); int ecryptfs_init_kthread(void); void ecryptfs_destroy_kthread(void); int ecryptfs_privileged_open(struct file **lower_file, diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index c11911decdc1..b29bb8bfa8d9 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -32,8 +32,8 @@ static struct mutex ecryptfs_msg_ctx_lists_mux; static struct hlist_head *ecryptfs_daemon_hash; struct mutex ecryptfs_daemon_hash_mux; static int ecryptfs_hash_bits; -#define ecryptfs_uid_hash(uid) \ - hash_long((unsigned long)uid, ecryptfs_hash_bits) +#define ecryptfs_current_euid_hash(uid) \ + hash_long((unsigned long)current_euid(), ecryptfs_hash_bits) static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; @@ -105,26 +105,24 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) /** * ecryptfs_find_daemon_by_euid - * @euid: The effective user id which maps to the desired daemon id - * @user_ns: The namespace in which @euid applies * @daemon: If return value is zero, points to the desired daemon pointer * * Must be called with ecryptfs_daemon_hash_mux held. * - * Search the hash list for the given user id. + * Search the hash list for the current effective user id. * * Returns zero if the user id exists in the list; non-zero otherwise. 
*/ -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns) +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) { struct hlist_node *elem; int rc; hlist_for_each_entry(*daemon, elem, - &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)], - euid_chain) { - if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) { + &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], + euid_chain) { + if ((*daemon)->file->f_cred->euid == current_euid() && + (*daemon)->file->f_cred->user_ns == current_user_ns()) { rc = 0; goto out; } @@ -137,9 +135,7 @@ out: /** * ecryptfs_spawn_daemon - Create and initialize a new daemon struct * @daemon: Pointer to set to newly allocated daemon struct - * @euid: Effective user id for the daemon - * @user_ns: The namespace in which @euid applies - * @pid: Process id for the daemon + * @file: File used when opening /dev/ecryptfs * * Must be called ceremoniously while in possession of * ecryptfs_sacred_daemon_hash_mux @@ -147,8 +143,7 @@ out: * Returns zero on success; non-zero otherwise */ int -ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns, struct pid *pid) +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file) { int rc = 0; @@ -159,16 +154,13 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); goto out; } - (*daemon)->euid = euid; - (*daemon)->user_ns = get_user_ns(user_ns); - (*daemon)->pid = get_pid(pid); - (*daemon)->task = current; + (*daemon)->file = file; mutex_init(&(*daemon)->mux); INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue); init_waitqueue_head(&(*daemon)->wait); (*daemon)->num_queued_msg_ctx = 0; hlist_add_head(&(*daemon)->euid_chain, - &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]); + &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()]); out: return rc; } @@ -188,9 +180,6 @@ int ecryptfs_exorcise_daemon(struct 
ecryptfs_daemon *daemon) if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ) || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) { rc = -EBUSY; - printk(KERN_WARNING "%s: Attempt to destroy daemon with pid " - "[0x%p], but it is in the midst of a read or a poll\n", - __func__, daemon->pid); mutex_unlock(&daemon->mux); goto out; } @@ -203,12 +192,6 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) ecryptfs_msg_ctx_alloc_to_free(msg_ctx); } hlist_del(&daemon->euid_chain); - if (daemon->task) - wake_up_process(daemon->task); - if (daemon->pid) - put_pid(daemon->pid); - if (daemon->user_ns) - put_user_ns(daemon->user_ns); mutex_unlock(&daemon->mux); kzfree(daemon); out: @@ -219,8 +202,6 @@ out: * ecryptfs_process_reponse * @msg: The ecryptfs message received; the caller should sanity check * msg->data_len and free the memory - * @pid: The process ID of the userspace application that sent the - * message * @seq: The sequence number of the message; must match the sequence * number for the existing message context waiting for this * response @@ -239,16 +220,11 @@ out: * * Returns zero on success; non-zero otherwise */ -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, - struct user_namespace *user_ns, struct pid *pid, - u32 seq) +int ecryptfs_process_response(struct ecryptfs_daemon *daemon, + struct ecryptfs_message *msg, u32 seq) { - struct ecryptfs_daemon *uninitialized_var(daemon); struct ecryptfs_msg_ctx *msg_ctx; size_t msg_size; - struct nsproxy *nsproxy; - struct user_namespace *tsk_user_ns; - uid_t ctx_euid; int rc; if (msg->index >= ecryptfs_message_buf_len) { @@ -261,51 +237,6 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, } msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; mutex_lock(&msg_ctx->mux); - mutex_lock(&ecryptfs_daemon_hash_mux); - rcu_read_lock(); - nsproxy = task_nsproxy(msg_ctx->task); - if (nsproxy == NULL) { - rc = -EBADMSG; - printk(KERN_ERR "%s: Receiving process is a zombie. 
Dropping " - "message.\n", __func__); - rcu_read_unlock(); - mutex_unlock(&ecryptfs_daemon_hash_mux); - goto wake_up; - } - tsk_user_ns = __task_cred(msg_ctx->task)->user_ns; - ctx_euid = task_euid(msg_ctx->task); - rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); - rcu_read_unlock(); - mutex_unlock(&ecryptfs_daemon_hash_mux); - if (rc) { - rc = -EBADMSG; - printk(KERN_WARNING "%s: User [%d] received a " - "message response from process [0x%p] but does " - "not have a registered daemon\n", __func__, - ctx_euid, pid); - goto wake_up; - } - if (ctx_euid != euid) { - rc = -EBADMSG; - printk(KERN_WARNING "%s: Received message from user " - "[%d]; expected message from user [%d]\n", __func__, - euid, ctx_euid); - goto unlock; - } - if (tsk_user_ns != user_ns) { - rc = -EBADMSG; - printk(KERN_WARNING "%s: Received message from user_ns " - "[0x%p]; expected message from user_ns [0x%p]\n", - __func__, user_ns, tsk_user_ns); - goto unlock; - } - if (daemon->pid != pid) { - rc = -EBADMSG; - printk(KERN_ERR "%s: User [%d] sent a message response " - "from an unrecognized process [0x%p]\n", - __func__, ctx_euid, pid); - goto unlock; - } if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { rc = -EINVAL; printk(KERN_WARNING "%s: Desired context element is not " @@ -328,9 +259,8 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, } memcpy(msg_ctx->msg, msg, msg_size); msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; - rc = 0; -wake_up: wake_up_process(msg_ctx->task); + rc = 0; unlock: mutex_unlock(&msg_ctx->mux); out: @@ -352,14 +282,11 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) { struct ecryptfs_daemon *daemon; - uid_t euid = current_euid(); int rc; - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); + rc = ecryptfs_find_daemon_by_euid(&daemon); if (rc || !daemon) { rc = -ENOTCONN; - printk(KERN_ERR "%s: User [%d] does not have a daemon " - "registered\n", 
__func__, euid); goto out; } mutex_lock(&ecryptfs_msg_ctx_lists_mux); diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index c0038f6566d4..412e6eda25f8 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -33,7 +33,7 @@ static atomic_t ecryptfs_num_miscdev_opens; /** * ecryptfs_miscdev_poll - * @file: dev file (ignored) + * @file: dev file * @pt: dev poll table (ignored) * * Returns the poll mask @@ -41,20 +41,10 @@ static atomic_t ecryptfs_num_miscdev_opens; static unsigned int ecryptfs_miscdev_poll(struct file *file, poll_table *pt) { - struct ecryptfs_daemon *daemon; + struct ecryptfs_daemon *daemon = file->private_data; unsigned int mask = 0; - uid_t euid = current_euid(); - int rc; - mutex_lock(&ecryptfs_daemon_hash_mux); - /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) { - mutex_unlock(&ecryptfs_daemon_hash_mux); - return -EINVAL; - } mutex_lock(&daemon->mux); - mutex_unlock(&ecryptfs_daemon_hash_mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { printk(KERN_WARNING "%s: Attempt to poll on zombified " "daemon\n", __func__); @@ -79,7 +69,7 @@ out_unlock_daemon: /** * ecryptfs_miscdev_open * @inode: inode of miscdev handle (ignored) - * @file: file for miscdev handle (ignored) + * @file: file for miscdev handle * * Returns zero on success; non-zero otherwise */ @@ -87,7 +77,6 @@ static int ecryptfs_miscdev_open(struct inode *inode, struct file *file) { struct ecryptfs_daemon *daemon = NULL; - uid_t euid = current_euid(); int rc; mutex_lock(&ecryptfs_daemon_hash_mux); @@ -98,30 +87,20 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) "count; rc = [%d]\n", __func__, rc); goto out_unlock_daemon_list; } - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) { - rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(), - task_pid(current)); - if (rc) { - printk(KERN_ERR "%s: Error attempting to spawn 
daemon; " - "rc = [%d]\n", __func__, rc); - goto out_module_put_unlock_daemon_list; - } - } - mutex_lock(&daemon->mux); - if (daemon->pid != task_pid(current)) { + rc = ecryptfs_find_daemon_by_euid(&daemon); + if (!rc) { rc = -EINVAL; - printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], " - "but pid [0x%p] has attempted to open the handle " - "instead\n", __func__, daemon->pid, daemon->euid, - task_pid(current)); - goto out_unlock_daemon; + goto out_unlock_daemon_list; + } + rc = ecryptfs_spawn_daemon(&daemon, file); + if (rc) { + printk(KERN_ERR "%s: Error attempting to spawn daemon; " + "rc = [%d]\n", __func__, rc); + goto out_module_put_unlock_daemon_list; } + mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { rc = -EBUSY; - printk(KERN_ERR "%s: Miscellaneous device handle may only be " - "opened once per daemon; pid [0x%p] already has this " - "handle open\n", __func__, daemon->pid); goto out_unlock_daemon; } daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; @@ -140,7 +119,7 @@ out_unlock_daemon_list: /** * ecryptfs_miscdev_release * @inode: inode of fs/ecryptfs/euid handle (ignored) - * @file: file for fs/ecryptfs/euid handle (ignored) + * @file: file for fs/ecryptfs/euid handle * * This keeps the daemon registered until the daemon sends another * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters. 
@@ -150,20 +129,18 @@ out_unlock_daemon_list: static int ecryptfs_miscdev_release(struct inode *inode, struct file *file) { - struct ecryptfs_daemon *daemon = NULL; - uid_t euid = current_euid(); + struct ecryptfs_daemon *daemon = file->private_data; int rc; - mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) - daemon = file->private_data; mutex_lock(&daemon->mux); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); mutex_unlock(&daemon->mux); + + mutex_lock(&ecryptfs_daemon_hash_mux); rc = ecryptfs_exorcise_daemon(daemon); + mutex_unlock(&ecryptfs_daemon_hash_mux); if (rc) { printk(KERN_CRIT "%s: Fatal error whilst attempting to " "shut down daemon; rc = [%d]. Please report this " @@ -171,7 +148,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) BUG(); } module_put(THIS_MODULE); - mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } @@ -248,7 +224,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, /** * ecryptfs_miscdev_read - format and send message from queue - * @file: fs/ecryptfs/euid miscdevfs handle (ignored) + * @file: miscdevfs handle * @buf: User buffer into which to copy the next message on the daemon queue * @count: Amount of space available in @buf * @ppos: Offset in file (ignored) @@ -262,43 +238,27 @@ static ssize_t ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct ecryptfs_daemon *daemon; + struct ecryptfs_daemon *daemon = file->private_data; struct ecryptfs_msg_ctx *msg_ctx; size_t packet_length_size; char packet_length[ECRYPTFS_MAX_PKT_LEN_SIZE]; size_t i; size_t total_length; - uid_t euid = current_euid(); int rc; - mutex_lock(&ecryptfs_daemon_hash_mux); - /* TODO: Just use file->private_data? 
*/ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) { - mutex_unlock(&ecryptfs_daemon_hash_mux); - return -EINVAL; - } mutex_lock(&daemon->mux); - if (task_pid(current) != daemon->pid) { - mutex_unlock(&daemon->mux); - mutex_unlock(&ecryptfs_daemon_hash_mux); - return -EPERM; - } if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; - mutex_unlock(&ecryptfs_daemon_hash_mux); printk(KERN_WARNING "%s: Attempt to read from zombified " "daemon\n", __func__); goto out_unlock_daemon; } if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { rc = 0; - mutex_unlock(&ecryptfs_daemon_hash_mux); goto out_unlock_daemon; } /* This daemon will not go away so long as this flag is set */ daemon->flags |= ECRYPTFS_DAEMON_IN_READ; - mutex_unlock(&ecryptfs_daemon_hash_mux); check_list: if (list_empty(&daemon->msg_ctx_out_queue)) { mutex_unlock(&daemon->mux); @@ -382,16 +342,12 @@ out_unlock_daemon: * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon * @data: Bytes comprising struct ecryptfs_message * @data_size: sizeof(struct ecryptfs_message) + data len - * @euid: Effective user id of miscdevess sending the miscdev response - * @user_ns: The namespace in which @euid applies - * @pid: Miscdevess id of miscdevess sending the miscdev response * @seq: Sequence number for miscdev response packet * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_miscdev_response(char *data, size_t data_size, - uid_t euid, struct user_namespace *user_ns, - struct pid *pid, u32 seq) +static int ecryptfs_miscdev_response(struct ecryptfs_daemon *daemon, char *data, + size_t data_size, u32 seq) { struct ecryptfs_message *msg = (struct ecryptfs_message *)data; int rc; @@ -403,7 +359,7 @@ static int ecryptfs_miscdev_response(char *data, size_t data_size, rc = -EINVAL; goto out; } - rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq); + rc = ecryptfs_process_response(daemon, msg, seq); if (rc) printk(KERN_ERR 
"Error processing response message; rc = [%d]\n", rc); @@ -413,7 +369,7 @@ out: /** * ecryptfs_miscdev_write - handle write to daemon miscdev handle - * @file: File for misc dev handle (ignored) + * @file: File for misc dev handle * @buf: Buffer containing user data * @count: Amount of data in @buf * @ppos: Pointer to offset in file (ignored) @@ -428,7 +384,6 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, u32 seq; size_t packet_size, packet_size_length; char *data; - uid_t euid = current_euid(); unsigned char packet_size_peek[ECRYPTFS_MAX_PKT_LEN_SIZE]; ssize_t rc; @@ -488,10 +443,9 @@ memdup: } memcpy(&counter_nbo, &data[PKT_CTR_OFFSET], PKT_CTR_SIZE); seq = be32_to_cpu(counter_nbo); - rc = ecryptfs_miscdev_response( + rc = ecryptfs_miscdev_response(file->private_data, &data[PKT_LEN_OFFSET + packet_size_length], - packet_size, euid, current_user_ns(), - task_pid(current), seq); + packet_size, seq); if (rc) { printk(KERN_WARNING "%s: Failed to deliver miscdev " "response to requesting operation; rc = [%zd]\n", -- cgit v1.2.3 From 8bc2d3cf612994a960c2e8eaea37f6676f67082a Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 22 May 2012 15:09:50 -0500 Subject: eCryptfs: Unlink lower inode when ecryptfs_create() fails ecryptfs_create() creates a lower inode, allocates an eCryptfs inode, initializes the eCryptfs inode and cryptographic metadata attached to the inode, and then writes the metadata to the header of the file. If an error was to occur after the lower inode was created, an empty lower file would be left in the lower filesystem. This is a problem because ecryptfs_open() refuses to open any lower files which do not have the appropriate metadata in the file header. This patch properly unlinks the lower inode when an error occurs in the later stages of ecryptfs_create(), reducing the chance that an empty lower file will be left in the lower filesystem. 
https://launchpad.net/bugs/872905 Signed-off-by: Tyler Hicks Cc: John Johansen Cc: Colin Ian King --- fs/ecryptfs/inode.c | 55 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a07441a0a878..65efe5fa687c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -143,6 +143,31 @@ static int ecryptfs_interpose(struct dentry *lower_dentry, return 0; } +static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; + int rc; + + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry); + if (rc) { + printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); + goto out_unlock; + } + fsstack_copy_attr_times(dir, lower_dir_inode); + set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); + inode->i_ctime = dir->i_ctime; + d_drop(dentry); +out_unlock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + return rc; +} + /** * ecryptfs_do_create * @directory_inode: inode of the new file's dentry's parent in ecryptfs @@ -182,8 +207,10 @@ ecryptfs_do_create(struct inode *directory_inode, } inode = __ecryptfs_get_inode(lower_dentry->d_inode, directory_inode->i_sb); - if (IS_ERR(inode)) + if (IS_ERR(inode)) { + vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); goto out_lock; + } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); out_lock: @@ -265,7 +292,9 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, * that this on disk file is prepared to be an ecryptfs file */ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); if (rc) { - 
drop_nlink(ecryptfs_inode); + ecryptfs_do_unlink(directory_inode, ecryptfs_dentry, + ecryptfs_inode); + make_bad_inode(ecryptfs_inode); unlock_new_inode(ecryptfs_inode); iput(ecryptfs_inode); goto out; @@ -477,27 +506,7 @@ out_lock: static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) { - int rc = 0; - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); - struct dentry *lower_dir_dentry; - - dget(lower_dentry); - lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry); - if (rc) { - printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); - goto out_unlock; - } - fsstack_copy_attr_times(dir, lower_dir_inode); - set_nlink(dentry->d_inode, - ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink); - dentry->d_inode->i_ctime = dir->i_ctime; - d_drop(dentry); -out_unlock: - unlock_dir(lower_dir_dentry); - dput(lower_dentry); - return rc; + return ecryptfs_do_unlink(dir, dentry, dentry->d_inode); } static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, -- cgit v1.2.3 From e3ccaa9761200952cc269b1f4b7d7bb77a5e071b Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 20 Jun 2012 23:50:59 -0700 Subject: eCryptfs: Initialize empty lower files when opening them Historically, eCryptfs has only initialized lower files in the ecryptfs_create() path. Lower file initialization is the act of writing the cryptographic metadata from the inode's crypt_stat to the header of the file. The ecryptfs_open() path already expects that metadata to be in the header of the file. A number of users have reported empty lower files beneath their eCryptfs mounts. Most of the causes for those empty files being left around have been addressed, but the presence of empty files causes problems due to the lack of proper cryptographic metadata. To transparently solve this problem, this patch initializes empty lower files in the ecryptfs_open() error path.
If the metadata is unreadable due to the lower inode size being 0, plaintext passthrough support is not in use, and the metadata is stored in the header of the file (as opposed to the user.ecryptfs extended attribute), the lower file will be initialized. The number of nested conditionals in ecryptfs_open() was getting out of hand, so a helper function was created. To avoid the same nested conditional problem, the conditional logic was reversed inside of the helper function. https://launchpad.net/bugs/911507 Signed-off-by: Tyler Hicks Cc: John Johansen Cc: Colin Ian King --- fs/ecryptfs/ecryptfs_kernel.h | 2 ++ fs/ecryptfs/file.c | 71 +++++++++++++++++++++++++++---------------- fs/ecryptfs/inode.c | 4 +-- 3 files changed, 49 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 0deb4f24957a..9f77ff818173 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -563,6 +563,8 @@ struct ecryptfs_open_req { struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb); void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); +int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2b17f2f9b121..baf8b0550391 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -161,6 +161,48 @@ static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma) struct kmem_cache *ecryptfs_file_info_cache; +static int read_or_initialize_metadata(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + struct ecryptfs_crypt_stat *crypt_stat; + int rc; + + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + mount_crypt_stat = 
&ecryptfs_superblock_to_private( + inode->i_sb)->mount_crypt_stat; + mutex_lock(&crypt_stat->cs_mutex); + + if (crypt_stat->flags & ECRYPTFS_POLICY_APPLIED && + crypt_stat->flags & ECRYPTFS_KEY_VALID) { + rc = 0; + goto out; + } + + rc = ecryptfs_read_metadata(dentry); + if (!rc) + goto out; + + if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) { + crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED + | ECRYPTFS_ENCRYPTED); + rc = 0; + goto out; + } + + if (!(mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) && + !i_size_read(ecryptfs_inode_to_lower(inode))) { + rc = ecryptfs_initialize_file(dentry, inode); + if (!rc) + goto out; + } + + rc = -EIO; +out: + mutex_unlock(&crypt_stat->cs_mutex); + return rc; +} + /** * ecryptfs_open * @inode: inode speciying file to open @@ -236,32 +278,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) rc = 0; goto out; } - mutex_lock(&crypt_stat->cs_mutex); - if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) - || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { - rc = ecryptfs_read_metadata(ecryptfs_dentry); - if (rc) { - ecryptfs_printk(KERN_DEBUG, - "Valid headers not found\n"); - if (!(mount_crypt_stat->flags - & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { - rc = -EIO; - printk(KERN_WARNING "Either the lower file " - "is not in a valid eCryptfs format, " - "or the key could not be retrieved. 
" - "Plaintext passthrough mode is not " - "enabled; returning -EIO\n"); - mutex_unlock(&crypt_stat->cs_mutex); - goto out_put; - } - rc = 0; - crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED - | ECRYPTFS_ENCRYPTED); - mutex_unlock(&crypt_stat->cs_mutex); - goto out; - } - } - mutex_unlock(&crypt_stat->cs_mutex); + rc = read_or_initialize_metadata(ecryptfs_dentry); + if (rc) + goto out_put; ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, (unsigned long long)i_size_read(inode)); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 65efe5fa687c..2d4143f8f5c9 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -227,8 +227,8 @@ out: * * Returns zero on success */ -static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, - struct inode *ecryptfs_inode) +int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; -- cgit v1.2.3 From f3da93105b6963a2be2a56dee27fdc88ac4ad769 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 May 2012 23:40:17 +0800 Subject: quota: fix checkpatch.pl warning by replacing <asm/uaccess.h> with <linux/uaccess.h> checkpatch.pl warns: "WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>" Below patch fixes it. Signed-off-by: Jie Liu Signed-off-by: Jan Kara --- fs/quota/dquot.c | 2 +- fs/quota/quota.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 10cbe841cb7e..0c541dcbdf07 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -78,7 +78,7 @@ #include #include "../internal.h" /* ugh */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * There are three quota SMP locks.
dq_list_lock protects all lists with quotas diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 9a391204ca27..639782d5a76f 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -9,7 +9,7 @@ #include #include #include -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include #include #include -- cgit v1.2.3 From f007dbf8e51f4a0910194eebc2aa119eb861893e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 24 May 2012 12:00:37 -0500 Subject: ext3: force ro mount if ext3_setup_super() fails If ext3_setup_super() fails, e.g. due to a too-high revision, the error is logged in dmesg but the fs is not mounted RO as indicated. Tested by: [164152.114551] EXT3-fs (sdb6): error: revision level too high, forcing read-only mode /dev/sdb6 /mnt/test2 ext3 rw,seclabel,relatime,errors=continue,user_xattr,acl,barrier=1,data=ordered 0 0 ^^ Signed-off-by: Eric Sandeen Reviewed-by: Andreas Dilger Signed-off-by: Jan Kara --- fs/ext3/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c3a44b7c375..b4e19926f46f 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2058,7 +2058,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount3; } - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); + if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY)) + sb->s_flags |= MS_RDONLY; EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; ext3_orphan_cleanup(sb, es); -- cgit v1.2.3 From db8109ef98b5fb7e26e0d265c02f7164b13009d4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 4 Jun 2012 14:46:24 +0300 Subject: udf: stop using s_dirt The UDF file-system does not need the 's_dirt' superblock flag because it does not define the 'write_super()' method. This flag was set to 1 in a few places and set to 0 in '->sync_fs()' and was basically useless. Stop using it because it is on its way out.
Signed-off-by: Artem Bityutskiy Signed-off-by: Jan Kara --- fs/udf/super.c | 6 ------ fs/udf/udfdecl.h | 1 - 2 files changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 8d86a8706c0e..e7534fb84c2d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1974,7 +1974,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_op = &udf_sb_ops; sb->s_export_op = &udf_export_ops; - sb->s_dirt = 0; sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; @@ -2096,10 +2095,6 @@ void _udf_err(struct super_block *sb, const char *function, struct va_format vaf; va_list args; - /* mark sb error */ - if (!(sb->s_flags & MS_RDONLY)) - sb->s_dirt = 1; - va_start(args, fmt); vaf.fmt = fmt; @@ -2161,7 +2156,6 @@ static int udf_sync_fs(struct super_block *sb, int wait) * the buffer for IO */ mark_buffer_dirty(sbi->s_lvid_bh); - sb->s_dirt = 0; sbi->s_lvid_dirty = 0; } mutex_unlock(&sbi->s_alloc_mutex); diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index ebe10314e512..de038da6f6bd 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -129,7 +129,6 @@ static inline void udf_updated_lvid(struct super_block *sb) WARN_ON_ONCE(((struct logicalVolIntegrityDesc *) bh->b_data)->integrityType != cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN)); - sb->s_dirt = 1; UDF_SB(sb)->s_lvid_dirty = 1; } extern u64 lvid_get_unique_id(struct super_block *sb); -- cgit v1.2.3 From a0e589b485cd5e6a74d40d195b3d7de212b4227d Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Tue, 26 Jun 2012 19:33:11 +0530 Subject: UDF: Remove unnecessary variable "offset" from udf_fill_inode The variable "offset" is not needed. Remove it. 
Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: Jan Kara --- fs/udf/inode.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 873e1bab9c4c..fafaad795cd6 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1247,7 +1247,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) { struct fileEntry *fe; struct extendedFileEntry *efe; - int offset; struct udf_sb_info *sbi = UDF_SB(inode->i_sb); struct udf_inode_info *iinfo = UDF_I(inode); unsigned int link_count; @@ -1359,7 +1358,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint); - offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr; } else { inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); @@ -1381,8 +1379,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); - offset = sizeof(struct extendedFileEntry) + - iinfo->i_lenEAttr; } switch (fe->icbTag.fileType) { -- cgit v1.2.3 From e124a32043416ddefaec3c54cc945b7667c00628 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Thu, 28 Jun 2012 00:49:44 +0800 Subject: ext2: cleanup the confused goto label Cleanup the confused goto label, since the big lock has been removed. 
Signed-off-by: Wanlong Gao Signed-off-by: Jan Kara --- fs/ext2/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b3621cb7ea31..c8e49794ab5d 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -771,13 +771,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - goto failed_unlock; + goto failed; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); if (!sbi->s_blockgroup_lock) { kfree(sbi); - goto failed_unlock; + goto failed; } sb->s_fs_info = sbi; sbi->s_sb_block = sb_block; @@ -1130,7 +1130,7 @@ failed_sbi: sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); -failed_unlock: +failed: return ret; } -- cgit v1.2.3 From bff943af6fe3af022c1c7a22cdb2e18a242eaf35 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Jun 2012 22:27:05 +0200 Subject: udf: Fix memory leak when mounting When we are mounting filesystem, we can load one partition table before finding out that we cannot complete processing of logical volume descriptor and trying the reserve descriptor. Free the table properly before trying the reserve descriptor. 
Signed-off-by: Jan Kara --- fs/udf/super.c | 122 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index e7534fb84c2d..8a7583867811 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -252,6 +252,63 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count) return 0; } +static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) +{ + int i; + int nr_groups = bitmap->s_nr_groups; + int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) * + nr_groups); + + for (i = 0; i < nr_groups; i++) + if (bitmap->s_block_bitmap[i]) + brelse(bitmap->s_block_bitmap[i]); + + if (size <= PAGE_SIZE) + kfree(bitmap); + else + vfree(bitmap); +} + +static void udf_free_partition(struct udf_part_map *map) +{ + int i; + struct udf_meta_data *mdata; + + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) + iput(map->s_uspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) + iput(map->s_fspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) + udf_sb_free_bitmap(map->s_uspace.s_bitmap); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) + udf_sb_free_bitmap(map->s_fspace.s_bitmap); + if (map->s_partition_type == UDF_SPARABLE_MAP15) + for (i = 0; i < 4; i++) + brelse(map->s_type_specific.s_sparing.s_spar_map[i]); + else if (map->s_partition_type == UDF_METADATA_MAP25) { + mdata = &map->s_type_specific.s_metadata; + iput(mdata->s_metadata_fe); + mdata->s_metadata_fe = NULL; + + iput(mdata->s_mirror_fe); + mdata->s_mirror_fe = NULL; + + iput(mdata->s_bitmap_fe); + mdata->s_bitmap_fe = NULL; + } +} + +static void udf_sb_free_partitions(struct super_block *sb) +{ + struct udf_sb_info *sbi = UDF_SB(sb); + int i; + + for (i = 0; i < sbi->s_partitions; i++) + udf_free_partition(&sbi->s_partmaps[i]); + kfree(sbi->s_partmaps); + sbi->s_partmaps = NULL; +} + static int udf_show_options(struct 
seq_file *seq, struct dentry *root) { struct super_block *sb = root->d_sb; @@ -1596,7 +1653,11 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, /* responsible for finding the PartitionDesc(s) */ if (!udf_process_sequence(sb, main_s, main_e, fileset)) return 1; - return !udf_process_sequence(sb, reserve_s, reserve_e, fileset); + udf_sb_free_partitions(sb); + if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset)) + return 1; + udf_sb_free_partitions(sb); + return 0; } /* @@ -1861,55 +1922,8 @@ u64 lvid_get_unique_id(struct super_block *sb) return ret; } -static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) -{ - int i; - int nr_groups = bitmap->s_nr_groups; - int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) * - nr_groups); - - for (i = 0; i < nr_groups; i++) - if (bitmap->s_block_bitmap[i]) - brelse(bitmap->s_block_bitmap[i]); - - if (size <= PAGE_SIZE) - kfree(bitmap); - else - vfree(bitmap); -} - -static void udf_free_partition(struct udf_part_map *map) -{ - int i; - struct udf_meta_data *mdata; - - if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) - iput(map->s_uspace.s_table); - if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) - iput(map->s_fspace.s_table); - if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) - udf_sb_free_bitmap(map->s_uspace.s_bitmap); - if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) - udf_sb_free_bitmap(map->s_fspace.s_bitmap); - if (map->s_partition_type == UDF_SPARABLE_MAP15) - for (i = 0; i < 4; i++) - brelse(map->s_type_specific.s_sparing.s_spar_map[i]); - else if (map->s_partition_type == UDF_METADATA_MAP25) { - mdata = &map->s_type_specific.s_metadata; - iput(mdata->s_metadata_fe); - mdata->s_metadata_fe = NULL; - - iput(mdata->s_mirror_fe); - mdata->s_mirror_fe = NULL; - - iput(mdata->s_bitmap_fe); - mdata->s_bitmap_fe = NULL; - } -} - static int udf_fill_super(struct super_block *sb, void *options, int silent) { - int i; int ret; struct inode 
*inode = NULL; struct udf_options uopt; @@ -2071,9 +2085,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) error_out: if (sbi->s_vat_inode) iput(sbi->s_vat_inode); - if (sbi->s_partitions) - for (i = 0; i < sbi->s_partitions; i++) - udf_free_partition(&sbi->s_partmaps[i]); #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); @@ -2081,8 +2092,7 @@ error_out: if (!(sb->s_flags & MS_RDONLY)) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); - - kfree(sbi->s_partmaps); + udf_sb_free_partitions(sb); kfree(sbi); sb->s_fs_info = NULL; @@ -2123,16 +2133,12 @@ void _udf_warn(struct super_block *sb, const char *function, static void udf_put_super(struct super_block *sb) { - int i; struct udf_sb_info *sbi; sbi = UDF_SB(sb); if (sbi->s_vat_inode) iput(sbi->s_vat_inode); - if (sbi->s_partitions) - for (i = 0; i < sbi->s_partitions; i++) - udf_free_partition(&sbi->s_partmaps[i]); #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); @@ -2140,7 +2146,7 @@ static void udf_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); - kfree(sbi->s_partmaps); + udf_sb_free_partitions(sb); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } -- cgit v1.2.3 From 17dc59ba418c3d6b0675d5b74d280acab2d4e369 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 9 Jul 2012 13:24:21 +0200 Subject: udf: Do not decrement i_blocks when freeing indirect extent block Indirect extent block is not accounted in i_blocks during allocation thus we should not decrement i_blocks when we are freeing such block during truncation. 
Reported-by: Steve Nickel Signed-off-by: Jan Kara --- fs/udf/truncate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 4b98fee8e161..8a9657d7f7c6 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -248,7 +248,7 @@ void udf_truncate_extents(struct inode *inode) /* We managed to free all extents in the * indirect extent - free it too */ BUG_ON(!epos.bh); - udf_free_blocks(sb, inode, &epos.block, + udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len); } else if (!epos.bh) { iinfo->i_lenAlloc = lenalloc; @@ -275,7 +275,7 @@ void udf_truncate_extents(struct inode *inode) if (indirect_ext_len) { BUG_ON(!epos.bh); - udf_free_blocks(sb, inode, &epos.block, 0, indirect_ext_len); + udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len); } else if (!epos.bh) { iinfo->i_lenAlloc = lenalloc; mark_inode_dirty(inode); -- cgit v1.2.3 From 952fc18ef9ec707ebdc16c0786ec360295e5ff15 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 9 Jul 2012 16:27:05 -0400 Subject: ext4: fix overhead calculation used by ext4_statfs() Commit f975d6bcc7a introduced a bug which caused ext4_statfs() to miscalculate the number of file system overhead blocks. This causes the f_blocks field in the statfs structure to be larger than it should be. This would in turn cause the "df" output to show the number of data blocks in the file system and the number of data blocks used to be larger than they should be.
Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/bitmap.c | 4 -- fs/ext4/ext4.h | 4 +- fs/ext4/resize.c | 7 ++- fs/ext4/super.c | 174 +++++++++++++++++++++++++++++++++++++++---------------- 4 files changed, 132 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 7e86a6d28c64..a94b9c63ee5c 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -11,8 +11,6 @@ #include #include "ext4.h" -#ifdef EXT4FS_DEBUG - static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; unsigned int ext4_count_free(char *bitmap, unsigned int numchars) @@ -25,8 +23,6 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars) return sum; } -#endif /* EXT4FS_DEBUG */ - int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *gdp, struct buffer_head *bh, int sz) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 293fa1ced21b..01434f25917d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1161,8 +1161,7 @@ struct ext4_sb_info { unsigned long s_desc_per_block; /* Number of group descriptors per block */ ext4_group_t s_groups_count; /* Number of groups in the fs */ ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ - unsigned long s_overhead_last; /* Last calculated overhead */ - unsigned long s_blocks_last; /* Last seen block count */ + unsigned long s_overhead; /* # of fs overhead clusters */ unsigned int s_cluster_ratio; /* Number of blocks per cluster */ unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */ loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ @@ -2037,6 +2036,7 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern int ext4_calculate_overhead(struct super_block *sb); extern int ext4_superblock_csum_verify(struct super_block *sb, struct ext4_super_block *es); extern void 
ext4_superblock_csum_set(struct super_block *sb, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 7ea6cbb44121..17d38de4068c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1197,7 +1197,7 @@ static void ext4_update_super(struct super_block *sb, struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int i; + int i, ret; BUG_ON(flex_gd->count == 0 || group_data == NULL); /* @@ -1272,6 +1272,11 @@ static void ext4_update_super(struct super_block *sb, &sbi->s_flex_groups[flex_group].free_inodes); } + /* + * Update the fs overhead information + */ + ext4_calculate_overhead(sb); + if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index eb7aa3e4ef05..78b7ede2efa0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3085,6 +3085,114 @@ static int set_journal_csum_feature_set(struct super_block *sb) return ret; } +/* + * Note: calculating the overhead so we can be compatible with + * historical BSD practice is quite difficult in the face of + * clusters/bigalloc. This is because multiple metadata blocks from + * different block group can end up in the same allocation cluster. + * Calculating the exact overhead in the face of clustered allocation + * requires either O(all block bitmaps) in memory or O(number of block + * groups**2) in time. We will still calculate the superblock for + * older file systems --- and if we come across with a bigalloc file + * system with zero in s_overhead_clusters the estimate will be close to + * correct especially for very large cluster sizes --- but for newer + * file systems, it's better to calculate this figure once at mkfs + * time, and store it in the superblock. If the superblock value is + * present (even for non-bigalloc file systems), we will use it. 
+ */ +static int count_overhead(struct super_block *sb, ext4_group_t grp, + char *buf) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp; + ext4_fsblk_t first_block, last_block, b; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); + int s, j, count = 0; + + first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + + (grp * EXT4_BLOCKS_PER_GROUP(sb)); + last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; + for (i = 0; i < ngroups; i++) { + gdp = ext4_get_group_desc(sb, i, NULL); + b = ext4_block_bitmap(sb, gdp); + if (b >= first_block && b <= last_block) { + ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); + count++; + } + b = ext4_inode_bitmap(sb, gdp); + if (b >= first_block && b <= last_block) { + ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); + count++; + } + b = ext4_inode_table(sb, gdp); + if (b >= first_block && b + sbi->s_itb_per_group <= last_block) + for (j = 0; j < sbi->s_itb_per_group; j++, b++) { + int c = EXT4_B2C(sbi, b - first_block); + ext4_set_bit(c, buf); + count++; + } + if (i != grp) + continue; + s = 0; + if (ext4_bg_has_super(sb, grp)) { + ext4_set_bit(s++, buf); + count++; + } + for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { + ext4_set_bit(EXT4_B2C(sbi, s++), buf); + count++; + } + } + if (!count) + return 0; + return EXT4_CLUSTERS_PER_GROUP(sb) - + ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); +} + +/* + * Compute the overhead and stash it in sbi->s_overhead + */ +int ext4_calculate_overhead(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); + ext4_fsblk_t overhead = 0; + char *buf = (char *) get_zeroed_page(GFP_KERNEL); + + memset(buf, 0, PAGE_SIZE); + if (!buf) + return -ENOMEM; + + /* + * Compute the overhead (FS structures). This is constant + * for a given filesystem unless the number of block groups + * changes so we cache the previous value until it does. 
+ */ + + /* + * All of the blocks before first_data_block are overhead + */ + overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); + + /* + * Add the overhead found in each block group + */ + for (i = 0; i < ngroups; i++) { + int blks; + + blks = count_overhead(sb, i, buf); + overhead += blks; + if (blks) + memset(buf, 0, PAGE_SIZE); + cond_resched(); + } + sbi->s_overhead = overhead; + smp_wmb(); + free_page((unsigned long) buf); + return 0; +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) { char *orig_data = kstrdup(data, GFP_KERNEL); @@ -3734,6 +3842,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); no_journal: + /* + * Get the # of file system overhead blocks from the + * superblock if present. + */ + if (es->s_overhead_clusters) + sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); + else { + ret = ext4_calculate_overhead(sb); + if (ret) + goto failed_mount_wq; + } + /* * The maximum number of concurrent works can be high and * concurrency isn't really necessary. Limit it to 1. @@ -4600,67 +4720,21 @@ restore_opts: return err; } -/* - * Note: calculating the overhead so we can be compatible with - * historical BSD practice is quite difficult in the face of - * clusters/bigalloc. This is because multiple metadata blocks from - * different block group can end up in the same allocation cluster. - * Calculating the exact overhead in the face of clustered allocation - * requires either O(all block bitmaps) in memory or O(number of block - * groups**2) in time. We will still calculate the superblock for - * older file systems --- and if we come across with a bigalloc file - * system with zero in s_overhead_clusters the estimate will be close to - * correct especially for very large cluster sizes --- but for newer - * file systems, it's better to calculate this figure once at mkfs - * time, and store it in the superblock. 
If the superblock value is - * present (even for non-bigalloc file systems), we will use it. - */ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - struct ext4_group_desc *gdp; + ext4_fsblk_t overhead = 0; u64 fsid; s64 bfree; - if (test_opt(sb, MINIX_DF)) { - sbi->s_overhead_last = 0; - } else if (es->s_overhead_clusters) { - sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters); - } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { - ext4_group_t i, ngroups = ext4_get_groups_count(sb); - ext4_fsblk_t overhead = 0; - - /* - * Compute the overhead (FS structures). This is constant - * for a given filesystem unless the number of block groups - * changes so we cache the previous value until it does. - */ - - /* - * All of the blocks before first_data_block are - * overhead - */ - overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); - - /* - * Add the overhead found in each block group - */ - for (i = 0; i < ngroups; i++) { - gdp = ext4_get_group_desc(sb, i, NULL); - overhead += ext4_num_overhead_clusters(sb, i, gdp); - cond_resched(); - } - sbi->s_overhead_last = overhead; - smp_wmb(); - sbi->s_blocks_last = ext4_blocks_count(es); - } + if (!test_opt(sb, MINIX_DF)) + overhead = sbi->s_overhead; buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = (ext4_blocks_count(es) - - EXT4_C2B(sbi, sbi->s_overhead_last)); + buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead); bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); /* prevent underflow in case that few free space is available */ -- cgit v1.2.3 From ef58f69c3c34f6377f1e21d3533c806dbd980ad0 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 9 Jul 2012 16:29:05 -0400 Subject: ext4: use proper csum calculation in ext4_rename In ext4_rename, 
when the old name is a dir, we need to change ".." to its new parent and journal the change, so with metadata_csum enabled, we have to re-calc the csum. As the first block of the dir can be either a htree root or a normal directory block and we have different csum calculation for these 2 types, we have to choose the right one in ext4_rename. btw, it is found by xfstests 013. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Acked-by: Darrick J. Wong --- fs/ext4/namei.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5845cd97bf8b..0edaf18d843e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2918,8 +2918,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); - retval = ext4_handle_dirty_dirent_node(handle, old_inode, - dir_bh); + if (is_dx(old_inode)) { + retval = ext4_handle_dirty_dx_node(handle, + old_inode, + dir_bh); + } else { + retval = ext4_handle_dirty_dirent_node(handle, + old_inode, + dir_bh); + } if (retval) { ext4_std_error(old_dir->i_sb, retval); goto end_rename; -- cgit v1.2.3 From 41eb70dde42b2360074a559a6f1fc49860a50179 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 9 Jul 2012 16:29:27 -0400 Subject: ext4: use s_csum_seed instead of i_csum_seed for xattr block In xattr block operation, we use h_refcount to indicate whether the xattr block is shared among many inodes. And xattr block csum uses s_csum_seed if it is shared and i_csum_seed if it belongs to one inode. But this has a problem. So consider the block is shared first between inode A and B, and B has some xattr update and CoW the xattr block.
When it updates the *old* xattr block(because of the h_refcount change) and calls ext4_xattr_release_block, we has no idea that inode A is the real owner of the *old* xattr block and we can't use the i_csum_seed of inode A either in xattr block csum calculation. And I don't think we have an easy way to find inode A. So this patch just removes the tricky i_csum_seed and we now uses s_csum_seed every time for the xattr block csum. The corresponding patch for the e2fsprogs will be sent in another patch. This is spotted by xfstests 117. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Acked-by: Darrick J. Wong --- fs/ext4/xattr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e56c9ed7d6e3..2cdb98d62980 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -127,19 +127,16 @@ static __le32 ext4_xattr_block_csum(struct inode *inode, struct ext4_xattr_header *hdr) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct ext4_inode_info *ei = EXT4_I(inode); __u32 csum, old; old = hdr->h_checksum; hdr->h_checksum = 0; - if (le32_to_cpu(hdr->h_refcount) != 1) { - block_nr = cpu_to_le64(block_nr); - csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr, - sizeof(block_nr)); - } else - csum = ei->i_csum_seed; + block_nr = cpu_to_le64(block_nr); + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr, + sizeof(block_nr)); csum = ext4_chksum(sbi, csum, (__u8 *)hdr, EXT4_BLOCK_SIZE(inode->i_sb)); + hdr->h_checksum = old; return cpu_to_le32(csum); } -- cgit v1.2.3 From e7bcf8230498b9568e09d74e296e71a01e024006 Mon Sep 17 00:00:00 2001 From: HaiboLiu Date: Mon, 9 Jul 2012 16:29:28 -0400 Subject: ext4: fix out-of-date comments in extents.c In this patch, ext4_ext_try_to_merge has been change to merge an extent both left and right. So we need to update the comment in here. 
Signed-off-by: HaiboLiu Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1089cba913a..46b5c9fdc96a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1891,11 +1891,10 @@ has_space: nearex->ee_len = newext->ee_len; merge: - /* try to merge extents to the right */ + /* try to merge extents */ if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(inode, path, nearex); - /* try to merge extents to the left */ /* time to correct all indexes above */ err = ext4_ext_correct_indexes(handle, inode, path); -- cgit v1.2.3 From 62a1391ddd6fbe82fc02154dc760bcc5cbc9ef68 Mon Sep 17 00:00:00 2001 From: Haibo Liu Date: Mon, 9 Jul 2012 16:29:28 -0400 Subject: ext4: remove an unused statement in ext4_mb_get_buddy_page_lock() In this patch, the statement "poff = block % blocks_per_page" in ext4_mb_get_buddy_page_lock has no effect. It will be optimized out by the compiler, but it's better to remove it. Signed-off-by: Haibo Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9f1e655979b9..ca376e7d716a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -969,7 +969,6 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, block++; pnum = block / blocks_per_page; - poff = block % blocks_per_page; page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); if (!page) return -EIO; -- cgit v1.2.3 From fbe104942d3ff44f6802e8e4a3fbf267c1fb9ac4 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 9 Jul 2012 16:29:29 -0400 Subject: ext4: split ext4_file_write into buffered IO and direct IO ext4_file_dio_write is defined in order to split buffered IO and direct IO in ext4. This patch just refactor some stuff in write path. 
CC: Tao Ma CC: Eric Sandeen CC: Robin Dong Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/file.c | 60 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8c7642a00054..a10dc7742aec 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -90,34 +90,16 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, } static ssize_t -ext4_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int unaligned_aio = 0; ssize_t ret; - /* - * If we have encountered a bitmap-format file, the size limit - * is smaller than s_maxbytes, which is for extent-mapped files. - */ - - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - size_t length = iov_length(iov, nr_segs); - - if ((pos > sbi->s_bitmap_maxbytes || - (pos == sbi->s_bitmap_maxbytes && length > 0))) - return -EFBIG; - - if (pos + length > sbi->s_bitmap_maxbytes) { - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, - sbi->s_bitmap_maxbytes - pos); - } - } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && - !is_sync_kiocb(iocb))) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + !is_sync_kiocb(iocb)) unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); - } /* Unaligned direct AIO must be serialized; see comment above */ if (unaligned_aio) { @@ -141,6 +123,40 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, return ret; } +static ssize_t +ext4_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + ssize_t ret; + + /* + * If we have encountered a bitmap-format file, the 
size limit + * is smaller than s_maxbytes, which is for extent-mapped files. + */ + + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + size_t length = iov_length(iov, nr_segs); + + if ((pos > sbi->s_bitmap_maxbytes || + (pos == sbi->s_bitmap_maxbytes && length > 0))) + return -EFBIG; + + if (pos + length > sbi->s_bitmap_maxbytes) { + nr_segs = iov_shorten((struct iovec *)iov, nr_segs, + sbi->s_bitmap_maxbytes - pos); + } + } + + if (unlikely(iocb->ki_filp->f_flags & O_DIRECT)) + ret = ext4_file_dio_write(iocb, iov, nr_segs, pos); + else + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + + return ret; +} + static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ext4_page_mkwrite, -- cgit v1.2.3 From 729f52c6be51013c9268e5fc85acbc1091286fdb Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 9 Jul 2012 16:29:29 -0400 Subject: ext4: add a new nolock flag in ext4_map_blocks EXT4_GET_BLOCKS_NO_LOCK flag is added to indicate that we don't need to acquire i_data_sem lock in ext4_map_blocks. Meanwhile, it changes ext4_get_block() to not start a new journal because when we do a overwrite dio, there is no any metadata that needs to be modified. We define a new function called ext4_get_block_write_nolock, which is used in dio overwrite nolock. In this function, it doesn't try to acquire i_data_sem lock and doesn't start a new journal as it does a lookup. 
CC: Tao Ma CC: Eric Sandeen CC: Robin Dong Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ fs/ext4/inode.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 01434f25917d..4a49f8225d0b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -571,6 +571,8 @@ enum { #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 /* Request will not result in inode size update (user for fallocate) */ #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 + /* Do not take i_data_sem locking in ext4_map_blocks */ +#define EXT4_GET_BLOCKS_NO_LOCK 0x0100 /* * Flags used by ext4_free_blocks diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 02bc8cbe7281..76cb3b1ad78a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -544,7 +544,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * Try to see if we can get the block without requesting a new * file system block. 
*/ - down_read((&EXT4_I(inode)->i_data_sem)); + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + down_read((&EXT4_I(inode)->i_data_sem)); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -552,7 +553,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, retval = ext4_ind_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); } - up_read((&EXT4_I(inode)->i_data_sem)); + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + up_read((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, map); @@ -2818,6 +2820,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, EXT4_GET_BLOCKS_IO_CREATE_EXT); } +static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int flags) +{ + handle_t *handle = ext4_journal_current_handle(); + struct ext4_map_blocks map; + int ret = 0; + + ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n", + inode->i_ino, flags); + + flags = EXT4_GET_BLOCKS_NO_LOCK; + + map.m_lblk = iblock; + map.m_len = bh_result->b_size >> inode->i_blkbits; + + ret = ext4_map_blocks(handle, inode, &map, flags); + if (ret > 0) { + map_bh(bh_result, inode->i_sb, map.m_pblk); + bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) | + map.m_flags; + bh_result->b_size = inode->i_sb->s_blocksize * map.m_len; + ret = 0; + } + return ret; +} + static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private, int ret, bool is_async) @@ -2966,6 +2994,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, loff_t final_size = offset + count; if (rw == WRITE && final_size <= inode->i_size) { + int overwrite = 0; + /* * We could direct write to holes and fallocate. 
* @@ -3005,13 +3035,22 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, EXT4_I(inode)->cur_aio_dio = iocb->private; } - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block_write, - ext4_end_io_dio, - NULL, - DIO_LOCKING); + if (overwrite) + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block_write_nolock, + ext4_end_io_dio, + NULL, + 0); + else + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block_write, + ext4_end_io_dio, + NULL, + DIO_LOCKING); if (iocb->private) EXT4_I(inode)->cur_aio_dio = NULL; /* @@ -3031,7 +3070,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { ext4_free_io_end(iocb->private); iocb->private = NULL; - } else if (ret > 0 && ext4_test_inode_state(inode, + } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN)) { int err; /* -- cgit v1.2.3 From 349ecd6a3c0e4f97fa4dc6bd3917455ccc106d23 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 9 Jul 2012 23:38:36 +0200 Subject: jbd: Check return value of blkdev_issue_flush() blkdev_issue_flush() can fail. Make sure the error gets properly propagated. 
Signed-off-by: Jan Kara --- fs/jbd/recovery.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 008bf062fd26..a748fe21465a 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -265,8 +265,11 @@ int journal_recover(journal_t *journal) if (!err) err = err2; /* Flush disk caches to get replayed data on the permanent storage */ - if (journal->j_flags & JFS_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + if (journal->j_flags & JFS_BARRIER) { + err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + if (!err) + err = err2; + } return err; } -- cgit v1.2.3 From 44f4f729e7a143b08bd63c33cb78b3181d9f4716 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 9 Jul 2012 23:40:46 +0200 Subject: ext3: Check return value of blkdev_issue_flush() blkdev_issue_flush() can fail. Make sure the error gets properly propagated. Signed-off-by: Jan Kara --- fs/ext3/fsync.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d4dff278cbd8..b31dbd4c46ad 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -92,8 +92,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * disk caches manually so that data really is on persistent * storage */ - if (needs_barrier) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (needs_barrier) { + int err; + + err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (!ret) + ret = err; + } out: trace_ext3_sync_file_exit(inode, ret); return ret; -- cgit v1.2.3 From cf5388307a2b4faab4b11d732b61c85741be6169 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 4 Jul 2012 15:42:48 +0200 Subject: Btrfs: fix buffer leak in btrfs_next_old_leaf When calling btrfs_next_old_leaf, we were leaking an extent buffer in the rare case of using the deadlock avoidance code needed for the tree mod log. 
Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8206b3900587..67fe46fdee6f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5127,6 +5127,7 @@ again: * locked. To solve this situation, we give up * on our lock and cycle. */ + free_extent_buffer(next); btrfs_release_path(path); cond_resched(); goto again; -- cgit v1.2.3 From 097b8a7c9e48e2cb50fd0eb9315791921beaf484 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 11:08:04 +0200 Subject: Btrfs: join tree mod log code with the code holding back delayed refs We've got two mechanisms both required for reliable backref resolving (tree mod log and holding back delayed refs). You cannot make use of one without the other. So instead of requiring the user of this mechanism to setup both correctly, we join them into a single interface. Additionally, we stop inserting non-blockers into fs_info->tree_mod_seq_list as we did before, which was of no value. 
Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 30 ++---- fs/btrfs/backref.h | 3 +- fs/btrfs/ctree.c | 275 ++++++++++++++++++++++++++++++------------------- fs/btrfs/ctree.h | 31 +++--- fs/btrfs/delayed-ref.c | 44 ++++---- fs/btrfs/delayed-ref.h | 49 +-------- fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 21 ++-- fs/btrfs/transaction.c | 4 - 9 files changed, 240 insertions(+), 219 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a383c18e74e8..7d80ddd8f544 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, */ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist *refs, struct ulist *roots, - const u64 *extent_item_pos) + u64 time_seq, struct ulist *refs, + struct ulist *roots, const u64 *extent_item_pos) { struct btrfs_key key; struct btrfs_path *path; @@ -837,7 +836,7 @@ again: btrfs_put_delayed_ref(&head->node); goto again; } - ret = __add_delayed_refs(head, delayed_ref_seq, + ret = __add_delayed_refs(head, time_seq, &prefs_delayed); mutex_unlock(&head->mutex); if (ret) { @@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks) */ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist **leafs, + u64 time_seq, struct ulist **leafs, const u64 *extent_item_pos) { struct ulist *tmp; @@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, return -ENOMEM; } - ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, + ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, *leafs, tmp, extent_item_pos); ulist_free(tmp); @@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, */ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct 
btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist **roots) + u64 time_seq, struct ulist **roots) { struct ulist *tmp; struct ulist_node *node = NULL; @@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, ULIST_ITER_INIT(&uiter); while (1) { - ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, + ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, tmp, *roots, NULL); if (ret < 0 && ret != -ENOENT) { ulist_free(tmp); @@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, struct ulist *roots = NULL; struct ulist_node *ref_node = NULL; struct ulist_node *root_node = NULL; - struct seq_list seq_elem = {}; struct seq_list tree_mod_seq_elem = {}; struct ulist_iterator ref_uiter; struct ulist_iterator root_uiter; - struct btrfs_delayed_ref_root *delayed_refs = NULL; pr_debug("resolving all inodes for extent %llu\n", extent_item_objectid); @@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, trans = btrfs_join_transaction(fs_info->extent_root); if (IS_ERR(trans)) return PTR_ERR(trans); - - delayed_refs = &trans->transaction->delayed_refs; - spin_lock(&delayed_refs->lock); - btrfs_get_delayed_seq(delayed_refs, &seq_elem); - spin_unlock(&delayed_refs->lock); btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, - seq_elem.seq, tree_mod_seq_elem.seq, &refs, + tree_mod_seq_elem.seq, &refs, &extent_item_pos); if (ret) goto out; @@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, - seq_elem.seq, - tree_mod_seq_elem.seq, &roots); + tree_mod_seq_elem.seq, &roots); if (ret) break; ULIST_ITER_INIT(&root_uiter); @@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, out: if 
(!search_commit_root) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); - btrfs_put_delayed_seq(delayed_refs, &seq_elem); btrfs_end_transaction(trans, fs_info->extent_root); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index c18d8ac7b795..3a1ad3e2dcb0 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist **roots); + u64 time_seq, struct ulist **roots); struct btrfs_data_container *init_data_container(u32 total_bytes); struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 67fe46fdee6f..bef68ab32204 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -321,7 +321,7 @@ struct tree_mod_root { struct tree_mod_elem { struct rb_node node; u64 index; /* shifted logical */ - struct seq_list elem; + u64 seq; enum mod_log_op op; /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ @@ -341,20 +341,50 @@ struct tree_mod_elem { struct tree_mod_root old_root; }; -static inline void -__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) +static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info) { - elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); - list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); + read_lock(&fs_info->tree_mod_log_lock); } -void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem) +static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info) +{ + read_unlock(&fs_info->tree_mod_log_lock); +} + +static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info) +{ + write_lock(&fs_info->tree_mod_log_lock); +} + +static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) { - elem->flags = 1; + 
write_unlock(&fs_info->tree_mod_log_lock); +} + +/* + * This adds a new blocker to the tree mod log's blocker list if the @elem + * passed does not already have a sequence number set. So when a caller expects + * to record tree modifications, it should ensure to set elem->seq to zero + * before calling btrfs_get_tree_mod_seq. + * Returns a fresh, unused tree log modification sequence number, even if no new + * blocker was added. + */ +u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem) +{ + u64 seq; + + tree_mod_log_write_lock(fs_info); spin_lock(&fs_info->tree_mod_seq_lock); - __get_tree_mod_seq(fs_info, elem); + if (!elem->seq) { + elem->seq = btrfs_inc_tree_mod_seq(fs_info); + list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); + } + seq = btrfs_inc_tree_mod_seq(fs_info); spin_unlock(&fs_info->tree_mod_seq_lock); + tree_mod_log_write_unlock(fs_info); + + return seq; } void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, @@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, if (!seq_putting) return; - BUG_ON(!(elem->flags & 1)); spin_lock(&fs_info->tree_mod_seq_lock); list_del(&elem->list); + elem->seq = 0; list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { - if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { + if (cur_elem->seq < min_seq) { if (seq_putting > cur_elem->seq) { /* * blocker with lower sequence number exists, we * cannot remove anything from the log */ - goto out; + spin_unlock(&fs_info->tree_mod_seq_lock); + return; } min_seq = cur_elem->seq; } } + spin_unlock(&fs_info->tree_mod_seq_lock); + + /* + * we removed the lowest blocker from the blocker list, so there may be + * more processible delayed refs. + */ + wake_up(&fs_info->tree_mod_seq_wait); /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. 
*/ - write_lock(&fs_info->tree_mod_log_lock); + tree_mod_log_write_lock(fs_info); tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); tm = container_of(node, struct tree_mod_elem, node); - if (tm->elem.seq > min_seq) + if (tm->seq > min_seq) continue; rb_erase(node, tm_root); - list_del(&tm->elem.list); kfree(tm); } - write_unlock(&fs_info->tree_mod_log_lock); -out: - spin_unlock(&fs_info->tree_mod_seq_lock); + tree_mod_log_write_unlock(fs_info); } /* @@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node **new; struct rb_node *parent = NULL; struct tree_mod_elem *cur; - int ret = 0; - BUG_ON(!tm || !tm->elem.seq); + BUG_ON(!tm || !tm->seq); - write_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; while (*new) { @@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) new = &((*new)->rb_left); else if (cur->index > tm->index) new = &((*new)->rb_right); - else if (cur->elem.seq < tm->elem.seq) + else if (cur->seq < tm->seq) new = &((*new)->rb_left); - else if (cur->elem.seq > tm->elem.seq) + else if (cur->seq > tm->seq) new = &((*new)->rb_right); else { kfree(tm); - ret = -EEXIST; - goto unlock; + return -EEXIST; } } rb_link_node(&tm->node, parent, new); rb_insert_color(&tm->node, tm_root); -unlock: - write_unlock(&fs_info->tree_mod_log_lock); - return ret; + return 0; } +/* + * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it + * returns zero with the tree_mod_log_lock acquired. The caller must hold + * this until all tree mod log insertions are recorded in the rb tree and then + * call tree_mod_log_write_unlock() to release. 
+ */ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { smp_mb(); if (list_empty(&(fs_info)->tree_mod_seq_list)) return 1; - if (!eb) - return 0; - if (btrfs_header_level(eb) == 0) + if (eb && btrfs_header_level(eb) == 0) return 1; + + tree_mod_log_write_lock(fs_info); + if (list_empty(&fs_info->tree_mod_seq_list)) { + /* + * someone emptied the list while we were waiting for the lock. + * we must not add to the list when no blocker exists. + */ + tree_mod_log_write_unlock(fs_info); + return 1; + } + return 0; } /* - * This allocates memory and gets a tree modification sequence number when - * needed. + * This allocates memory and gets a tree modification sequence number. * - * Returns 0 when no sequence number is needed, < 0 on error. - * Returns 1 when a sequence number was added. In this case, - * fs_info->tree_mod_seq_lock was acquired and must be released by the caller - * after inserting into the rb tree. + * Returns <0 on error. + * Returns >0 (the added sequence number) on success. */ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, struct tree_mod_elem **tm_ret) { struct tree_mod_elem *tm; - int seq; - if (tree_mod_dont_log(fs_info, NULL)) - return 0; - - tm = *tm_ret = kzalloc(sizeof(*tm), flags); + /* + * once we switch from spin locks to something different, we should + * honor the flags parameter here. + */ + tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); if (!tm) return -ENOMEM; - tm->elem.flags = 0; - spin_lock(&fs_info->tree_mod_seq_lock); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list, because no blocker exists. items - * are removed from the list only when the existing blocker is - * removed from the list. 
- */ - kfree(tm); - seq = 0; - spin_unlock(&fs_info->tree_mod_seq_lock); - } else { - __get_tree_mod_seq(fs_info, &tm->elem); - seq = tm->elem.seq; - } - - return seq; + tm->seq = btrfs_inc_tree_mod_seq(fs_info); + return tm->seq; } -static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) +static inline int +__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) { - struct tree_mod_elem *tm; int ret; + struct tree_mod_elem *tm; ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) + if (ret < 0) return ret; tm->index = eb->start >> PAGE_CACHE_SHIFT; @@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, tm->slot = slot; tm->generation = btrfs_node_ptr_generation(eb, slot); - ret = __tree_mod_log_insert(fs_info, tm); - spin_unlock(&fs_info->tree_mod_seq_lock); + return __tree_mod_log_insert(fs_info, tm); +} + +static noinline int +tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) +{ + int ret; + + if (tree_mod_dont_log(fs_info, eb)) + return 0; + + ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); + + tree_mod_log_write_unlock(fs_info); return ret; } @@ -542,6 +582,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); } +static noinline int +tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op) +{ + return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); +} + static noinline int tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int dst_slot, int src_slot, @@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, return 0; for (i = 0; i + 
dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, MOD_LOG_KEY_REMOVE_WHILE_MOVING); BUG_ON(ret < 0); } ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) - return ret; + if (ret < 0) + goto out; tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; @@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->op = MOD_LOG_MOVE_KEYS; ret = __tree_mod_log_insert(fs_info, tm); - spin_unlock(&fs_info->tree_mod_seq_lock); +out: + tree_mod_log_write_unlock(fs_info); return ret; } +static inline void +__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) +{ + int i; + u32 nritems; + int ret; + + nritems = btrfs_header_nritems(eb); + for (i = nritems - 1; i >= 0; i--) { + ret = tree_mod_log_insert_key_locked(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING); + BUG_ON(ret < 0); + } +} + static noinline int tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, struct extent_buffer *old_root, @@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm; int ret; + if (tree_mod_dont_log(fs_info, NULL)) + return 0; + + __tree_mod_log_free_eb(fs_info, old_root); + ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) - return ret; + if (ret < 0) + goto out; tm->index = new_root->start >> PAGE_CACHE_SHIFT; tm->old_root.logical = old_root->start; @@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->op = MOD_LOG_ROOT_REPLACE; ret = __tree_mod_log_insert(fs_info, tm); - spin_unlock(&fs_info->tree_mod_seq_lock); +out: + tree_mod_log_write_unlock(fs_info); return ret; } @@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, struct tree_mod_elem *found = NULL; u64 index = start >> PAGE_CACHE_SHIFT; - read_lock(&fs_info->tree_mod_log_lock); + tree_mod_log_read_lock(fs_info); tm_root = 
&fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { @@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, node = node->rb_left; } else if (cur->index > index) { node = node->rb_right; - } else if (cur->elem.seq < min_seq) { + } else if (cur->seq < min_seq) { node = node->rb_left; } else if (!smallest) { /* we want the node with the highest seq */ if (found) - BUG_ON(found->elem.seq > cur->elem.seq); + BUG_ON(found->seq > cur->seq); found = cur; node = node->rb_left; - } else if (cur->elem.seq > min_seq) { + } else if (cur->seq > min_seq) { /* we want the node with the smallest seq */ if (found) - BUG_ON(found->elem.seq < cur->elem.seq); + BUG_ON(found->seq < cur->seq); found = cur; node = node->rb_right; } else { @@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, break; } } - read_unlock(&fs_info->tree_mod_log_lock); + tree_mod_log_read_unlock(fs_info); return found; } @@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) return __tree_mod_log_search(fs_info, start, min_seq, 0); } -static inline void +static noinline void tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, unsigned long src_offset, int nr_items) @@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (tree_mod_dont_log(fs_info, NULL)) return; - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { + tree_mod_log_write_unlock(fs_info); return; + } - /* speed this up by single seq for all operations? 
*/ for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); + ret = tree_mod_log_insert_key_locked(fs_info, src, + i + src_offset, + MOD_LOG_KEY_REMOVE); BUG_ON(ret < 0); - ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); + ret = tree_mod_log_insert_key_locked(fs_info, dst, + i + dst_offset, + MOD_LOG_KEY_ADD); BUG_ON(ret < 0); } + + tree_mod_log_write_unlock(fs_info); } static inline void @@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, BUG_ON(ret < 0); } -static inline void +static noinline void tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int slot, int atomic) @@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, BUG_ON(ret < 0); } -static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) +static noinline void +tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { - int i; - int ret; - u32 nritems; - if (tree_mod_dont_log(fs_info, eb)) return; - nritems = btrfs_header_nritems(eb); - for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); - BUG_ON(ret < 0); - } + __tree_mod_log_free_eb(fs_info, eb); + + tree_mod_log_write_unlock(fs_info); } -static inline void +static noinline void tree_mod_log_set_root_pointer(struct btrfs_root *root, struct extent_buffer *new_root_node) { int ret; - tree_mod_log_free_eb(root->fs_info, root->node); ret = tree_mod_log_insert_root(root->fs_info, root->node, new_root_node, GFP_NOFS); BUG_ON(ret < 0); @@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, unsigned long p_size = sizeof(struct btrfs_key_ptr); n = btrfs_header_nritems(eb); - while (tm && tm->elem.seq >= time_seq) { + while (tm && tm->seq >= time_seq) { /* * all the operations are 
recorded with the operator used for * the modification. as we're going backwards, we do the diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 84ac723f58f8..8f8dc46f44e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1030,6 +1030,13 @@ struct btrfs_block_group_cache { struct list_head cluster_list; }; +/* delayed seq elem */ +struct seq_list { + struct list_head list; + u64 seq; +}; + +/* fs_info */ struct reloc_control; struct btrfs_device; struct btrfs_fs_devices; @@ -1144,6 +1151,8 @@ struct btrfs_fs_info { spinlock_t tree_mod_seq_lock; atomic_t tree_mod_seq; struct list_head tree_mod_seq_list; + struct seq_list tree_mod_seq_elem; + wait_queue_head_t tree_mod_seq_wait; /* this protects tree_mod_log */ rwlock_t tree_mod_log_lock; @@ -2798,6 +2807,16 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info); } +/* tree mod log functions from ctree.c */ +u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem); +void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem); +static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) +{ + return atomic_inc_return(&fs_info->tree_mod_seq); +} + /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, struct btrfs_path *path, @@ -3157,18 +3176,6 @@ void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err); -/* delayed seq elem */ -struct seq_list { - struct list_head list; - u64 seq; - u32 flags; -}; - -void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem); -void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem); - static inline int is_fstree(u64 rootid) { if (rootid == BTRFS_FS_TREE_OBJECTID || diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 13ae7b04790e..21a757717637 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -233,22 +233,26 @@ 
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } -int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, +int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, u64 seq) { struct seq_list *elem; - - assert_spin_locked(&delayed_refs->lock); - if (list_empty(&delayed_refs->seq_head)) - return 0; - - elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list); - if (seq >= elem->seq) { - pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n", - seq, elem->seq, delayed_refs); - return 1; + int ret = 0; + + spin_lock(&fs_info->tree_mod_seq_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); + if (seq >= elem->seq) { + pr_debug("holding back delayed_ref %llu, lowest is " + "%llu (%p)\n", seq, elem->seq, delayed_refs); + ret = 1; + } } - return 0; + + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, @@ -526,7 +530,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->in_tree = 1; if (is_fstree(ref_root)) - seq = inc_delayed_seq(delayed_refs); + seq = btrfs_inc_tree_mod_seq(fs_info); ref->seq = seq; full_ref = btrfs_delayed_node_to_tree_ref(ref); @@ -585,7 +589,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->in_tree = 1; if (is_fstree(ref_root)) - seq = inc_delayed_seq(delayed_refs); + seq = btrfs_inc_tree_mod_seq(fs_info); ref->seq = seq; full_ref = btrfs_delayed_node_to_data_ref(ref); @@ -659,8 +663,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, num_bytes, parent, ref_root, level, action, for_cow); if (!is_fstree(ref_root) && - waitqueue_active(&delayed_refs->seq_wait)) - wake_up(&delayed_refs->seq_wait); + waitqueue_active(&fs_info->tree_mod_seq_wait)) + wake_up(&fs_info->tree_mod_seq_wait); 
spin_unlock(&delayed_refs->lock); return 0; @@ -708,8 +712,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, num_bytes, parent, ref_root, owner, offset, action, for_cow); if (!is_fstree(ref_root) && - waitqueue_active(&delayed_refs->seq_wait)) - wake_up(&delayed_refs->seq_wait); + waitqueue_active(&fs_info->tree_mod_seq_wait)) + wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); return 0; @@ -736,8 +740,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, num_bytes, BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data); - if (waitqueue_active(&delayed_refs->seq_wait)) - wake_up(&delayed_refs->seq_wait); + if (waitqueue_active(&fs_info->tree_mod_seq_wait)) + wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); return 0; } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 413927fb9957..2b5cb27f9861 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root { int flushing; u64 run_delayed_start; - - /* - * seq number of delayed refs. We need to know if a backref was being - * added before the currently processed ref or afterwards. - */ - u64 seq; - - /* - * seq_list holds a list of all seq numbers that are currently being - * added to the list. While walking backrefs (btrfs_find_all_roots, - * qgroups), which might take some time, no newer ref must be processed, - * as it might influence the outcome of the walk. - */ - struct list_head seq_head; - - /* - * when the only refs we have in the list must not be processed, we want - * to wait for more refs to show up or for the end of backref walking. 
- */ - wait_queue_head_t seq_wait; }; static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) @@ -195,33 +175,8 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, struct list_head *cluster, u64 search_start); -static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) -{ - assert_spin_locked(&delayed_refs->lock); - ++delayed_refs->seq; - return delayed_refs->seq; -} - -static inline void -btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, - struct seq_list *elem) -{ - assert_spin_locked(&delayed_refs->lock); - elem->seq = delayed_refs->seq; - list_add_tail(&elem->list, &delayed_refs->seq_head); -} - -static inline void -btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, - struct seq_list *elem) -{ - spin_lock(&delayed_refs->lock); - list_del(&elem->list); - wake_up(&delayed_refs->seq_wait); - spin_unlock(&delayed_refs->lock); -} - -int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, +int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, u64 seq); /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8cc47103a32e..19a39e10d6f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1944,6 +1944,8 @@ int open_ctree(struct super_block *sb, fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; + init_waitqueue_head(&fs_info->tree_mod_seq_wait); + /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); spin_lock_init(&fs_info->reada_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1d36702ff7..94ce79f76e5f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2217,6 +2217,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref; struct btrfs_delayed_ref_head *locked_ref = NULL; struct btrfs_delayed_extent_op 
*extent_op; + struct btrfs_fs_info *fs_info = root->fs_info; int ret; int count = 0; int must_insert_reserved = 0; @@ -2255,7 +2256,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref = select_delayed_ref(locked_ref); if (ref && ref->seq && - btrfs_check_delayed_seq(delayed_refs, ref->seq)) { + btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { /* * there are still refs with lower seq numbers in the * process of being added. Don't run this ref yet. @@ -2337,7 +2338,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } next: - do_chunk_alloc(trans, root->fs_info->extent_root, + do_chunk_alloc(trans, fs_info->extent_root, 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); @@ -2347,18 +2348,19 @@ next: return count; } -static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, +static void wait_for_more_refs(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, unsigned long num_refs, struct list_head *first_seq) { spin_unlock(&delayed_refs->lock); pr_debug("waiting for more refs (num %ld, first %p)\n", num_refs, first_seq); - wait_event(delayed_refs->seq_wait, + wait_event(fs_info->tree_mod_seq_wait, num_refs != delayed_refs->num_entries || - delayed_refs->seq_head.next != first_seq); + fs_info->tree_mod_seq_list.next != first_seq); pr_debug("done waiting for more refs (num %ld, first %p)\n", - delayed_refs->num_entries, delayed_refs->seq_head.next); + delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); spin_lock(&delayed_refs->lock); } @@ -2403,6 +2405,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, again: consider_waiting = 0; spin_lock(&delayed_refs->lock); + if (count == 0) { count = delayed_refs->num_entries * 2; run_most = 1; @@ -2437,7 +2440,7 @@ again: num_refs = delayed_refs->num_entries; first_seq = root->fs_info->tree_mod_seq_list.next; } else { - wait_for_more_refs(delayed_refs, + 
wait_for_more_refs(root->fs_info, delayed_refs, num_refs, first_seq); /* * after waiting, things have changed. we @@ -5190,8 +5193,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; - if (waitqueue_active(&delayed_refs->seq_wait)) - wake_up(&delayed_refs->seq_wait); + if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) + wake_up(&root->fs_info->tree_mod_seq_wait); /* * we don't take a ref on the node because we're removing it from the diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b72b068183ec..621c8dc48fb6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -38,7 +38,6 @@ void put_transaction(struct btrfs_transaction *transaction) if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); WARN_ON(transaction->delayed_refs.root.rb_node); - WARN_ON(!list_empty(&transaction->delayed_refs.seq_head)); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -126,7 +125,6 @@ loop: cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; - cur_trans->delayed_refs.seq = 1; /* * although the tree mod log is per file system and not per transaction, @@ -145,10 +143,8 @@ loop: } atomic_set(&fs_info->tree_mod_seq, 0); - init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); - INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &fs_info->trans_list); -- cgit v1.2.3 From 630dc772ea51bca3ec6fac609f450cbe0cafd1d6 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 11:06:07 +0200 Subject: Btrfs: qgroup on-disk format Not all features are in use by the current version and thus may change in the future. 
Signed-off-by: Arne Jansen --- fs/btrfs/ctree.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f8dc46f44e7..33088b0dbf3f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -91,6 +91,9 @@ struct btrfs_ordered_sum; /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL +/* holds quota configuration and tracking */ +#define BTRFS_QUOTA_TREE_OBJECTID 8ULL + /* orhpan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL @@ -883,6 +886,72 @@ struct btrfs_block_group_item { __le64 flags; } __attribute__ ((__packed__)); +/* + * is subvolume quota turned on? + */ +#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) +/* + * SCANNING is set during the initialization phase + */ +#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) +/* + * Some qgroup entries are known to be out of date, + * either because the configuration has changed in a way that + * makes a rescan necessary, or because the fs has been mounted + * with a non-qgroup-aware version. + * Turning qouta off and on again makes it inconsistent, too. + */ +#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) + +#define BTRFS_QGROUP_STATUS_VERSION 1 + +struct btrfs_qgroup_status_item { + __le64 version; + /* + * the generation is updated during every commit. As older + * versions of btrfs are not aware of qgroups, it will be + * possible to detect inconsistencies by checking the + * generation on mount time + */ + __le64 generation; + + /* flag definitions see above */ + __le64 flags; + + /* + * only used during scanning to record the progress + * of the scan. 
It contains a logical address + */ + __le64 scan; +} __attribute__ ((__packed__)); + +struct btrfs_qgroup_info_item { + __le64 generation; + __le64 rfer; + __le64 rfer_cmpr; + __le64 excl; + __le64 excl_cmpr; +} __attribute__ ((__packed__)); + +/* flags definition for qgroup limits */ +#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) +#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) +#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) +#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) +#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) +#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) + +struct btrfs_qgroup_limit_item { + /* + * only updated when any of the other values change + */ + __le64 flags; + __le64 max_rfer; + __le64 max_excl; + __le64 rsv_rfer; + __le64 rsv_excl; +} __attribute__ ((__packed__)); + struct btrfs_space_info { u64 flags; @@ -1534,6 +1603,30 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_DEV_ITEM_KEY 216 #define BTRFS_CHUNK_ITEM_KEY 228 +/* + * Records the overall state of the qgroups. + * There's only one instance of this key present, + * (0, BTRFS_QGROUP_STATUS_KEY, 0) + */ +#define BTRFS_QGROUP_STATUS_KEY 240 +/* + * Records the currently used space of the qgroup. + * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). + */ +#define BTRFS_QGROUP_INFO_KEY 242 +/* + * Contains the user configured limits for the qgroup. + * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). + */ +#define BTRFS_QGROUP_LIMIT_KEY 244 +/* + * Records the child-parent relationship of qgroups. 
For + * each relation, 2 keys are present: + * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) + * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) + */ +#define BTRFS_QGROUP_RELATION_KEY 246 + #define BTRFS_BALANCE_ITEM_KEY 248 /* @@ -2474,6 +2567,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, sizeof(val)); } +/* btrfs_qgroup_status_item */ +BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, + version, 64); +BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, + scan, 64); + +/* btrfs_qgroup_info_item */ +BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, + rfer_cmpr, 64); +BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64); +BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, + excl_cmpr, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, + struct btrfs_qgroup_info_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item, + rfer, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr, + struct btrfs_qgroup_info_item, rfer_cmpr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item, + excl, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr, + struct btrfs_qgroup_info_item, excl_cmpr, 64); + +/* btrfs_qgroup_limit_item */ +BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, + max_rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, + 
max_excl, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, + rsv_rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, + rsv_excl, 64); + static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; -- cgit v1.2.3 From 2f38b3e1900634e64a186873b3388b1bf85dabc0 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 11:18:10 +0200 Subject: Btrfs: add helper for tree enumeration Often no exact match is wanted but just the next lower or higher item. There's a lot of duplicated code throughout btrfs to deal with the corner cases. This patch adds a helper function that can facilitate searching. Signed-off-by: Arne Jansen --- fs/btrfs/ctree.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 3 +++ 2 files changed, 75 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index bef68ab32204..fb21431fe4e0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2788,6 +2788,78 @@ done: return ret; } +/* + * helper to use instead of search slot if no exact match is needed but + * instead the next or previous item should be returned. + * When find_higher is true, the next higher item is returned, the next lower + * otherwise. + * When return_any and find_higher are both true, and no higher item is found, + * return the next lower instead. + * When return_any is true and find_higher is false, and no lower item is found, + * return the next higher instead. 
+ * It returns 0 if any item is found, 1 if none is found (tree empty), and + * < 0 on error + */ +int btrfs_search_slot_for_read(struct btrfs_root *root, + struct btrfs_key *key, struct btrfs_path *p, + int find_higher, int return_any) +{ + int ret; + struct extent_buffer *leaf; + +again: + ret = btrfs_search_slot(NULL, root, key, p, 0, 0); + if (ret <= 0) + return ret; + /* + * a return value of 1 means the path is at the position where the + * item should be inserted. Normally this is the next bigger item, + * but in case the previous item is the last in a leaf, path points + * to the first free slot in the previous leaf, i.e. at an invalid + * item. + */ + leaf = p->nodes[0]; + + if (find_higher) { + if (p->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, p); + if (ret <= 0) + return ret; + if (!return_any) + return 1; + /* + * no higher item found, return the next + * lower instead + */ + return_any = 0; + find_higher = 0; + btrfs_release_path(p); + goto again; + } + } else { + if (p->slots[0] >= btrfs_header_nritems(leaf)) { + /* we're sitting on an invalid slot */ + if (p->slots[0] == 0) { + ret = btrfs_prev_leaf(root, p); + if (ret <= 0) + return ret; + if (!return_any) + return 1; + /* + * no lower item found, return the next + * higher instead + */ + return_any = 0; + find_higher = 1; + btrfs_release_path(p); + goto again; + } + --p->slots[0]; + } + } + return 0; +} + /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 33088b0dbf3f..27cf995564ed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2856,6 +2856,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, u64 time_seq); +int btrfs_search_slot_for_read(struct btrfs_root *root, + struct btrfs_key *key, struct btrfs_path *p, + int find_higher, int return_any); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, -- cgit v1.2.3 From d13603ef6e14a12cd65a6975e8117c0fea7c7ddf Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 11:40:09 +0200 Subject: Btrfs: check the root passed to btrfs_end_transaction This patch only adds a consistency check to validate that the same root is passed to start_transaction and end_transaction. Subvolume quota depends on this. Signed-off-by: Arne Jansen --- fs/btrfs/transaction.c | 6 ++++++ fs/btrfs/transaction.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 621c8dc48fb6..23cbda0685b8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -345,6 +345,7 @@ again: h->transaction = cur_trans; h->blocks_used = 0; h->bytes_reserved = 0; + h->root = root; h->delayed_ref_updates = 0; h->use_count = 1; h->block_rsv = NULL; @@ -511,6 +512,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; + /* + * the same root has to be passed to start_transaction and + * end_transaction. Subvolume quota depends on this.
+ */ + WARN_ON(trans->root != root); while (count < 2) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fe27379e368b..010729446e13 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -57,6 +57,12 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; int aborted; + /* + * this root is only needed to validate that the root passed to + * start_transaction is the same as the one passed to end_transaction. + * Subvolume quota depends on this + */ + struct btrfs_root *root; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From 20897f5c86b9d2b77baea1d48eda7fa4ac217279 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 12:44:20 +0200 Subject: Btrfs: added helper to create new trees This creates a brand new tree. Will be used to create the quota tree. Signed-off-by: Arne Jansen --- fs/btrfs/disk-io.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 6 +++++ 2 files changed, 83 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 19a39e10d6f5..6fc243eccffa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1225,6 +1225,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) return root; } +struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 objectid) +{ + struct extent_buffer *leaf; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *root; + struct btrfs_key key; + int ret = 0; + u64 bytenr; + + root = btrfs_alloc_root(fs_info); + if (!root) + return ERR_PTR(-ENOMEM); + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, objectid); + root->root_key.objectid = objectid; + root->root_key.type = BTRFS_ROOT_ITEM_KEY; + root->root_key.offset 
= 0; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, objectid, NULL, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + goto fail; + } + + bytenr = leaf->start; + memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(leaf, objectid); + root->node = leaf; + + write_extent_buffer(leaf, fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + write_extent_buffer(leaf, fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + BTRFS_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + root->commit_root = btrfs_root_node(root); + root->track_dirty = 1; + + + root->root_item.flags = 0; + root->root_item.byte_limit = 0; + btrfs_set_root_bytenr(&root->root_item, leaf->start); + btrfs_set_root_generation(&root->root_item, trans->transid); + btrfs_set_root_level(&root->root_item, 0); + btrfs_set_root_refs(&root->root_item, 1); + btrfs_set_root_used(&root->root_item, leaf->len); + btrfs_set_root_last_snapshot(&root->root_item, 0); + btrfs_set_root_dirid(&root->root_item, 0); + root->root_item.drop_level = 0; + + key.objectid = objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item); + if (ret) + goto fail; + + btrfs_tree_unlock(leaf); + +fail: + if (ret) + return ERR_PTR(ret); + + return root; +} + static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { @@ -3260,7 +3336,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); } -static int btree_lock_page_hook(struct page *page, void *data, +int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)) { struct inode *inode = 
page->mapping->host; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 05b3fab39f7e..95e147eea239 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, int btrfs_cleanup_transaction(struct btrfs_root *root); void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, struct btrfs_root *root); +void btrfs_abort_devices(struct btrfs_root *root); +struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 objectid); +int btree_lock_page_hook(struct page *page, void *data, + void (*flush_fn)(void *)); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); -- cgit v1.2.3 From 416ac51da90e98daaac17e1f359a6c5591f7f5bd Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 12:56:09 +0200 Subject: Btrfs: qgroup state and initialization Add state to fs_info. Signed-off-by: Arne Jansen --- fs/btrfs/ctree.h | 24 ++++++++++++++++++++++++ fs/btrfs/disk-io.c | 7 +++++++ 2 files changed, 31 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27cf995564ed..a5269d4a164f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1120,6 +1120,7 @@ struct btrfs_fs_info { struct btrfs_root *dev_root; struct btrfs_root *fs_root; struct btrfs_root *csum_root; + struct btrfs_root *quota_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -1374,6 +1375,29 @@ struct btrfs_fs_info { #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY u32 check_integrity_print_mask; #endif + /* + * quota information + */ + unsigned int quota_enabled:1; + + /* + * quota_enabled only changes state after a commit. This holds the + * next state. + */ + unsigned int pending_quota_state:1; + + /* is qgroup tracking in a consistent state? */ + u64 qgroup_flags; + + /* holds configuration and tracking. 
Protected by qgroup_lock */ + struct rb_root qgroup_tree; + spinlock_t qgroup_lock; + + /* list of dirty qgroups to be written at next commit */ + struct list_head dirty_qgroups; + + /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ + u64 qgroup_seq; /* filesystem state */ u64 fs_state; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6fc243eccffa..eca054974425 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2110,6 +2110,13 @@ int open_ctree(struct super_block *sb, init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); + spin_lock_init(&fs_info->qgroup_lock); + fs_info->qgroup_tree = RB_ROOT; + INIT_LIST_HEAD(&fs_info->dirty_qgroups); + fs_info->qgroup_seq = 1; + fs_info->quota_enabled = 0; + fs_info->pending_quota_state = 0; + btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); -- cgit v1.2.3 From 709c0486b9fe9586736b108b7233bbce0300cfa5 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 12 Sep 2011 12:22:57 +0200 Subject: Btrfs: Test code to change the order of delayed-ref processing Normally delayed refs get processed in ascending bytenr order. This correlates in most cases to the order added. To expose dependencies on this order, we start to process the tree in the middle instead of the beginning. This code is only effective when SCRAMBLE_DELAYED_REFS is defined. 
Signed-off-by: Arne Jansen --- fs/btrfs/extent-tree.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 94ce79f76e5f..b13f1fbc3733 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,6 +34,8 @@ #include "locking.h" #include "free-space-cache.h" +#undef SCRAMBLE_DELAYED_REFS + /* * control flags for do_chunk_alloc's force field * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk @@ -2364,6 +2366,49 @@ static void wait_for_more_refs(struct btrfs_fs_info *fs_info, spin_lock(&delayed_refs->lock); } +#ifdef SCRAMBLE_DELAYED_REFS +/* + * Normally delayed refs get processed in ascending bytenr order. This + * correlates in most cases to the order added. To expose dependencies on this + * order, we start to process the tree in the middle instead of the beginning + */ +static u64 find_middle(struct rb_root *root) +{ + struct rb_node *n = root->rb_node; + struct btrfs_delayed_ref_node *entry; + int alt = 1; + u64 middle; + u64 first = 0, last = 0; + + n = rb_first(root); + if (n) { + entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); + first = entry->bytenr; + } + n = rb_last(root); + if (n) { + entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); + last = entry->bytenr; + } + n = root->rb_node; + + while (n) { + entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); + WARN_ON(!entry->in_tree); + + middle = entry->bytenr; + + if (alt) + n = n->rb_left; + else + n = n->rb_right; + + alt = 1 - alt; + } + return middle; +} +#endif + /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. 
count can be @@ -2406,6 +2451,10 @@ again: consider_waiting = 0; spin_lock(&delayed_refs->lock); +#ifdef SCRAMBLE_DELAYED_REFS + delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); +#endif + if (count == 0) { count = delayed_refs->num_entries * 2; run_most = 1; -- cgit v1.2.3 From 57b9655d01ef057a523e810d29c37ac09b80eead Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Jul 2012 17:58:04 +0200 Subject: udf: Improve table length check to avoid possible overflow When a partition table length is corrupted to be close to 1 << 32, the check for its length may overflow on 32-bit systems and we will think the length is valid. Later on the kernel can crash trying to read beyond end of buffer. Fix the check to avoid possible overflow. CC: stable@vger.kernel.org Reported-by: Ben Hutchings Signed-off-by: Jan Kara --- fs/udf/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 8a7583867811..dcbf98722afc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1340,7 +1340,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; table_len = le32_to_cpu(lvd->mapTableLength); - if (sizeof(*lvd) + table_len > sb->s_blocksize) { + if (table_len > sb->s_blocksize - sizeof(*lvd)) { udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); -- cgit v1.2.3 From 2930d381d22b9c56f40dd4c63a8fa59719ca2c3c Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Tue, 5 Jun 2012 16:52:06 -0400 Subject: nfsd4: our filesystems are normally case sensitive Actually, xfs and jfs can optionally be case insensitive; we'll handle that case in later patches. Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4949667c84ea..6322df36031f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2259,7 +2259,7 @@ out_acl: if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(1); + WRITE32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { if ((buflen -= 4) < 0) -- cgit v1.2.3 From 74dbafaf5d84b5187e50dbe82442ec8df66d55b3 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Wed, 6 Jun 2012 12:53:48 -0400 Subject: nfsd4: release openowners on free in >=4.1 case We don't need to keep openowners around in the >=4.1 case, because they aren't needed to handle CLOSE replays any more (that's a problem for sessions). And doing so causes a subsequent destroy_clientid to fail unexpectedly. We probably also need something comparable for lock owners on last unlock. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9efa4055b5a8..e404fca08260 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3763,12 +3763,19 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); oo->oo_last_closed_stid = stp; - /* place unused nfs4_stateowners on so_close_lru list to be - * released by the laundromat service after the lease period - * to enable us to handle CLOSE replay - */ - if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo); + if (list_empty(&oo->oo_owner.so_stateids)) { + if (cstate->minorversion) { + release_openowner(oo); + cstate->replay_owner = NULL; + } else { + /* + * In the 4.0 case we need to keep the owners around a + * little while to handle CLOSE replay.
+ */ + if (list_empty(&oo->oo_owner.so_stateids)) + move_to_close_lru(oo); + } + } out: if (!cstate->replay_owner) nfs4_unlock_state(); -- cgit v1.2.3 From d91d0b569044ab366895d587d4811b154dd7d7f5 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Wed, 6 Jun 2012 12:12:57 -0400 Subject: nfsd: allow owner_override only for regular files We normally allow the owner of a file to override permissions checks on IO operations, since: - the client will take responsibility for doing an access check on open; - the permission checks offer no protection against malicious clients--if they can authenticate as the file's owner then they can always just change its permissions; - checking permission on each IO operation breaks the usual posix rule that permission is checked only on open. However, we've never allowed the owner to override permissions on readdir operations, even though the above logic would also apply to directories. I've never heard of this causing a problem, probably because a) simultaneously opening and creating a directory (with restricted mode) isn't possible, and b) opening a directory, then chmod'ing it, is rare. Our disallowal of owner-override on directories appears to be an accident, though--the readdir itself succeeds, and then we fail just because lookup_one_len() calls in our filldir methods fail. I'm not sure what the easiest fix for that would be. For now, just make this behavior obvious by denying the override right at the start. This also fixes some odd v4 behavior: with the rdattr_error attribute requested, it would perform the readdir but return an ACCES error with each entry. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/vfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c8bd9c3be7f7..3256b5c324bc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -757,8 +757,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. + * + * Arguably we should also allow the owner override for + * directories, but we never have and it doesn't seem to have + * caused anyone a problem. If we were to change this, note + * also that our filldir callbacks would need a variant of + * lookup_one_len that doesn't check permissions. */ - err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE); + if (type == S_IFREG) + may_flags |= NFSD_MAY_OWNER_OVERRIDE; + err = fh_verify(rqstp, fhp, type, may_flags); if (err) goto out; -- cgit v1.2.3 From 7f2e7dc0fdd9f124da43d1bd12adcebf92bedf16 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Tue, 12 Jun 2012 16:06:29 -0400 Subject: nfsd: share some function prototypes Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 1671429ffa66..6d425c2f9fcd 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -72,6 +72,8 @@ int nfsd_nrthreads(void); int nfsd_nrpools(void); int nfsd_get_nrthreads(int n, int *); int nfsd_set_nrthreads(int n, int *); +int nfsd_pool_stats_open(struct inode *, struct file *); +int nfsd_pool_stats_release(struct inode *, struct file *); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL -- cgit v1.2.3 From 475d0094293b51353e342d1198377967dbc48169 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 11 Jul 2012 15:16:37 +1000 Subject: of: Improve prom_update_property() function prom_update_property() currently fails if the property doesn't actually exist yet which isn't what we want. Change to add-or-update instead of update-only, then we can remove a lot of duplicated lines. Suggested-by: Grant Likely Signed-off-by: Dong Aisheng Acked-by: Rob Herring Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/85xx/p1022_ds.c | 8 +------- arch/powerpc/platforms/pseries/mobility.c | 8 +------- arch/powerpc/platforms/pseries/reconfig.c | 16 ++++++---------- drivers/of/base.c | 15 +++++++++++---- fs/proc/proc_devtree.c | 5 +++++ include/linux/of.h | 3 +-- 6 files changed, 25 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 74e310b4b460..31d18b964f94 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -348,13 +348,7 @@ void __init p1022_ds_pic_init(void) */ static void __init disable_one_node(struct device_node *np, struct property *new) { - struct property *old; - - old = of_find_property(np, new->name, NULL); - if (old) - prom_update_property(np, new, old); - else - prom_add_property(np, new); + prom_update_property(np, new); } /* TRUE if there
is a "video=fslfb" command-line parameter. */ diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 029a562af373..dd30b12edfe4 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -67,7 +67,6 @@ static int update_dt_property(struct device_node *dn, struct property **prop, const char *name, u32 vd, char *value) { struct property *new_prop = *prop; - struct property *old_prop; int more = 0; /* A negative 'vd' value indicates that only part of the new property @@ -117,12 +116,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, } if (!more) { - old_prop = of_find_property(dn, new_prop->name, NULL); - if (old_prop) - prom_update_property(dn, new_prop, old_prop); - else - prom_add_property(dn, new_prop); - + prom_update_property(dn, new_prop); new_prop = NULL; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 7b3bf76ef834..39f71fba9b38 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -432,7 +432,7 @@ static int do_update_property(char *buf, size_t bufsize) unsigned char *value; char *name, *end, *next_prop; int rc, length; - struct property *newprop, *oldprop; + struct property *newprop; buf = parse_node(buf, bufsize, &np); end = buf + bufsize; @@ -443,6 +443,9 @@ static int do_update_property(char *buf, size_t bufsize) if (!next_prop) return -EINVAL; + if (!strlen(name)) + return -ENODEV; + newprop = new_property(name, length, value, NULL); if (!newprop) return -ENOMEM; @@ -450,18 +453,11 @@ static int do_update_property(char *buf, size_t bufsize) if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size")) slb_set_size(*(int *)value); - oldprop = of_find_property(np, name,NULL); - if (!oldprop) { - if (strlen(name)) - return prom_add_property(np, newprop); - return -ENODEV; - } - upd_value.node = np; upd_value.property = 
newprop; pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value); - rc = prom_update_property(np, newprop, oldprop); + rc = prom_update_property(np, newprop); if (rc) return rc; @@ -486,7 +482,7 @@ static int do_update_property(char *buf, size_t bufsize) rc = pSeries_reconfig_notify(action, value); if (rc) { - prom_update_property(np, oldprop, newprop); + prom_update_property(np, newprop); return rc; } } diff --git a/drivers/of/base.c b/drivers/of/base.c index eada3f4ef801..bc86ea2af668 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1073,7 +1073,8 @@ int prom_remove_property(struct device_node *np, struct property *prop) } /* - * prom_update_property - Update a property in a node. + * prom_update_property - Update a property in a node, if the property does + * not exist, add it. * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. @@ -1081,13 +1082,19 @@ int prom_remove_property(struct device_node *np, struct property *prop) * and add the new property to the property list */ int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop) + struct property *newprop) { - struct property **next; + struct property **next, *oldprop; unsigned long flags; int found = 0; + if (!newprop->name) + return -EINVAL; + + oldprop = of_find_property(np, newprop->name, NULL); + if (!oldprop) + return prom_add_property(np, newprop); + write_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 927cbd115e53..df7dd08d4391 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -101,6 +101,11 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde, { struct proc_dir_entry *ent; + if (!oldprop) { + proc_device_tree_add_prop(pde, newprop); + return; + } + for (ent = pde->subdir; ent != NULL; ent = ent->next) if (ent->data == oldprop) break; diff 
--git a/include/linux/of.h b/include/linux/of.h index 2ec1083af7ff..b27c87191df2 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -260,8 +260,7 @@ extern int of_machine_is_compatible(const char *compat); extern int prom_add_property(struct device_node* np, struct property* prop); extern int prom_remove_property(struct device_node *np, struct property *prop); extern int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop); + struct property *newprop); #if defined(CONFIG_OF_DYNAMIC) /* For updating the device tree at runtime */ -- cgit v1.2.3 From bed92eae26ccf280d1a2168b7509447b56675a27 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Thu, 28 Jun 2012 18:03:02 +0200 Subject: Btrfs: qgroup implementation and prototypes Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 46 ++ fs/btrfs/extent-tree.c | 34 ++ fs/btrfs/ioctl.h | 24 + fs/btrfs/qgroup.c | 1571 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 2 + fs/btrfs/transaction.h | 3 + 7 files changed, 1681 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/qgroup.c (limited to 'fs') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0c4fa2befae7..0bc4d3a10a5f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o ulist.o + reada.o backref.o ulist.o qgroup.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a5269d4a164f..ccba9b684c96 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2830,6 +2830,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); +int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -3339,6 +3341,50 @@ void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err); +/* qgroup.c */ +struct qgroup_update { + struct list_head list; + struct btrfs_delayed_ref_node *node; + struct btrfs_delayed_extent_op *extent_op; +}; + +int btrfs_quota_enable(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +int btrfs_quota_disable(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); +int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 src, u64 dst); +int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 src, u64 dst); +int btrfs_create_qgroup(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 qgroupid, + char *name); +int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 qgroupid); +int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 qgroupid, + struct btrfs_qgroup_limit *limit); +int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); +void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); +struct btrfs_delayed_extent_op; +int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *node, + struct btrfs_delayed_extent_op *extent_op); +int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_node *node, + struct btrfs_delayed_extent_op *extent_op); +int 
btrfs_run_qgroups(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, + struct btrfs_qgroup_inherit *inherit); +int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); +void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); + +void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); + static inline int is_fstree(u64 rootid) { if (rootid == BTRFS_FS_TREE_OBJECTID || diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b13f1fbc3733..1a63b830846d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2409,6 +2409,40 @@ static u64 find_middle(struct rb_root *root) } #endif +int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct qgroup_update *qgroup_update; + int ret = 0; + + if (list_empty(&trans->qgroup_ref_list) != + !trans->delayed_ref_elem.seq) { + /* list without seq or seq without list */ + printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", + list_empty(&trans->qgroup_ref_list) ? "" : " not", + trans->delayed_ref_elem.seq); + BUG(); + } + + if (!trans->delayed_ref_elem.seq) + return 0; + + while (!list_empty(&trans->qgroup_ref_list)) { + qgroup_update = list_first_entry(&trans->qgroup_ref_list, + struct qgroup_update, list); + list_del(&qgroup_update->list); + if (!ret) + ret = btrfs_qgroup_account_ref( + trans, fs_info, qgroup_update->node, + qgroup_update->extent_op); + kfree(qgroup_update); + } + + btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); + + return ret; +} + /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. 
count can be diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index e440aa653c30..a8a2230f4c5c 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -35,6 +35,30 @@ struct btrfs_ioctl_vol_args { #define BTRFS_FSID_SIZE 16 #define BTRFS_UUID_SIZE 16 +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_rfer; + __u64 max_excl; + __u64 rsv_rfer; + __u64 rsv_excl; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + #define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { __s64 fd; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c new file mode 100644 index 000000000000..bc424ae5a81a --- /dev/null +++ b/fs/btrfs/qgroup.c @@ -0,0 +1,1571 @@ +/* + * Copyright (C) 2011 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/sched.h> +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/blkdev.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/workqueue.h> + +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "locking.h" +#include "ulist.h" +#include "ioctl.h" +#include "backref.h" + +/* TODO XXX FIXME + * - subvol delete -> delete when ref goes to 0? delete limits also?
+ * - reorganize keys + * - compressed + * - sync + * - rescan + * - copy also limits on subvol creation + * - limit + * - caches fuer ulists + * - performance benchmarks + * - check all ioctl parameters + */ + +/* + * one struct for each qgroup, organized in fs_info->qgroup_tree. + */ +struct btrfs_qgroup { + u64 qgroupid; + + /* + * state + */ + u64 rfer; /* referenced */ + u64 rfer_cmpr; /* referenced compressed */ + u64 excl; /* exclusive */ + u64 excl_cmpr; /* exclusive compressed */ + + /* + * limits + */ + u64 lim_flags; /* which limits are set */ + u64 max_rfer; + u64 max_excl; + u64 rsv_rfer; + u64 rsv_excl; + + /* + * reservation tracking + */ + u64 reserved; + + /* + * lists + */ + struct list_head groups; /* groups this group is member of */ + struct list_head members; /* groups that are members of this group */ + struct list_head dirty; /* dirty groups */ + struct rb_node node; /* tree of qgroups */ + + /* + * temp variables for accounting operations + */ + u64 tag; + u64 refcnt; +}; + +/* + * glue structure to represent the relations between qgroups. 
+ */ +struct btrfs_qgroup_list { + struct list_head next_group; + struct list_head next_member; + struct btrfs_qgroup *group; + struct btrfs_qgroup *member; +}; + +/* must be called with qgroup_lock held */ +static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, + u64 qgroupid) +{ + struct rb_node *n = fs_info->qgroup_tree.rb_node; + struct btrfs_qgroup *qgroup; + + while (n) { + qgroup = rb_entry(n, struct btrfs_qgroup, node); + if (qgroup->qgroupid < qgroupid) + n = n->rb_left; + else if (qgroup->qgroupid > qgroupid) + n = n->rb_right; + else + return qgroup; + } + return NULL; +} + +/* must be called with qgroup_lock held */ +static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, + u64 qgroupid) +{ + struct rb_node **p = &fs_info->qgroup_tree.rb_node; + struct rb_node *parent = NULL; + struct btrfs_qgroup *qgroup; + + while (*p) { + parent = *p; + qgroup = rb_entry(parent, struct btrfs_qgroup, node); + + if (qgroup->qgroupid < qgroupid) + p = &(*p)->rb_left; + else if (qgroup->qgroupid > qgroupid) + p = &(*p)->rb_right; + else + return qgroup; + } + + qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); + if (!qgroup) + return ERR_PTR(-ENOMEM); + + qgroup->qgroupid = qgroupid; + INIT_LIST_HEAD(&qgroup->groups); + INIT_LIST_HEAD(&qgroup->members); + INIT_LIST_HEAD(&qgroup->dirty); + + rb_link_node(&qgroup->node, parent, p); + rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); + + return qgroup; +} + +/* must be called with qgroup_lock held */ +static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +{ + struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); + struct btrfs_qgroup_list *list; + + if (!qgroup) + return -ENOENT; + + rb_erase(&qgroup->node, &fs_info->qgroup_tree); + list_del(&qgroup->dirty); + + while (!list_empty(&qgroup->groups)) { + list = list_first_entry(&qgroup->groups, + struct btrfs_qgroup_list, next_group); + list_del(&list->next_group); + list_del(&list->next_member); + kfree(list); 
+ } + + while (!list_empty(&qgroup->members)) { + list = list_first_entry(&qgroup->members, + struct btrfs_qgroup_list, next_member); + list_del(&list->next_group); + list_del(&list->next_member); + kfree(list); + } + kfree(qgroup); + + return 0; +} + +/* must be called with qgroup_lock held */ +static int add_relation_rb(struct btrfs_fs_info *fs_info, + u64 memberid, u64 parentid) +{ + struct btrfs_qgroup *member; + struct btrfs_qgroup *parent; + struct btrfs_qgroup_list *list; + + member = find_qgroup_rb(fs_info, memberid); + parent = find_qgroup_rb(fs_info, parentid); + if (!member || !parent) + return -ENOENT; + + list = kzalloc(sizeof(*list), GFP_ATOMIC); + if (!list) + return -ENOMEM; + + list->group = parent; + list->member = member; + list_add_tail(&list->next_group, &member->groups); + list_add_tail(&list->next_member, &parent->members); + + return 0; +} + +/* must be called with qgroup_lock held */ +static int del_relation_rb(struct btrfs_fs_info *fs_info, + u64 memberid, u64 parentid) +{ + struct btrfs_qgroup *member; + struct btrfs_qgroup *parent; + struct btrfs_qgroup_list *list; + + member = find_qgroup_rb(fs_info, memberid); + parent = find_qgroup_rb(fs_info, parentid); + if (!member || !parent) + return -ENOENT; + + list_for_each_entry(list, &member->groups, next_group) { + if (list->group == parent) { + list_del(&list->next_group); + list_del(&list->next_member); + kfree(list); + return 0; + } + } + return -ENOENT; +} + +/* + * The full config is read in one go, only called from open_ctree() + * It doesn't use any locking, as at this point we're still single-threaded + */ +int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_root *quota_root = fs_info->quota_root; + struct btrfs_path *path = NULL; + struct extent_buffer *l; + int slot; + int ret = 0; + u64 flags = 0; + + if (!fs_info->quota_enabled) + return 0; + + path = btrfs_alloc_path(); + if (!path) { + ret = 
-ENOMEM; + goto out; + } + + /* default this to quota off, in case no status key is found */ + fs_info->qgroup_flags = 0; + + /* + * pass 1: read status, all qgroup infos and limits + */ + key.objectid = 0; + key.type = 0; + key.offset = 0; + ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); + if (ret) + goto out; + + while (1) { + struct btrfs_qgroup *qgroup; + + slot = path->slots[0]; + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + + if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { + struct btrfs_qgroup_status_item *ptr; + + ptr = btrfs_item_ptr(l, slot, + struct btrfs_qgroup_status_item); + + if (btrfs_qgroup_status_version(l, ptr) != + BTRFS_QGROUP_STATUS_VERSION) { + printk(KERN_ERR + "btrfs: old qgroup version, quota disabled\n"); + goto out; + } + if (btrfs_qgroup_status_generation(l, ptr) != + fs_info->generation) { + flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + printk(KERN_ERR + "btrfs: qgroup generation mismatch, " + "marked as inconsistent\n"); + } + fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, + ptr); + /* FIXME read scan element */ + goto next1; + } + + if (found_key.type != BTRFS_QGROUP_INFO_KEY && + found_key.type != BTRFS_QGROUP_LIMIT_KEY) + goto next1; + + qgroup = find_qgroup_rb(fs_info, found_key.offset); + if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || + (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { + printk(KERN_ERR "btrfs: inconsitent qgroup config\n"); + flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + } + if (!qgroup) { + qgroup = add_qgroup_rb(fs_info, found_key.offset); + if (IS_ERR(qgroup)) { + ret = PTR_ERR(qgroup); + goto out; + } + } + switch (found_key.type) { + case BTRFS_QGROUP_INFO_KEY: { + struct btrfs_qgroup_info_item *ptr; + + ptr = btrfs_item_ptr(l, slot, + struct btrfs_qgroup_info_item); + qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); + qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); + qgroup->excl = btrfs_qgroup_info_excl(l, ptr); + 
qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); + /* generation currently unused */ + break; + } + case BTRFS_QGROUP_LIMIT_KEY: { + struct btrfs_qgroup_limit_item *ptr; + + ptr = btrfs_item_ptr(l, slot, + struct btrfs_qgroup_limit_item); + qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); + qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); + qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); + qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); + qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); + break; + } + } +next1: + ret = btrfs_next_item(quota_root, path); + if (ret < 0) + goto out; + if (ret) + break; + } + btrfs_release_path(path); + + /* + * pass 2: read all qgroup relations + */ + key.objectid = 0; + key.type = BTRFS_QGROUP_RELATION_KEY; + key.offset = 0; + ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); + if (ret) + goto out; + while (1) { + slot = path->slots[0]; + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + + if (found_key.type != BTRFS_QGROUP_RELATION_KEY) + goto next2; + + if (found_key.objectid > found_key.offset) { + /* parent <- member, not needed to build config */ + /* FIXME should we omit the key completely? */ + goto next2; + } + + ret = add_relation_rb(fs_info, found_key.objectid, + found_key.offset); + if (ret) + goto out; +next2: + ret = btrfs_next_item(quota_root, path); + if (ret < 0) + goto out; + if (ret) + break; + } +out: + fs_info->qgroup_flags |= flags; + if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { + fs_info->quota_enabled = 0; + fs_info->pending_quota_state = 0; + } + btrfs_free_path(path); + + return ret < 0 ? ret : 0; +} + +/* + * This is only called from close_ctree() or open_ctree(), both in single- + * treaded paths. Clean up the in-memory structures. No locking needed. 
+ */ +void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) +{ + struct rb_node *n; + struct btrfs_qgroup *qgroup; + struct btrfs_qgroup_list *list; + + while ((n = rb_first(&fs_info->qgroup_tree))) { + qgroup = rb_entry(n, struct btrfs_qgroup, node); + rb_erase(n, &fs_info->qgroup_tree); + + WARN_ON(!list_empty(&qgroup->dirty)); + + while (!list_empty(&qgroup->groups)) { + list = list_first_entry(&qgroup->groups, + struct btrfs_qgroup_list, + next_group); + list_del(&list->next_group); + list_del(&list->next_member); + kfree(list); + } + + while (!list_empty(&qgroup->members)) { + list = list_first_entry(&qgroup->members, + struct btrfs_qgroup_list, + next_member); + list_del(&list->next_group); + list_del(&list->next_member); + kfree(list); + } + kfree(qgroup); + } +} + +static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, + struct btrfs_root *quota_root, + u64 src, u64 dst) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = src; + key.type = BTRFS_QGROUP_RELATION_KEY; + key.offset = dst; + + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); + + btrfs_mark_buffer_dirty(path->nodes[0]); + + btrfs_free_path(path); + return ret; +} + +static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, + struct btrfs_root *quota_root, + u64 src, u64 dst) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = src; + key.type = BTRFS_QGROUP_RELATION_KEY; + key.offset = dst; + + ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, quota_root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int add_qgroup_item(struct btrfs_trans_handle *trans, + struct btrfs_root *quota_root, u64 qgroupid) +{ + int ret; + struct 
btrfs_path *path; + struct btrfs_qgroup_info_item *qgroup_info; + struct btrfs_qgroup_limit_item *qgroup_limit; + struct extent_buffer *leaf; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = 0; + key.type = BTRFS_QGROUP_INFO_KEY; + key.offset = qgroupid; + + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, + sizeof(*qgroup_info)); + if (ret) + goto out; + + leaf = path->nodes[0]; + qgroup_info = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_qgroup_info_item); + btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); + btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); + btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); + btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); + btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); + + btrfs_mark_buffer_dirty(leaf); + + btrfs_release_path(path); + + key.type = BTRFS_QGROUP_LIMIT_KEY; + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, + sizeof(*qgroup_limit)); + if (ret) + goto out; + + leaf = path->nodes[0]; + qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_qgroup_limit_item); + btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); + btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); + btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); + btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); + btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); + + btrfs_mark_buffer_dirty(leaf); + + ret = 0; +out: + btrfs_free_path(path); + return ret; +} + +static int del_qgroup_item(struct btrfs_trans_handle *trans, + struct btrfs_root *quota_root, u64 qgroupid) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = 0; + key.type = BTRFS_QGROUP_INFO_KEY; + key.offset = qgroupid; + ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto 
out; + } + + ret = btrfs_del_item(trans, quota_root, path); + if (ret) + goto out; + + btrfs_release_path(path); + + key.type = BTRFS_QGROUP_LIMIT_KEY; + ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, quota_root, path); + +out: + btrfs_free_path(path); + return ret; +} + +static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 qgroupid, + u64 flags, u64 max_rfer, u64 max_excl, + u64 rsv_rfer, u64 rsv_excl) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_qgroup_limit_item *qgroup_limit; + int ret; + int slot; + + key.objectid = 0; + key.type = BTRFS_QGROUP_LIMIT_KEY; + key.offset = qgroupid; + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) + ret = -ENOENT; + + if (ret) + goto out; + + l = path->nodes[0]; + slot = path->slots[0]; + qgroup_limit = btrfs_item_ptr(l, path->slots[0], + struct btrfs_qgroup_limit_item); + btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); + btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); + btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); + btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); + btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); + + btrfs_mark_buffer_dirty(l); + +out: + btrfs_free_path(path); + return ret; +} + +static int update_qgroup_info_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_qgroup *qgroup) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_qgroup_info_item *qgroup_info; + int ret; + int slot; + + key.objectid = 0; + key.type = BTRFS_QGROUP_INFO_KEY; + key.offset = qgroup->qgroupid; + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) + ret 
= -ENOENT; + + if (ret) + goto out; + + l = path->nodes[0]; + slot = path->slots[0]; + qgroup_info = btrfs_item_ptr(l, path->slots[0], + struct btrfs_qgroup_info_item); + btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); + btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); + btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); + btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); + btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); + + btrfs_mark_buffer_dirty(l); + +out: + btrfs_free_path(path); + return ret; +} + +static int update_qgroup_status_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_root *root) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_qgroup_status_item *ptr; + int ret; + int slot; + + key.objectid = 0; + key.type = BTRFS_QGROUP_STATUS_KEY; + key.offset = 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) + ret = -ENOENT; + + if (ret) + goto out; + + l = path->nodes[0]; + slot = path->slots[0]; + ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); + btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); + btrfs_set_qgroup_status_generation(l, ptr, trans->transid); + /* XXX scan */ + + btrfs_mark_buffer_dirty(l); + +out: + btrfs_free_path(path); + return ret; +} + +/* + * called with qgroup_lock held + */ +static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + if (!root) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while (1) { + key.objectid = 0; + key.offset = 0; + key.type = 0; + + path->leave_spinning = 1; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } else if (ret < 0) { + 
break; + } + + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + btrfs_release_path(path); + } + ret = 0; +out: + root->fs_info->pending_quota_state = 0; + btrfs_free_path(path); + return ret; +} + +int btrfs_quota_enable(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *quota_root; + struct btrfs_path *path = NULL; + struct btrfs_qgroup_status_item *ptr; + struct extent_buffer *leaf; + struct btrfs_key key; + int ret = 0; + + spin_lock(&fs_info->qgroup_lock); + if (fs_info->quota_root) { + fs_info->pending_quota_state = 1; + spin_unlock(&fs_info->qgroup_lock); + goto out; + } + spin_unlock(&fs_info->qgroup_lock); + + /* + * initially create the quota tree + */ + quota_root = btrfs_create_tree(trans, fs_info, + BTRFS_QUOTA_TREE_OBJECTID); + if (IS_ERR(quota_root)) { + ret = PTR_ERR(quota_root); + goto out; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = 0; + key.type = BTRFS_QGROUP_STATUS_KEY; + key.offset = 0; + + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, + sizeof(*ptr)); + if (ret) + goto out; + + leaf = path->nodes[0]; + ptr = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_qgroup_status_item); + btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); + btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); + fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); + btrfs_set_qgroup_status_scan(leaf, ptr, 0); + + btrfs_mark_buffer_dirty(leaf); + + spin_lock(&fs_info->qgroup_lock); + fs_info->quota_root = quota_root; + fs_info->pending_quota_state = 1; + spin_unlock(&fs_info->qgroup_lock); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_quota_disable(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *quota_root; + 
int ret = 0; + + spin_lock(&fs_info->qgroup_lock); + fs_info->quota_enabled = 0; + fs_info->pending_quota_state = 0; + quota_root = fs_info->quota_root; + fs_info->quota_root = NULL; + btrfs_free_qgroup_config(fs_info); + spin_unlock(&fs_info->qgroup_lock); + + if (!quota_root) + return -EINVAL; + + ret = btrfs_clean_quota_tree(trans, quota_root); + if (ret) + goto out; + + ret = btrfs_del_root(trans, tree_root, &quota_root->root_key); + if (ret) + goto out; + + list_del(&quota_root->dirty_list); + + btrfs_tree_lock(quota_root->node); + clean_tree_block(trans, tree_root, quota_root->node); + btrfs_tree_unlock(quota_root->node); + btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); + + free_extent_buffer(quota_root->node); + free_extent_buffer(quota_root->commit_root); + kfree(quota_root); +out: + return ret; +} + +int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) +{ + /* FIXME */ + return 0; +} + +int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 src, u64 dst) +{ + struct btrfs_root *quota_root; + int ret = 0; + + quota_root = fs_info->quota_root; + if (!quota_root) + return -EINVAL; + + ret = add_qgroup_relation_item(trans, quota_root, src, dst); + if (ret) + return ret; + + ret = add_qgroup_relation_item(trans, quota_root, dst, src); + if (ret) { + del_qgroup_relation_item(trans, quota_root, src, dst); + return ret; + } + + spin_lock(&fs_info->qgroup_lock); + ret = add_relation_rb(quota_root->fs_info, src, dst); + spin_unlock(&fs_info->qgroup_lock); + + return ret; +} + +int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 src, u64 dst) +{ + struct btrfs_root *quota_root; + int ret = 0; + int err; + + quota_root = fs_info->quota_root; + if (!quota_root) + return -EINVAL; + + ret = del_qgroup_relation_item(trans, quota_root, src, dst); + err = del_qgroup_relation_item(trans, quota_root, dst, src); + if (err && !ret) + ret = err; + +
spin_lock(&fs_info->qgroup_lock); + del_relation_rb(fs_info, src, dst); + + spin_unlock(&fs_info->qgroup_lock); + + return ret; +} + +int btrfs_create_qgroup(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) +{ + struct btrfs_root *quota_root; + struct btrfs_qgroup *qgroup; + int ret = 0; + + quota_root = fs_info->quota_root; + if (!quota_root) + return -EINVAL; + + ret = add_qgroup_item(trans, quota_root, qgroupid); + + spin_lock(&fs_info->qgroup_lock); + qgroup = add_qgroup_rb(fs_info, qgroupid); + spin_unlock(&fs_info->qgroup_lock); + + if (IS_ERR(qgroup)) + ret = PTR_ERR(qgroup); + + return ret; +} + +int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 qgroupid) +{ + struct btrfs_root *quota_root; + int ret = 0; + + quota_root = fs_info->quota_root; + if (!quota_root) + return -EINVAL; + + ret = del_qgroup_item(trans, quota_root, qgroupid); + + spin_lock(&fs_info->qgroup_lock); + del_qgroup_rb(quota_root->fs_info, qgroupid); + + spin_unlock(&fs_info->qgroup_lock); + + return ret; +} + +int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 qgroupid, + struct btrfs_qgroup_limit *limit) +{ + struct btrfs_root *quota_root = fs_info->quota_root; + struct btrfs_qgroup *qgroup; + int ret = 0; + + if (!quota_root) + return -EINVAL; + + ret = update_qgroup_limit_item(trans, quota_root, qgroupid, + limit->flags, limit->max_rfer, + limit->max_excl, limit->rsv_rfer, + limit->rsv_excl); + if (ret) { + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + printk(KERN_INFO "unable to update quota limit for %llu\n", + (unsigned long long)qgroupid); + } + + spin_lock(&fs_info->qgroup_lock); + + qgroup = find_qgroup_rb(fs_info, qgroupid); + if (!qgroup) { + ret = -ENOENT; + goto unlock; + } + qgroup->lim_flags = limit->flags; + qgroup->max_rfer = limit->max_rfer; + qgroup->max_excl = limit->max_excl; + qgroup->rsv_rfer = limit->rsv_rfer; + 
qgroup->rsv_excl = limit->rsv_excl; + +unlock: + spin_unlock(&fs_info->qgroup_lock); + + return ret; +} + +static void qgroup_dirty(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup *qgroup) +{ + if (list_empty(&qgroup->dirty)) + list_add(&qgroup->dirty, &fs_info->dirty_qgroups); +} + +/* + * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts + * the modification into a list that's later used by btrfs_end_transaction to + * pass the recorded modifications on to btrfs_qgroup_account_ref. + */ +int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *node, + struct btrfs_delayed_extent_op *extent_op) +{ + struct qgroup_update *u; + + BUG_ON(!trans->delayed_ref_elem.seq); + u = kmalloc(sizeof(*u), GFP_NOFS); + if (!u) + return -ENOMEM; + + u->node = node; + u->extent_op = extent_op; + list_add_tail(&u->list, &trans->qgroup_ref_list); + + return 0; +} + +/* + * btrfs_qgroup_account_ref is called for every ref that is added to or deleted + * from the fs. First, all roots referencing the extent are searched, and + * then the space is accounted accordingly to the different roots. The + * accounting algorithm works in 3 steps documented inline. 
+ */ +int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_node *node, + struct btrfs_delayed_extent_op *extent_op) +{ + struct btrfs_key ins; + struct btrfs_root *quota_root; + u64 ref_root; + struct btrfs_qgroup *qgroup; + struct ulist_node *unode; + struct ulist *roots = NULL; + struct ulist *tmp = NULL; + struct ulist_iterator uiter; + u64 seq; + int ret = 0; + int sgn; + + if (!fs_info->quota_enabled) + return 0; + + BUG_ON(!fs_info->quota_root); + + ins.objectid = node->bytenr; + ins.offset = node->num_bytes; + ins.type = BTRFS_EXTENT_ITEM_KEY; + + if (node->type == BTRFS_TREE_BLOCK_REF_KEY || + node->type == BTRFS_SHARED_BLOCK_REF_KEY) { + struct btrfs_delayed_tree_ref *ref; + ref = btrfs_delayed_node_to_tree_ref(node); + ref_root = ref->root; + } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || + node->type == BTRFS_SHARED_DATA_REF_KEY) { + struct btrfs_delayed_data_ref *ref; + ref = btrfs_delayed_node_to_data_ref(node); + ref_root = ref->root; + } else { + BUG(); + } + + if (!is_fstree(ref_root)) { + /* + * non-fs-trees are not being accounted + */ + return 0; + } + + switch (node->action) { + case BTRFS_ADD_DELAYED_REF: + case BTRFS_ADD_DELAYED_EXTENT: + sgn = 1; + break; + case BTRFS_DROP_DELAYED_REF: + sgn = -1; + break; + case BTRFS_UPDATE_DELAYED_HEAD: + return 0; + default: + BUG(); + } + + /* + * the delayed ref sequence number we pass depends on the direction of + * the operation. for add operations, we pass (node->seq - 1) to skip + * the delayed ref's current sequence number, because we need the state + * of the tree before the add operation. for delete operations, we pass + * (node->seq) to include the delayed ref's current sequence number, + * because we need the state of the tree after the delete operation. + */ + ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, + sgn > 0 ? 
node->seq - 1 : node->seq, &roots); + if (ret < 0) + goto out; + + spin_lock(&fs_info->qgroup_lock); + quota_root = fs_info->quota_root; + if (!quota_root) + goto unlock; + + qgroup = find_qgroup_rb(fs_info, ref_root); + if (!qgroup) + goto unlock; + + /* + * step 1: for each old ref, visit all nodes once and inc refcnt + */ + tmp = ulist_alloc(GFP_ATOMIC); + if (!tmp) { + ret = -ENOMEM; + goto unlock; + } + seq = fs_info->qgroup_seq; + fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + struct ulist_node *tmp_unode; + struct ulist_iterator tmp_uiter; + struct btrfs_qgroup *qg; + + qg = find_qgroup_rb(fs_info, unode->val); + if (!qg) + continue; + + ulist_reinit(tmp); + /* XXX id not needed */ + ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); + ULIST_ITER_INIT(&tmp_uiter); + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)tmp_unode->aux; + if (qg->refcnt < seq) + qg->refcnt = seq + 1; + else + ++qg->refcnt; + + list_for_each_entry(glist, &qg->groups, next_group) { + ulist_add(tmp, glist->group->qgroupid, + (unsigned long)glist->group, + GFP_ATOMIC); + } + } + } + + /* + * step 2: walk from the new root + */ + ulist_reinit(tmp); + ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(tmp, &uiter))) { + struct btrfs_qgroup *qg; + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)unode->aux; + if (qg->refcnt < seq) { + /* not visited by step 1 */ + qg->rfer += sgn * node->num_bytes; + qg->rfer_cmpr += sgn * node->num_bytes; + if (roots->nnodes == 0) { + qg->excl += sgn * node->num_bytes; + qg->excl_cmpr += sgn * node->num_bytes; + } + qgroup_dirty(fs_info, qg); + } + WARN_ON(qg->tag >= seq); + qg->tag = seq; + + list_for_each_entry(glist, &qg->groups, next_group) { + ulist_add(tmp, glist->group->qgroupid, + (unsigned 
long)glist->group, GFP_ATOMIC); + } + } + + /* + * step 3: walk again from old refs + */ + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + struct btrfs_qgroup *qg; + struct ulist_node *tmp_unode; + struct ulist_iterator tmp_uiter; + + qg = find_qgroup_rb(fs_info, unode->val); + if (!qg) + continue; + + ulist_reinit(tmp); + ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); + ULIST_ITER_INIT(&tmp_uiter); + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)tmp_unode->aux; + if (qg->tag == seq) + continue; + + if (qg->refcnt - seq == roots->nnodes) { + qg->excl -= sgn * node->num_bytes; + qg->excl_cmpr -= sgn * node->num_bytes; + qgroup_dirty(fs_info, qg); + } + + list_for_each_entry(glist, &qg->groups, next_group) { + ulist_add(tmp, glist->group->qgroupid, + (unsigned long)glist->group, + GFP_ATOMIC); + } + } + } + ret = 0; +unlock: + spin_unlock(&fs_info->qgroup_lock); +out: + ulist_free(roots); + ulist_free(tmp); + + return ret; +} + +/* + * called from commit_transaction. Writes all changed qgroups to disk. 
+ */ +int btrfs_run_qgroups(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *quota_root = fs_info->quota_root; + int ret = 0; + + if (!quota_root) + goto out; + + fs_info->quota_enabled = fs_info->pending_quota_state; + + spin_lock(&fs_info->qgroup_lock); + while (!list_empty(&fs_info->dirty_qgroups)) { + struct btrfs_qgroup *qgroup; + qgroup = list_first_entry(&fs_info->dirty_qgroups, + struct btrfs_qgroup, dirty); + list_del_init(&qgroup->dirty); + spin_unlock(&fs_info->qgroup_lock); + ret = update_qgroup_info_item(trans, quota_root, qgroup); + if (ret) + fs_info->qgroup_flags |= + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + spin_lock(&fs_info->qgroup_lock); + } + if (fs_info->quota_enabled) + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; + else + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; + spin_unlock(&fs_info->qgroup_lock); + + ret = update_qgroup_status_item(trans, fs_info, quota_root); + if (ret) + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + +out: + + return ret; +} + +/* + * copy the accounting information between qgroups.
This is necessary when a + * snapshot or a subvolume is created + */ +int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, + struct btrfs_qgroup_inherit *inherit) +{ + int ret = 0; + int i; + u64 *i_qgroups; + struct btrfs_root *quota_root = fs_info->quota_root; + struct btrfs_qgroup *srcgroup; + struct btrfs_qgroup *dstgroup; + u32 level_size = 0; + + if (!fs_info->quota_enabled) + return 0; + + if (!quota_root) + return -EINVAL; + + /* + * create a tracking group for the subvol itself + */ + ret = add_qgroup_item(trans, quota_root, objectid); + if (ret) + goto out; + + if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { + ret = update_qgroup_limit_item(trans, quota_root, objectid, + inherit->lim.flags, + inherit->lim.max_rfer, + inherit->lim.max_excl, + inherit->lim.rsv_rfer, + inherit->lim.rsv_excl); + if (ret) + goto out; + } + + if (srcid) { + struct btrfs_root *srcroot; + struct btrfs_key srckey; + int srcroot_level; + + srckey.objectid = srcid; + srckey.type = BTRFS_ROOT_ITEM_KEY; + srckey.offset = (u64)-1; + srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); + if (IS_ERR(srcroot)) { + ret = PTR_ERR(srcroot); + goto out; + } + + rcu_read_lock(); + srcroot_level = btrfs_header_level(srcroot->node); + level_size = btrfs_level_size(srcroot, srcroot_level); + rcu_read_unlock(); + } + + /* + * add qgroup to all inherited groups + */ + if (inherit) { + i_qgroups = (u64 *)(inherit + 1); + for (i = 0; i < inherit->num_qgroups; ++i) { + ret = add_qgroup_relation_item(trans, quota_root, + objectid, *i_qgroups); + if (ret) + goto out; + ret = add_qgroup_relation_item(trans, quota_root, + *i_qgroups, objectid); + if (ret) + goto out; + ++i_qgroups; + } + } + + + spin_lock(&fs_info->qgroup_lock); + + dstgroup = add_qgroup_rb(fs_info, objectid); + if (!dstgroup) + goto unlock; + + if (srcid) { + srcgroup = find_qgroup_rb(fs_info, srcid); + if (!srcgroup) + goto unlock; + dstgroup->rfer = 
srcgroup->rfer - level_size; + dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; + srcgroup->excl = level_size; + srcgroup->excl_cmpr = level_size; + qgroup_dirty(fs_info, dstgroup); + qgroup_dirty(fs_info, srcgroup); + } + + if (!inherit) + goto unlock; + + i_qgroups = (u64 *)(inherit + 1); + for (i = 0; i < inherit->num_qgroups; ++i) { + ret = add_relation_rb(quota_root->fs_info, objectid, + *i_qgroups); + if (ret) + goto unlock; + ++i_qgroups; + } + + for (i = 0; i < inherit->num_ref_copies; ++i) { + struct btrfs_qgroup *src; + struct btrfs_qgroup *dst; + + src = find_qgroup_rb(fs_info, i_qgroups[0]); + dst = find_qgroup_rb(fs_info, i_qgroups[1]); + + if (!src || !dst) { + ret = -EINVAL; + goto unlock; + } + + dst->rfer = src->rfer - level_size; + dst->rfer_cmpr = src->rfer_cmpr - level_size; + i_qgroups += 2; + } + for (i = 0; i < inherit->num_excl_copies; ++i) { + struct btrfs_qgroup *src; + struct btrfs_qgroup *dst; + + src = find_qgroup_rb(fs_info, i_qgroups[0]); + dst = find_qgroup_rb(fs_info, i_qgroups[1]); + + if (!src || !dst) { + ret = -EINVAL; + goto unlock; + } + + dst->excl = src->excl + level_size; + dst->excl_cmpr = src->excl_cmpr + level_size; + i_qgroups += 2; + } + +unlock: + spin_unlock(&fs_info->qgroup_lock); +out: + return ret; +} + +/* + * reserve some space for a qgroup and all its parents. The reservation takes + * place with start_transaction or dealloc_reserve, similar to ENOSPC + * accounting. If not enough space is available, EDQUOT is returned. + * We assume that the requested space is new for all qgroups. 
+ */ +int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) +{ + struct btrfs_root *quota_root; + struct btrfs_qgroup *qgroup; + struct btrfs_fs_info *fs_info = root->fs_info; + u64 ref_root = root->root_key.objectid; + int ret = 0; + struct ulist *ulist = NULL; + struct ulist_node *unode; + struct ulist_iterator uiter; + + if (!is_fstree(ref_root)) + return 0; + + if (num_bytes == 0) + return 0; + + spin_lock(&fs_info->qgroup_lock); + quota_root = fs_info->quota_root; + if (!quota_root) + goto out; + + qgroup = find_qgroup_rb(fs_info, ref_root); + if (!qgroup) + goto out; + + /* + * in a first step, we check all affected qgroups if any limits would + * be exceeded + */ + ulist = ulist_alloc(GFP_ATOMIC); + ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(ulist, &uiter))) { + struct btrfs_qgroup *qg; + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)unode->aux; + + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && + qg->reserved + qg->rfer + num_bytes > + qg->max_rfer) + ret = -EDQUOT; + + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && + qg->reserved + qg->excl + num_bytes > + qg->max_excl) + ret = -EDQUOT; + + list_for_each_entry(glist, &qg->groups, next_group) { + ulist_add(ulist, glist->group->qgroupid, + (unsigned long)glist->group, GFP_ATOMIC); + } + } + if (ret) + goto out; + + /* + * no limits exceeded, now record the reservation into all qgroups + */ + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(ulist, &uiter))) { + struct btrfs_qgroup *qg; + + qg = (struct btrfs_qgroup *)unode->aux; + + qg->reserved += num_bytes; + } + +out: + spin_unlock(&fs_info->qgroup_lock); + ulist_free(ulist); + + return ret; +} + +void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) +{ + struct btrfs_root *quota_root; + struct btrfs_qgroup *qgroup; + struct btrfs_fs_info *fs_info = root->fs_info; + struct ulist *ulist = NULL; + struct ulist_node 
*unode; + struct ulist_iterator uiter; + u64 ref_root = root->root_key.objectid; + + if (!is_fstree(ref_root)) + return; + + if (num_bytes == 0) + return; + + spin_lock(&fs_info->qgroup_lock); + + quota_root = fs_info->quota_root; + if (!quota_root) + goto out; + + qgroup = find_qgroup_rb(fs_info, ref_root); + if (!qgroup) + goto out; + + ulist = ulist_alloc(GFP_ATOMIC); + ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(ulist, &uiter))) { + struct btrfs_qgroup *qg; + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)unode->aux; + + qg->reserved -= num_bytes; + + list_for_each_entry(glist, &qg->groups, next_group) { + ulist_add(ulist, glist->group->qgroupid, + (unsigned long)glist->group, GFP_ATOMIC); + } + } + +out: + spin_unlock(&fs_info->qgroup_lock); + ulist_free(ulist); +} + +void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) +{ + if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) + return; + printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", + trans, list_empty(&trans->qgroup_ref_list) ? 
"" : " not", + trans->delayed_ref_elem.seq); + BUG(); +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 23cbda0685b8..0d6c8816845a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -351,6 +351,8 @@ again: h->block_rsv = NULL; h->orig_rsv = NULL; h->aborted = 0; + h->delayed_ref_elem.seq = 0; + INIT_LIST_HEAD(&h->qgroup_ref_list); smp_mb(); if (cur_trans->blocked && may_wait_transaction(root, type)) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 010729446e13..16ba00842c38 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -20,6 +20,7 @@ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" #include "delayed-ref.h" +#include "ctree.h" struct btrfs_transaction { u64 transid; @@ -63,6 +64,8 @@ struct btrfs_trans_handle { * Subvolume quota depends on this */ struct btrfs_root *root; + struct seq_list delayed_ref_elem; + struct list_head qgroup_ref_list; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From edf39272db4810282360f7362d43ade1d524c913 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 28 Jun 2012 18:04:55 +0200 Subject: Btrfs: call the qgroup accounting functions Signed-off-by: Jan Schmidt --- fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/transaction.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1a63b830846d..c08337a83ace 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2479,6 +2479,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); + btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); + delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: @@ -2588,6 +2590,7 @@ again: } out: spin_unlock(&delayed_refs->lock); + assert_qgroups_uptodate(trans); return 0; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0d6c8816845a..d20d2e24f8d2 
100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -512,6 +512,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } + /* + * do the qgroup accounting as early as possible + */ + err = btrfs_delayed_refs_qgroup_accounting(trans, info); + btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; /* @@ -571,6 +576,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { err = -EIO; } + assert_qgroups_uptodate(trans); memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -1355,6 +1361,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; + /* + * running the delayed items may have added new refs. account + * them now so that they hinder processing of more delayed refs + * as little as possible. + */ + btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); + /* * rename don't use btrfs_join_transaction, so, once we * set the transaction to blocked above, we aren't going @@ -1467,6 +1480,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, root->fs_info->chunk_root->node); switch_commit_root(root->fs_info->chunk_root); + assert_qgroups_uptodate(trans); update_super_roots(root); if (!root->fs_info->log_root_recovering) { -- cgit v1.2.3 From bcef60f249034f69e89e544461cbfecb68975595 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 15:23:30 +0200 Subject: Btrfs: quota tree support and startup Init the quota tree along with the others on open_ctree and close_ctree. Add the quota tree to the list of well known trees in btrfs_read_fs_root_no_name. 
Signed-off-by: Arne Jansen --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ccba9b684c96..2ba03b96fbe0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2967,6 +2967,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info->chunk_root); kfree(fs_info->dev_root); kfree(fs_info->csum_root); + kfree(fs_info->quota_root); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); kfree(fs_info); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eca054974425..87d9391c0576 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1472,6 +1472,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; + if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) + return fs_info->quota_root ? 
fs_info->quota_root : + ERR_PTR(-ENOENT); again: spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, @@ -1899,6 +1902,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_extent_buffer(info->extent_root->commit_root); free_extent_buffer(info->csum_root->node); free_extent_buffer(info->csum_root->commit_root); + if (info->quota_root) { + free_extent_buffer(info->quota_root->node); + free_extent_buffer(info->quota_root->commit_root); + } info->tree_root->node = NULL; info->tree_root->commit_root = NULL; @@ -1908,6 +1915,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) info->extent_root->commit_root = NULL; info->csum_root->node = NULL; info->csum_root->commit_root = NULL; + if (info->quota_root) { + info->quota_root->node = NULL; + info->quota_root->commit_root = NULL; + } if (chunk_root) { free_extent_buffer(info->chunk_root->node); @@ -1938,6 +1949,7 @@ int open_ctree(struct super_block *sb, struct btrfs_root *csum_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + struct btrfs_root *quota_root; struct btrfs_root *log_tree_root; int ret; int err = -EINVAL; @@ -1949,9 +1961,10 @@ int open_ctree(struct super_block *sb, csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); + quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); if (!tree_root || !extent_root || !csum_root || - !chunk_root || !dev_root) { + !chunk_root || !dev_root || !quota_root) { err = -ENOMEM; goto fail; } @@ -2441,6 +2454,17 @@ retry_root_backup: goto recovery_tree_root; csum_root->track_dirty = 1; + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_QUOTA_TREE_OBJECTID, quota_root); + if (ret) { + kfree(quota_root); + quota_root = fs_info->quota_root = NULL; + } else { + quota_root->track_dirty = 1; + fs_info->quota_enabled = 1; + 
fs_info->pending_quota_state = 1; + } + fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -2500,6 +2524,9 @@ retry_root_backup: " integrity check module %s\n", sb->s_id); } #endif + ret = btrfs_read_qgroup_config(fs_info); + if (ret) + goto fail_trans_kthread; /* do not make disk changes in broken FS */ if (btrfs_super_log_root(disk_super) != 0 && @@ -2510,7 +2537,7 @@ retry_root_backup: printk(KERN_WARNING "Btrfs log replay required " "on RO media\n"); err = -EIO; - goto fail_trans_kthread; + goto fail_qgroup; } blocksize = btrfs_level_size(tree_root, @@ -2519,7 +2546,7 @@ retry_root_backup: log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { err = -ENOMEM; - goto fail_trans_kthread; + goto fail_qgroup; } __setup_root(nodesize, leafsize, sectorsize, stripesize, @@ -2559,7 +2586,7 @@ retry_root_backup: printk(KERN_WARNING "btrfs: failed to recover relocation\n"); err = -EINVAL; - goto fail_trans_kthread; + goto fail_qgroup; } } @@ -2569,10 +2596,10 @@ retry_root_backup: fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); if (!fs_info->fs_root) - goto fail_trans_kthread; + goto fail_qgroup; if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); - goto fail_trans_kthread; + goto fail_qgroup; } if (sb->s_flags & MS_RDONLY) @@ -2596,6 +2623,8 @@ retry_root_backup: return 0; +fail_qgroup: + btrfs_free_qgroup_config(fs_info); fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); fail_cleaner: @@ -3194,6 +3223,8 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 2; smp_mb(); + btrfs_free_qgroup_config(root->fs_info); + if (fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", (unsigned long long)fs_info->delalloc_bytes); @@ -3213,6 +3244,10 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->dev_root->commit_root); free_extent_buffer(fs_info->csum_root->node); free_extent_buffer(fs_info->csum_root->commit_root); + if 
(fs_info->quota_root) { + free_extent_buffer(fs_info->quota_root->node); + free_extent_buffer(fs_info->quota_root->commit_root); + } btrfs_free_block_groups(fs_info); -- cgit v1.2.3 From 546adb0d817c34dc2be3a7cb5bba8771f837a562 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 14 Jun 2012 16:37:44 +0200 Subject: Btrfs: hooks for qgroup to record delayed refs Hooks into qgroup code to record refs and into transaction commit. This is the main entry point for qgroup. Basically every change in extent backrefs got accounted to the appropriate qgroups. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/delayed-ref.c | 16 ++++++++++------ fs/btrfs/delayed-ref.h | 19 +++++++++++++++++++ fs/btrfs/transaction.c | 7 +++++++ 3 files changed, 36 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 21a757717637..da7419ed01bb 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -529,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; - if (is_fstree(ref_root)) - seq = btrfs_inc_tree_mod_seq(fs_info); + if (need_ref_seq(for_cow, ref_root)) + seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); ref->seq = seq; full_ref = btrfs_delayed_node_to_tree_ref(ref); @@ -588,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; - if (is_fstree(ref_root)) - seq = btrfs_inc_tree_mod_seq(fs_info); + if (need_ref_seq(for_cow, ref_root)) + seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); ref->seq = seq; full_ref = btrfs_delayed_node_to_data_ref(ref); @@ -662,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); - if (!is_fstree(ref_root) && + if (!need_ref_seq(for_cow, ref_root) && 
waitqueue_active(&fs_info->tree_mod_seq_wait)) wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); + if (need_ref_seq(for_cow, ref_root)) + btrfs_qgroup_record_ref(trans, &ref->node, extent_op); return 0; } @@ -711,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action, for_cow); - if (!is_fstree(ref_root) && + if (!need_ref_seq(for_cow, ref_root) && waitqueue_active(&fs_info->tree_mod_seq_wait)) wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); + if (need_ref_seq(for_cow, ref_root)) + btrfs_qgroup_record_ref(trans, &ref->node, extent_op); return 0; } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 2b5cb27f9861..0d7c90c366b6 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -179,6 +179,25 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq); +/* + * delayed refs with a ref_seq > 0 must be held back during backref walking. + * this only applies to items in one of the fs-trees. for_cow items never need + * to be held back, so they won't get a ref_seq number. + */ +static inline int need_ref_seq(int for_cow, u64 rootid) +{ + if (for_cow) + return 0; + + if (rootid == BTRFS_FS_TREE_OBJECTID) + return 1; + + if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) + return 1; + + return 0; +} + /* * a node might live in a head or a regular ref, this lets you * test for the proper type to use. 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d20d2e24f8d2..21c768cb443f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -795,6 +795,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ret = btrfs_run_dev_stats(trans, root->fs_info); BUG_ON(ret); + ret = btrfs_run_qgroups(trans, root->fs_info); + BUG_ON(ret); + + /* run_qgroups might have added some more refs */ + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); + while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); -- cgit v1.2.3 From c556723794b3487a79de1ecd6354975b1389f5ff Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 15:44:05 +0200 Subject: Btrfs: hooks to reserve qgroup space Like block reserves, reserve a small piece of space on each transaction start and for delalloc. These are the hooks that can actually return EDQUOT to the user. The amount of space reserved is tracked in the transaction handle. 
Signed-off-by: Arne Jansen --- fs/btrfs/extent-tree.c | 12 ++++++++++++ fs/btrfs/transaction.c | 16 ++++++++++++++++ fs/btrfs/transaction.h | 1 + 3 files changed, 29 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c08337a83ace..2ce16f97730a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4565,6 +4565,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) csum_bytes = BTRFS_I(inode)->csum_bytes; spin_unlock(&BTRFS_I(inode)->lock); + if (root->fs_info->quota_enabled) { + ret = btrfs_qgroup_reserve(root, num_bytes + + nr_extents * root->leafsize); + if (ret) + return ret; + } + ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); if (ret) { u64 to_free = 0; @@ -4643,6 +4650,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_free, 0); + if (root->fs_info->quota_enabled) { + btrfs_qgroup_free(root, num_bytes + + dropped * root->leafsize); + } + btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, to_free); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 21c768cb443f..f1e29fbd5317 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -295,6 +295,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, struct btrfs_transaction *cur_trans; u64 num_bytes = 0; int ret; + u64 qgroup_reserved = 0; if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) return ERR_PTR(-EROFS); @@ -313,6 +314,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, * the appropriate flushing if need be. 
*/ if (num_items > 0 && root != root->fs_info->chunk_root) { + if (root->fs_info->quota_enabled && + is_fstree(root->root_key.objectid)) { + qgroup_reserved = num_items * root->leafsize; + ret = btrfs_qgroup_reserve(root, qgroup_reserved); + if (ret) + return ERR_PTR(ret); + } + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, @@ -351,6 +360,7 @@ again: h->block_rsv = NULL; h->orig_rsv = NULL; h->aborted = 0; + h->qgroup_reserved = qgroup_reserved; h->delayed_ref_elem.seq = 0; INIT_LIST_HEAD(&h->qgroup_ref_list); @@ -524,6 +534,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, * end_transaction. Subvolume quota depends on this. */ WARN_ON(trans->root != root); + + if (trans->qgroup_reserved) { + btrfs_qgroup_free(root, trans->qgroup_reserved); + trans->qgroup_reserved = 0; + } + while (count < 2) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 16ba00842c38..2759e0572c5c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -50,6 +50,7 @@ struct btrfs_transaction { struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; + u64 qgroup_reserved; unsigned long use_count; unsigned long blocks_reserved; unsigned long blocks_used; -- cgit v1.2.3 From 5d13a37bd5327220e13329943d1228acfbe5934a Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 15:53:51 +0200 Subject: Btrfs: add qgroup ioctls Ioctls to control the qgroup feature like adding and removing qgroups and assigning qgroups. 
Signed-off-by: Arne Jansen --- fs/btrfs/ioctl.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 27 ++++++++ 2 files changed, 212 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0e92e5763005..55a7283a9e18 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3390,6 +3390,183 @@ out: return ret; } +static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_quota_ctl_args *sa; + struct btrfs_trans_handle *trans = NULL; + int ret; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + sa = memdup_user(arg, sizeof(*sa)); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + } + + switch (sa->cmd) { + case BTRFS_QUOTA_CTL_ENABLE: + ret = btrfs_quota_enable(trans, root->fs_info); + break; + case BTRFS_QUOTA_CTL_DISABLE: + ret = btrfs_quota_disable(trans, root->fs_info); + break; + case BTRFS_QUOTA_CTL_RESCAN: + ret = btrfs_quota_rescan(root->fs_info); + break; + default: + ret = -EINVAL; + break; + } + + if (copy_to_user(arg, sa, sizeof(*sa))) + ret = -EFAULT; + + if (trans) { + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; + } + +out: + kfree(sa); + return ret; +} + +static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_qgroup_assign_args *sa; + struct btrfs_trans_handle *trans; + int ret; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + sa = memdup_user(arg, sizeof(*sa)); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + /* FIXME: check if the IDs really exist */ + if (sa->assign) { + 
ret = btrfs_add_qgroup_relation(trans, root->fs_info, + sa->src, sa->dst); + } else { + ret = btrfs_del_qgroup_relation(trans, root->fs_info, + sa->src, sa->dst); + } + + err = btrfs_end_transaction(trans, root); + if (err && !ret) + ret = err; + +out: + kfree(sa); + return ret; +} + +static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_qgroup_create_args *sa; + struct btrfs_trans_handle *trans; + int ret; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + sa = memdup_user(arg, sizeof(*sa)); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + /* FIXME: check if the IDs really exist */ + if (sa->create) { + ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, + NULL); + } else { + ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); + } + + err = btrfs_end_transaction(trans, root); + if (err && !ret) + ret = err; + +out: + kfree(sa); + return ret; +} + +static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_qgroup_limit_args *sa; + struct btrfs_trans_handle *trans; + int ret; + int err; + u64 qgroupid; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + sa = memdup_user(arg, sizeof(*sa)); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + qgroupid = sa->qgroupid; + if (!qgroupid) { + /* take the current subvol as qgroup */ + qgroupid = root->root_key.objectid; + } + + /* FIXME: check if the IDs really exist */ + ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); + + err = btrfs_end_transaction(trans, root); + if (err && !ret) + ret = err; + +out: + kfree(sa); + return ret; +} + long 
btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3476,6 +3653,14 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_get_dev_stats(root, argp, 0); case BTRFS_IOC_GET_AND_RESET_DEV_STATS: return btrfs_ioctl_get_dev_stats(root, argp, 1); + case BTRFS_IOC_QUOTA_CTL: + return btrfs_ioctl_quota_ctl(root, argp); + case BTRFS_IOC_QGROUP_ASSIGN: + return btrfs_ioctl_qgroup_assign(root, argp); + case BTRFS_IOC_QGROUP_CREATE: + return btrfs_ioctl_qgroup_create(root, argp); + case BTRFS_IOC_QGROUP_LIMIT: + return btrfs_ioctl_qgroup_limit(root, argp); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index a8a2230f4c5c..9dd50c4656b3 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -319,6 +319,25 @@ struct btrfs_ioctl_get_dev_stats { __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ }; +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +#define BTRFS_QUOTA_CTL_RESCAN 3 +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -388,4 +407,12 @@ struct btrfs_ioctl_get_dev_stats { #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) #endif -- cgit v1.2.3 From 6f72c7e20dbaea55f04546de69586c84a3654503 Mon 
Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 15:58:21 +0200 Subject: Btrfs: add qgroup inheritance When creating a subvolume or snapshot, it is necessary to initialize the qgroup account with a copy of some other (tracking) qgroup. This patch adds parameters to the ioctls to pass the information from which qgroup to inherit. Signed-off-by: Arne Jansen --- fs/btrfs/ioctl.c | 59 +++++++++++++++++++++++++++++++++++--------------- fs/btrfs/ioctl.h | 11 +++++++++- fs/btrfs/transaction.c | 8 +++++++ fs/btrfs/transaction.h | 1 + 4 files changed, 61 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 55a7283a9e18..1dffd0adf975 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -336,7 +336,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, - u64 *async_transid) + u64 *async_transid, + struct btrfs_qgroup_inherit **inherit) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -368,6 +369,11 @@ static noinline int create_subvol(struct btrfs_root *root, if (IS_ERR(trans)) return PTR_ERR(trans); + ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, + inherit ? 
*inherit : NULL); + if (ret) + goto fail; + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { @@ -484,7 +490,7 @@ fail: static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - bool readonly) + bool readonly, struct btrfs_qgroup_inherit **inherit) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -502,6 +508,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot->dentry = dentry; pending_snapshot->root = root; pending_snapshot->readonly = readonly; + if (inherit) { + pending_snapshot->inherit = *inherit; + *inherit = NULL; /* take responsibility to free it */ + } trans = btrfs_start_transaction(root->fs_info->extent_root, 5); if (IS_ERR(trans)) { @@ -635,7 +645,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, - u64 *async_transid, bool readonly) + u64 *async_transid, bool readonly, + struct btrfs_qgroup_inherit **inherit) { struct inode *dir = parent->dentry->d_inode; struct dentry *dentry; @@ -666,11 +677,11 @@ static noinline int btrfs_mksubvol(struct path *parent, goto out_up_read; if (snap_src) { - error = create_snapshot(snap_src, dentry, - name, namelen, async_transid, readonly); + error = create_snapshot(snap_src, dentry, name, namelen, + async_transid, readonly, inherit); } else { error = create_subvol(BTRFS_I(dir)->root, dentry, - name, namelen, async_transid); + name, namelen, async_transid, inherit); } if (!error) fsnotify_mkdir(dir, dentry); @@ -1379,11 +1390,9 @@ out: } static noinline int btrfs_ioctl_snap_create_transid(struct file *file, - char *name, - unsigned long fd, - int subvol, - u64 *transid, - bool readonly) + char *name, unsigned long fd, int subvol, + u64 *transid, bool readonly, + struct btrfs_qgroup_inherit 
**inherit) { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct file *src_file; @@ -1407,7 +1416,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, - NULL, transid, readonly); + NULL, transid, readonly, inherit); } else { struct inode *src_inode; src_file = fget(fd); @@ -1426,7 +1435,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, } ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, - transid, readonly); + transid, readonly, inherit); fput(src_file); } out: @@ -1446,7 +1455,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, vol_args->fd, subvol, - NULL, false); + NULL, false, NULL); kfree(vol_args); return ret; @@ -1460,6 +1469,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, u64 transid = 0; u64 *ptr = NULL; bool readonly = false; + struct btrfs_qgroup_inherit *inherit = NULL; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) @@ -1467,7 +1477,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; if (vol_args->flags & - ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { + ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | + BTRFS_SUBVOL_QGROUP_INHERIT)) { ret = -EOPNOTSUPP; goto out; } @@ -1476,10 +1487,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ptr = &transid; if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; + if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { + if (vol_args->size > PAGE_CACHE_SIZE) { + ret = -EINVAL; + goto out; + } + inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); + if (IS_ERR(inherit)) { + ret = PTR_ERR(inherit); + goto out; + } + } ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, - vol_args->fd, subvol, - ptr, readonly); + 
vol_args->fd, subvol, ptr, + readonly, &inherit); if (ret == 0 && ptr && copy_to_user(arg + @@ -1488,6 +1510,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = -EFAULT; out: kfree(vol_args); + kfree(inherit); return ret; } @@ -3588,6 +3611,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create_v2(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: return btrfs_ioctl_snap_create(file, argp, 1); + case BTRFS_IOC_SUBVOL_CREATE_V2: + return btrfs_ioctl_snap_create_v2(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); case BTRFS_IOC_SUBVOL_GETFLAGS: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 9dd50c4656b3..cdda57f1c240 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -32,6 +32,7 @@ struct btrfs_ioctl_vol_args { #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) #define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) #define BTRFS_FSID_SIZE 16 #define BTRFS_UUID_SIZE 16 @@ -64,7 +65,13 @@ struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; __u64 flags; - __u64 unused[4]; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit __user *qgroup_inherit; + }; + __u64 unused[4]; + }; char name[BTRFS_SUBVOL_NAME_MAX + 1]; }; @@ -382,6 +389,8 @@ struct btrfs_ioctl_qgroup_create_args { #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f1e29fbd5317..127283913a42 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -990,6 +990,14 @@ static noinline int 
create_pending_snapshot(struct btrfs_trans_handle *trans, } } + ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, + objectid, pending->inherit); + kfree(pending->inherit); + if (ret) { + pending->error = ret; + goto fail; + } + key.objectid = objectid; key.offset = (u64)-1; key.type = BTRFS_ROOT_ITEM_KEY; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2759e0572c5c..cca315dcdfcd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,6 +73,7 @@ struct btrfs_pending_snapshot { struct dentry *dentry; struct btrfs_root *root; struct btrfs_root *snap; + struct btrfs_qgroup_inherit *inherit; /* block reservation for the operation */ struct btrfs_block_rsv block_rsv; /* extra metadata reseration for relocation */ -- cgit v1.2.3 From 10983f2e8dc65d118371681548809109b570b63b Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 11 Jul 2012 15:26:19 +0800 Subject: Btrfs: fix typo in convert_extent_bit It should be convert_extent_bit. Signed-off-by: Liu Bo Signed-off-by: Jiri Kosina --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9018a05036e..97f6703fd493 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -933,7 +933,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, /** - * convert_extent - convert all bits in a given range from one bit to another + * convert_extent_bit - convert all bits in a given range from one bit to + * another * @tree: the io tree to search * @start: the start offset in bytes * @end: the end offset in bytes (inclusive) -- cgit v1.2.3 From fe685aabf7c8c9f138e5ea900954d295bf229175 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 12 Jul 2012 08:46:54 +0200 Subject: isofs: avoid info leak on export For type 1 the parent_offset member in struct isofs_fid gets copied uninitialized to userland. Fix this by initializing it to 0. 
Signed-off-by: Mathias Krause Signed-off-by: Jan Kara --- fs/isofs/export.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/isofs/export.c b/fs/isofs/export.c index aa4356d09eee..1d3804492aa7 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -134,6 +134,7 @@ isofs_export_encode_fh(struct inode *inode, len = 3; fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ + fh16[3] = 0; /* avoid leaking uninitialized data */ fh32[2] = inode->i_generation; if (parent) { struct iso_inode_info *eparent; -- cgit v1.2.3 From 0143fc5e9f6f5aad4764801015bc8d4b4a278200 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 12 Jul 2012 08:46:55 +0200 Subject: udf: avoid info leak on export For type 0x51 the udf.parent_partref member in struct fid gets copied uninitialized to userland. Fix this by initializing it to 0. Signed-off-by: Mathias Krause Signed-off-by: Jan Kara --- fs/udf/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 18024178ac4c..c31deb382afd 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1279,6 +1279,7 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp, *lenp = 3; fid->udf.block = location.logicalBlockNum; fid->udf.partref = location.partitionReferenceNum; + fid->udf.parent_partref = 0; fid->udf.generation = inode->i_generation; if (parent) { -- cgit v1.2.3 From 1f432a887e9a5a5c25be6ac72b5da13652c8bed3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 12 Jul 2012 07:40:42 +1000 Subject: xfs: really fix the cursor leak in xfs_alloc_ag_vextent_near The current cursor is reallocated when retrying the allocation, so the existing cursor needs to be destroyed in both the restart and the failure cases. 
Signed-off-by: Dave Chinner Tested-by: Mike Snitzer Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9d1aeb7e2734..f654f51b0c67 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1074,13 +1074,13 @@ restart: * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + if (!forced++) { trace_xfs_alloc_near_busy(args); xfs_log_force(args->mp, XFS_LOG_SYNC); goto restart; } - - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; -- cgit v1.2.3 From eb71a12e411fe065f8663e12a8d81d561f9502ee Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 12 Jul 2012 07:40:43 +1000 Subject: xfs: don't defer metadata allocation to the workqueue Almost all metadata allocations come from shallow stack usage situations. Avoid the overhead of switching the allocation to a workqueue as we are not in danger of running out of stack when making these allocations. Metadata allocations are already marked through the args that are passed down, so this is trivial to do. Signed-off-by: Dave Chinner Reported-by: Mel Gorman Tested-by: Mel Gorman Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index f654f51b0c67..4f33c32affe3 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker( current_restore_flags_nested(&pflags, PF_FSTRANS); } - -int /* error */ +/* + * Data allocation requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. Metadata + * requests, OTOH, are generally from low stack usage paths, so avoid the + * context switch overhead here. 
+ */ +int xfs_alloc_vextent( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args) { DECLARE_COMPLETION_ONSTACK(done); + if (!args->userdata) + return __xfs_alloc_vextent(args); + + args->done = &done; INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); queue_work(xfs_alloc_wq, &args->work); -- cgit v1.2.3 From 08023d6dbe840dc4271805a9ea376fcbdee9f744 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Jul 2012 06:00:04 -0400 Subject: xfs: prevent recursion in xfs_buf_iorequest If the b_iodone handler is run in calling context in xfs_buf_iorequest we can run into a recursion where xfs_buf_iodone_callbacks keeps calling back into xfs_buf_iorequest because an I/O error happened, which keeps calling back into xfs_buf_iorequest. This chain will usually not take long because the filesystem gets shut down because of log I/O errors, but even over a short time it can cause stack overflows if run on the same context. As a short term workaround make sure we always call the iodone handler in workqueue context. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 39c5d7622dec..d1edfa1a8112 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1355,7 +1355,7 @@ xfs_buf_iorequest( */ atomic_set(&bp->b_io_remaining, 1); _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 0); + _xfs_buf_ioend(bp, 1); xfs_buf_rele(bp); } -- cgit v1.2.3 From a2dcf5df5f3813a44423d4a5026666e751ec00dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jul 2012 02:24:10 -0400 Subject: xfs: do not call xfs_bdstrat_cb in xfs_buf_iodone_callbacks xfs_bdstrat_cb only adds a check for a shutdown filesystem over xfs_buf_iorequest, but xfs_buf_iodone_callbacks just checked for a shut down filesystem a little earlier. 
In addition the shutdown handling in xfs_bdstrat_cb is not very suitable for this caller. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 51 ++++++++++++++++++++++----------------------------- fs/xfs/xfs_buf.h | 1 - fs/xfs/xfs_buf_item.c | 2 +- 3 files changed, 23 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d1edfa1a8112..d7a9dd735e1e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1049,27 +1049,6 @@ xfs_buf_ioerror_alert( (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); } -int -xfs_bwrite( - struct xfs_buf *bp) -{ - int error; - - ASSERT(xfs_buf_islocked(bp)); - - bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); - - xfs_bdstrat_cb(bp); - - error = xfs_buf_iowait(bp); - if (error) { - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); - } - return error; -} - /* * Called when we want to stop a buffer from getting written or read. * We attach the EIO error, muck with its flags, and call xfs_buf_ioend @@ -1139,14 +1118,7 @@ xfs_bioerror_relse( return EIO; } - -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int +STATIC int xfs_bdstrat_cb( struct xfs_buf *bp) { @@ -1167,6 +1139,27 @@ xfs_bdstrat_cb( return 0; } +int +xfs_bwrite( + struct xfs_buf *bp) +{ + int error; + + ASSERT(xfs_buf_islocked(bp)); + + bp->b_flags |= XBF_WRITE; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); + + xfs_bdstrat_cb(bp); + + error = xfs_buf_iowait(bp); + if (error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } + return error; +} + /* * Wrapper around bdstrat so that we can stop data from going to disk in case * we are shutting down the filesystem. 
Typically user data goes thru this diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index aa96bd410aed..d03b73b9604e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -250,7 +250,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); extern int xfs_bwrite(struct xfs_buf *bp); extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e4a6e4b6fa03..a8d0ed911196 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1101,7 +1101,7 @@ xfs_buf_iodone_callbacks( if (!XFS_BUF_ISSTALE(bp)) { bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; - xfs_bdstrat_cb(bp); + xfs_buf_iorequest(bp); } else { xfs_buf_relse(bp); } -- cgit v1.2.3 From 821f7494a77627fb1ab539591c57b22cdca702d6 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 3 Jul 2012 16:50:57 -0700 Subject: eCryptfs: Revert to a writethrough cache model A change was made about a year ago to get eCryptfs to better utilize its page cache during writes. The idea was to do the page encryption operations during page writeback, rather than doing them when initially writing into the page cache, to reduce the number of page encryption operations during sequential writes. This meant that the encrypted page would only be written to the lower filesystem during page writeback, which was a change from how eCryptfs had previously wrote to the lower filesystem in ecryptfs_write_end(). The change caused a few eCryptfs-internal bugs that were shook out. Unfortunately, more grave side effects have been identified that will force changes outside of eCryptfs. Because the lower filesystem isn't consulted until page writeback, eCryptfs has no way to pass lower write errors (ENOSPC, mainly) back to userspace. 
Additionally, it was reported that quotas could be bypassed because of the way eCryptfs may sometimes open the lower filesystem using a privileged kthread. It would be nice to resolve the latest issues, but it is best if the eCryptfs commits be reverted to the old behavior in the meantime. This reverts: 32001d6f "eCryptfs: Flush file in vma close" 5be79de2 "eCryptfs: Flush dirty pages in setattr" 57db4e8d "ecryptfs: modify write path to encrypt page in writepage" Signed-off-by: Tyler Hicks Tested-by: Colin King Cc: Colin King Cc: Thieu Le --- fs/ecryptfs/file.c | 33 ++------------------------------- fs/ecryptfs/inode.c | 6 ------ fs/ecryptfs/mmap.c | 39 +++++++++++++-------------------------- 3 files changed, 15 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index baf8b0550391..44ce5c6a541d 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -138,27 +138,6 @@ out: return rc; } -static void ecryptfs_vma_close(struct vm_area_struct *vma) -{ - filemap_write_and_wait(vma->vm_file->f_mapping); -} - -static const struct vm_operations_struct ecryptfs_file_vm_ops = { - .close = ecryptfs_vma_close, - .fault = filemap_fault, -}; - -static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma) -{ - int rc; - - rc = generic_file_mmap(file, vma); - if (!rc) - vma->vm_ops = &ecryptfs_file_vm_ops; - - return rc; -} - struct kmem_cache *ecryptfs_file_info_cache; static int read_or_initialize_metadata(struct dentry *dentry) @@ -311,15 +290,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) static int ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int rc = 0; - - rc = generic_file_fsync(file, start, end, datasync); - if (rc) - goto out; - rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end, - datasync); -out: - return rc; + return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } static int ecryptfs_fasync(int fd, struct file *file, int flag)
@@ -388,7 +359,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = ecryptfs_file_mmap, + .mmap = generic_file_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 2d4143f8f5c9..769fb8524aeb 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -981,12 +981,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) goto out; } - if (S_ISREG(inode->i_mode)) { - rc = filemap_write_and_wait(inode->i_mapping); - if (rc) - goto out; - fsstack_copy_attr_all(inode, lower_inode); - } memcpy(&lower_ia, ia, sizeof(lower_ia)); if (ia->ia_valid & ATTR_FILE) lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index a46b3a8fee1e..bd1d57f98f74 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -66,18 +66,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) { int rc; - /* - * Refuse to write the page out if we are called from reclaim context - * since our writepage() path may potentially allocate memory when - * calling into the lower fs vfs_write() which may in turn invoke - * us again. 
- */ - if (current->flags & PF_MEMALLOC) { - redirty_page_for_writepage(wbc, page); - rc = 0; - goto out; - } - rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting " @@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file, struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; int rc; - int need_unlock_page = 1; ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); @@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file, "zeros in page with index = [0x%.16lx]\n", index); goto out; } - set_page_dirty(page); - unlock_page(page); - need_unlock_page = 0; + rc = ecryptfs_encrypt_page(page); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " + "index [0x%.16lx])\n", index); + goto out; + } if (pos + copied > i_size_read(ecryptfs_inode)) { i_size_write(ecryptfs_inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " "[0x%.16llx]\n", (unsigned long long)i_size_read(ecryptfs_inode)); - balance_dirty_pages_ratelimited(mapping); - rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); - if (rc) { - printk(KERN_ERR "Error writing inode size to metadata; " - "rc = [%d]\n", rc); - goto out; - } } - rc = copied; + rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); + if (rc) + printk(KERN_ERR "Error writing inode size to metadata; " + "rc = [%d]\n", rc); + else + rc = copied; out: - if (need_unlock_page) - unlock_page(page); + unlock_page(page); page_cache_release(page); return rc; } -- cgit v1.2.3 From 5f5b331d5c21228a6519dcb793fc1629646c51a6 Mon Sep 17 00:00:00 2001 From: Tim Sally Date: Thu, 12 Jul 2012 19:10:24 -0400 Subject: eCryptfs: check for eCryptfs cipher support at mount The issue occurs when eCryptfs is mounted with a cipher supported by the crypto subsystem but not by eCryptfs. The mount succeeds and an error does not occur until a write. 
This change checks for eCryptfs cipher support at mount time. Resolves Launchpad issue #338914, reported by Tyler Hicks in 03/2009. https://bugs.launchpad.net/ecryptfs/+bug/338914 Signed-off-by: Tim Sally Signed-off-by: Tyler Hicks --- fs/ecryptfs/main.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index df217dc9f1d9..923f6aa09cb3 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -279,6 +279,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *fnek_src; char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; + u8 cipher_code; *check_ruid = 0; @@ -420,6 +421,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; + + cipher_code = ecryptfs_code_for_cipher_string( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + if (!cipher_code) { + ecryptfs_printk(KERN_ERR, + "eCryptfs doesn't support cipher: %s", + mount_crypt_stat->global_default_cipher_name); + rc = -EINVAL; + goto out; + } + mutex_lock(&key_tfm_list_mutex); if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, NULL)) { -- cgit v1.2.3 From bc86256d2e80e6731a2055175d9a32cf96eb71f8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:51 +0300 Subject: affs: stop setting bm_flags AFFS stores values '1' and '2' in 'bm_flags', and I fail to see any logic when it prefers one or another. AFFS writes '1' only from '->put_super()', while '->sync_fs()' and '->write_super()' store value '2'. So on the first glance, it looks like we want to have '1' if we unmount. However, this does not really happen in these cases: 1. superblock is written via 'write_super()' then we unmount; 2. we re-mount R/O, then unmount. which are quite typical. 
I could not find good documentation describing this field, except for one random piece of documentation on the internet which says that -1 means that the root block is valid, which is not consistent with what we have in the Linux AFFS driver. Jan Kara commented on this: "I have some vague recollection that on Amiga boolean was usually encoded as: 0 == false, ~0 == -1 == true. But it has been ages..." Thus, my conclusion is that value of '1' is as good as value of '2' and we can just always use '2'. And Jan Kara suggested to go further: "generally bm_flags handling looks strange. If they are 0, we mount fs read only and thus cannot change them. If they are != 0, we write 2 there. So IMHO if you just removed bm_flags setting, nothing will really happen." So this patch removes the bm_flags setting completely. This makes the "clean" argument of the 'affs_commit_super()' function unneeded, so it is also removed. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index 0782653a05a2..1d42e468abdd 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -25,13 +25,12 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_remount (struct super_block *sb, int *flags, char *data); static void -affs_commit_super(struct super_block *sb, int wait, int clean) +affs_commit_super(struct super_block *sb, int wait) { struct affs_sb_info *sbi = AFFS_SB(sb); struct buffer_head *bh = sbi->s_root_bh; struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); - tail->bm_flag = cpu_to_be32(clean); secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); mark_buffer_dirty(bh); @@ -46,7 +45,7 @@ affs_put_super(struct super_block *sb) pr_debug("AFFS: put_super()\n"); if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) - affs_commit_super(sb, 1, 1); + affs_commit_super(sb, 1); kfree(sbi->s_prefix);
affs_free_bitmap(sb); @@ -60,7 +59,7 @@ affs_write_super(struct super_block *sb) { lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) - affs_commit_super(sb, 1, 2); + affs_commit_super(sb, 1); sb->s_dirt = 0; unlock_super(sb); @@ -71,7 +70,7 @@ static int affs_sync_fs(struct super_block *sb, int wait) { lock_super(sb); - affs_commit_super(sb, wait, 2); + affs_commit_super(sb, wait); sb->s_dirt = 0; unlock_super(sb); return 0; -- cgit v1.2.3 From c9753b1d20e13c94d15a1c8b252a696744bd22a2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:52 +0300 Subject: affs: remove useless superblock writeout on unmount We do not need to write out the superblock from '->put_super()' because VFS has already called '->sync_fs()' by this time and the superblock has already been written out. Thus, remove the 'affs_commit_super()' invocation from 'affs_put_super()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index 1d42e468abdd..12b4f58081b4 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -44,9 +44,6 @@ affs_put_super(struct super_block *sb) struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); - if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) - affs_commit_super(sb, 1); - kfree(sbi->s_prefix); affs_free_bitmap(sb); affs_brelse(sbi->s_root_bh); -- cgit v1.2.3 From 0164b1a32e6849121ea73ef3124a2994951a4713 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:53 +0300 Subject: affs: remove useless superblock writeout on remount We do not need to write out the superblock from '->remount_fs()' because VFS has already called '->sync_fs()' by this time and the superblock has already been written out. Thus, remove the 'affs_write_super()' invocation from 'affs_remount()'.
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index 12b4f58081b4..c837e43687a6 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -545,10 +545,9 @@ affs_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; - if (*flags & MS_RDONLY) { - affs_write_super(sb); + if (*flags & MS_RDONLY) affs_free_bitmap(sb); - } else + else res = affs_init_bitmap(sb, flags); return res; -- cgit v1.2.3 From e0471c8d8abbc2b07fc82f7b02896d1637909319 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:54 +0300 Subject: affs: re-structure superblock locking a bit AFFS wants to serialize the superblock (the root block in AFFS terms) updates and uses 'lock_super()/unlock_super()' for these purposes. This patch pushes the locking down to the 'affs_commit_super()' from the callers. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index c837e43687a6..4ceec5636434 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -31,11 +31,13 @@ affs_commit_super(struct super_block *sb, int wait) struct buffer_head *bh = sbi->s_root_bh; struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); + lock_super(sb); secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); mark_buffer_dirty(bh); if (wait) sync_dirty_buffer(bh); + unlock_super(sb); } static void @@ -54,22 +56,17 @@ affs_put_super(struct super_block *sb) static void affs_write_super(struct super_block *sb) { - lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) affs_commit_super(sb, 1); sb->s_dirt = 0; - unlock_super(sb); - pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); } static int affs_sync_fs(struct super_block *sb, int wait) { - lock_super(sb); affs_commit_super(sb, wait); sb->s_dirt = 0; - unlock_super(sb); return 0; } -- cgit v1.2.3 From a837107439ea50116e59943556d6902c09e52772 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:55 +0300 Subject: affs: stop using lock_super The VFS's 'lock_super()' and 'unlock_super()' calls are deprecated and unwanted and just wait for a brave knight who'd kill them. This patch makes AFFS stop using them and use the buffer-head's own lock instead. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index 4ceec5636434..da7498da80a6 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -31,13 +31,14 @@ affs_commit_super(struct super_block *sb, int wait) struct buffer_head *bh = sbi->s_root_bh; struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); - lock_super(sb); + lock_buffer(bh); secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); if (wait) sync_dirty_buffer(bh); - unlock_super(sb); } static void -- cgit v1.2.3 From a215fef7edfdcd8948037ceb3060b9ae7ebcef8b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:56 +0300 Subject: affs: introduce VFS superblock object back-reference Add an 'sb' VFS superblock back-reference to the 'struct affs_sb_info' data structure - we will need to find the VFS superblock from a 'struct affs_sb_info' object in the next patch, so this change is just a preparation. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/affs.h | 1 + fs/affs/super.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 1fceb320d2f2..5a726e992245 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -100,6 +100,7 @@ struct affs_sb_info { char *s_prefix; /* Prefix for volumes and assigns. */ char s_volume[32]; /* Volume prefix for absolute symlinks. */ spinlock_t symlink_lock; /* protects the previous two */ + struct super_block *sb; /* the VFS superblock object */ }; #define SF_INTL 0x0001 /* International filesystem.
*/ diff --git a/fs/affs/super.c b/fs/affs/super.c index da7498da80a6..0496cbbeda1b 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -299,6 +299,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; + sbi->sb = sb; mutex_init(&sbi->s_bmlock); spin_lock_init(&sbi->symlink_lock); -- cgit v1.2.3 From 3dd847820d138c9d60764b0e920380373285ff10 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:57 +0300 Subject: affs: get rid of affs_sync_super This patch makes affs stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no dirty superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/affs.h | 6 ++++++ fs/affs/bitmap.c | 4 ++-- fs/affs/super.c | 48 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 45 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 5a726e992245..3a130e27eb15 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -3,6 +3,7 @@ #include #include #include +#include /* AmigaOS allows file names with up to 30 characters length. * Names longer than that will be silently truncated. If you @@ -101,6 +102,9 @@ struct affs_sb_info { char s_volume[32]; /* Volume prefix for absolute symlinks. 
*/ spinlock_t symlink_lock; /* protects the previous two */ struct super_block *sb; /* the VFS superblock object */ + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work sb_work; /* superblock flush delayed work */ + spinlock_t work_lock; /* protects sb_work and work_queued */ }; #define SF_INTL 0x0001 /* International filesystem. */ @@ -121,6 +125,8 @@ static inline struct affs_sb_info *AFFS_SB(struct super_block *sb) return sb->s_fs_info; } +void affs_mark_sb_dirty(struct super_block *sb); + /* amigaffs.c */ extern int affs_insert_hash(struct inode *inode, struct buffer_head *bh); diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 3e262711ae06..6e0be43ef6ef 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -103,7 +103,7 @@ affs_free_block(struct super_block *sb, u32 block) *(__be32 *)bh->b_data = cpu_to_be32(tmp - mask); mark_buffer_dirty(bh); - sb->s_dirt = 1; + affs_mark_sb_dirty(sb); bm->bm_free++; mutex_unlock(&sbi->s_bmlock); @@ -248,7 +248,7 @@ find_bit: *(__be32 *)bh->b_data = cpu_to_be32(tmp + mask); mark_buffer_dirty(bh); - sb->s_dirt = 1; + affs_mark_sb_dirty(sb); mutex_unlock(&sbi->s_bmlock); diff --git a/fs/affs/super.c b/fs/affs/super.c index 0496cbbeda1b..c70f1e5fc024 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "affs.h" extern struct timezone sys_tz; @@ -47,6 +48,7 @@ affs_put_super(struct super_block *sb) struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); + cancel_delayed_work_sync(&sbi->sb_work); kfree(sbi->s_prefix); affs_free_bitmap(sb); affs_brelse(sbi->s_root_bh); @@ -54,23 +56,45 @@ affs_put_super(struct super_block *sb) sb->s_fs_info = NULL; } -static void -affs_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - affs_commit_super(sb, 1); - sb->s_dirt = 0; - pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); -} - static int affs_sync_fs(struct super_block *sb, int wait) { 
affs_commit_super(sb, wait); - sb->s_dirt = 0; return 0; } +static void flush_superblock(struct work_struct *work) +{ + struct affs_sb_info *sbi; + struct super_block *sb; + + sbi = container_of(work, struct affs_sb_info, sb_work.work); + sb = sbi->sb; + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + affs_commit_super(sb, 1); +} + +void affs_mark_sb_dirty(struct super_block *sb) +{ + struct affs_sb_info *sbi = AFFS_SB(sb); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->sb_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); +} + static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) @@ -132,7 +156,6 @@ static const struct super_operations affs_sops = { .write_inode = affs_write_inode, .evict_inode = affs_evict_inode, .put_super = affs_put_super, - .write_super = affs_write_super, .sync_fs = affs_sync_fs, .statfs = affs_statfs, .remount_fs = affs_remount, @@ -302,6 +325,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sbi->sb = sb; mutex_init(&sbi->s_bmlock); spin_lock_init(&sbi->symlink_lock); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->sb_work, flush_superblock); if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, &blocksize,&sbi->s_prefix, @@ -526,6 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; } + flush_delayed_work_sync(&sbi->sb_work); replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; -- cgit v1.2.3 From d187663ef24cd3d033f0cbf2867e70b36a3a90b8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 7 Jun 2012 15:45:00 -0700 Subject: fs/direct-io.c: adjust suspicious bit operation READ is 0, so the result of the bit-and operation is 0. 
Rewrite with == as done elsewhere in the same file. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Reviewed-by: Jeff Moyer Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/direct-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 0c85fae37666..1faf4cb56f39 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1258,7 +1258,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, */ BUG_ON(retval == -EIOCBQUEUED); if (dio->is_async && retval == 0 && dio->result && - ((rw & READ) || (dio->result == sdio.size))) + ((rw == READ) || (dio->result == sdio.size))) retval = -EIOCBQUEUED; if (retval != -EIOCBQUEUED) -- cgit v1.2.3 From f7a99c5b7c8bd3d3f533c8b38274e33f3da9096e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 00:59:08 -0400 Subject: get rid of ->mnt_longterm it's enough to set ->mnt_ns of internal vfsmounts to something distinct from all struct mnt_namespace out there; then we can just use the check for ->mnt_ns != NULL in the fast path of mntput_no_expire() Signed-off-by: Al Viro --- fs/dcache.c | 2 +- fs/fs_struct.c | 32 ++++++++++---------------------- fs/internal.h | 2 -- fs/mount.h | 9 ++++++++- fs/namespace.c | 53 +++++++---------------------------------------------- 5 files changed, 26 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 40469044088d..44acb5b29ae4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2622,7 +2622,7 @@ global_root: if (!slash) error = prepend(buffer, buflen, "/", 1); if (!error) - error = real_mount(vfsmnt)->mnt_ns ? 1 : 2; + error = is_mounted(vfsmnt) ? 
1 : 2; goto out; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index e159e682ad4c..5df4775fea03 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -6,18 +6,6 @@ #include #include "internal.h" -static inline void path_get_longterm(struct path *path) -{ - path_get(path); - mnt_make_longterm(path->mnt); -} - -static inline void path_put_longterm(struct path *path) -{ - mnt_make_shortterm(path->mnt); - path_put(path); -} - /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. @@ -26,7 +14,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - path_get_longterm(path); + path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_root = fs->root; @@ -34,7 +22,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put_longterm(&old_root); + path_put(&old_root); } /* @@ -45,7 +33,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - path_get_longterm(path); + path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; @@ -54,7 +42,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put_longterm(&old_pwd); + path_put(&old_pwd); } static inline int replace_path(struct path *p, const struct path *old, const struct path *new) @@ -84,7 +72,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_end(&fs->seq); while (hits--) { count++; - path_get_longterm(new_root); + path_get(new_root); } spin_unlock(&fs->lock); } @@ -92,13 +80,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put_longterm(old_root); + path_put(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put_longterm(&fs->root); - path_put_longterm(&fs->pwd); + path_put(&fs->root); + 
path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -132,9 +120,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock(&old->lock); fs->root = old->root; - path_get_longterm(&fs->root); + path_get(&fs->root); fs->pwd = old->pwd; - path_get_longterm(&fs->pwd); + path_get(&fs->pwd); spin_unlock(&old->lock); } return fs; diff --git a/fs/internal.h b/fs/internal.h index 18bc216ea09d..d2a23ff61b40 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -50,8 +50,6 @@ extern int copy_mount_string(const void __user *, char **); extern struct vfsmount *lookup_mnt(struct path *); extern int finish_automount(struct vfsmount *, struct path *); -extern void mnt_make_longterm(struct vfsmount *); -extern void mnt_make_shortterm(struct vfsmount *); extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); diff --git a/fs/mount.h b/fs/mount.h index 4ef36d93e5a2..05a2a1185efc 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -22,7 +22,6 @@ struct mount { struct vfsmount mnt; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; - atomic_t mnt_longterm; /* how many of the refs are longterm */ #else int mnt_count; int mnt_writers; @@ -49,6 +48,8 @@ struct mount { int mnt_ghosts; }; +#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ + static inline struct mount *real_mount(struct vfsmount *mnt) { return container_of(mnt, struct mount, mnt); @@ -59,6 +60,12 @@ static inline int mnt_has_parent(struct mount *mnt) return mnt != mnt->mnt_parent; } +static inline int is_mounted(struct vfsmount *mnt) +{ + /* neither detached nor internal? 
*/ + return !IS_ERR_OR_NULL(real_mount(mnt)); +} + extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); static inline void get_mnt_ns(struct mnt_namespace *ns) diff --git a/fs/namespace.c b/fs/namespace.c index 1e4a5fe3d7b7..a524ea4dbd80 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -621,21 +621,6 @@ static void attach_mnt(struct mount *mnt, struct path *path) list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); } -static inline void __mnt_make_longterm(struct mount *mnt) -{ -#ifdef CONFIG_SMP - atomic_inc(&mnt->mnt_longterm); -#endif -} - -/* needs vfsmount lock for write */ -static inline void __mnt_make_shortterm(struct mount *mnt) -{ -#ifdef CONFIG_SMP - atomic_dec(&mnt->mnt_longterm); -#endif -} - /* * vfsmount lock must be held for write */ @@ -649,10 +634,8 @@ static void commit_tree(struct mount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) { + list_for_each_entry(m, &head, mnt_list) m->mnt_ns = n; - __mnt_make_longterm(m); - } list_splice(&head, n->list.prev); @@ -804,7 +787,8 @@ static void mntput_no_expire(struct mount *mnt) put_again: #ifdef CONFIG_SMP br_read_lock(&vfsmount_lock); - if (likely(atomic_read(&mnt->mnt_longterm))) { + if (likely(mnt->mnt_ns)) { + /* shouldn't be the last one */ mnt_add_count(mnt, -1); br_read_unlock(&vfsmount_lock); return; @@ -1074,8 +1058,6 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); - if (p->mnt_ns) - __mnt_make_shortterm(p); p->mnt_ns = NULL; list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { @@ -2209,23 +2191,6 @@ static struct mnt_namespace *alloc_mnt_ns(void) return new_ns; } -void mnt_make_longterm(struct vfsmount *mnt) -{ - __mnt_make_longterm(real_mount(mnt)); -} - -void mnt_make_shortterm(struct vfsmount *m) -{ -#ifdef CONFIG_SMP - struct mount *mnt = real_mount(m); - if 
(atomic_add_unless(&mnt->mnt_longterm, -1, 1)) - return; - br_write_lock(&vfsmount_lock); - atomic_dec(&mnt->mnt_longterm); - br_write_unlock(&vfsmount_lock); -#endif -} - /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -2265,18 +2230,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new; while (p) { q->mnt_ns = new_ns; - __mnt_make_longterm(q); if (fs) { if (&p->mnt == fs->root.mnt) { fs->root.mnt = mntget(&q->mnt); - __mnt_make_longterm(q); - mnt_make_shortterm(&p->mnt); rootmnt = &p->mnt; } if (&p->mnt == fs->pwd.mnt) { fs->pwd.mnt = mntget(&q->mnt); - __mnt_make_longterm(q); - mnt_make_shortterm(&p->mnt); pwdmnt = &p->mnt; } } @@ -2320,7 +2280,6 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) if (!IS_ERR(new_ns)) { struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; - __mnt_make_longterm(mnt); new_ns->root = mnt; list_add(&new_ns->list, &mnt->mnt_list); } else { @@ -2615,7 +2574,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) * it is a longterm mount, don't release mnt until * we unmount before file sys is unregistered */ - mnt_make_longterm(mnt); + real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; } return mnt; } @@ -2625,7 +2584,9 @@ void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR_OR_NULL(mnt)) { - mnt_make_shortterm(mnt); + br_write_lock(&vfsmount_lock); + real_mount(mnt)->mnt_ns = NULL; + br_write_unlock(&vfsmount_lock); mntput(mnt); } } -- cgit v1.2.3 From 6ce6e24e72233073c8ead9419fc5040d44803dae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 01:16:59 -0400 Subject: get rid of magic in proc_namespace.c don't rely on proc_mounts->m being the first field; container_of() is there for purpose. No need to bother with ->private, while we are at it - the same container_of will do nicely. 
Signed-off-by: Al Viro --- fs/mount.h | 4 +++- fs/namespace.c | 6 +++--- fs/proc_namespace.c | 7 +++---- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/mount.h b/fs/mount.h index 05a2a1185efc..4f291f9de641 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -74,10 +74,12 @@ static inline void get_mnt_ns(struct mnt_namespace *ns) } struct proc_mounts { - struct seq_file m; /* must be the first element */ + struct seq_file m; struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); }; +#define proc_mounts(p) (container_of((p), struct proc_mounts, m)) + extern const struct seq_operations mounts_op; diff --git a/fs/namespace.c b/fs/namespace.c index a524ea4dbd80..8f412abcb67f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -923,7 +923,7 @@ EXPORT_SYMBOL(replace_mount_options); /* iterator; we want it to have access to namespace_sem, thus here... */ static void *m_start(struct seq_file *m, loff_t *pos) { - struct proc_mounts *p = container_of(m, struct proc_mounts, m); + struct proc_mounts *p = proc_mounts(m); down_read(&namespace_sem); return seq_list_start(&p->ns->list, *pos); @@ -931,7 +931,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct proc_mounts *p = container_of(m, struct proc_mounts, m); + struct proc_mounts *p = proc_mounts(m); return seq_list_next(v, &p->ns->list, pos); } @@ -943,7 +943,7 @@ static void m_stop(struct seq_file *m, void *v) static int m_show(struct seq_file *m, void *v) { - struct proc_mounts *p = container_of(m, struct proc_mounts, m); + struct proc_mounts *p = proc_mounts(m); struct mount *r = list_entry(v, struct mount, mnt_list); return p->show(m, &r->mnt); } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 5e289a7cbad1..5fe34c355e85 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -17,7 +17,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) { - 
struct proc_mounts *p = file->private_data; + struct proc_mounts *p = proc_mounts(file->private_data); struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; @@ -121,7 +121,7 @@ out: static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) { - struct proc_mounts *p = m->private; + struct proc_mounts *p = proc_mounts(m); struct mount *r = real_mount(mnt); struct super_block *sb = mnt->mnt_sb; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; @@ -268,7 +268,6 @@ static int mounts_open_common(struct inode *inode, struct file *file, if (ret) goto err_free; - p->m.private = p; p->ns = ns; p->root = root; p->m.poll_event = ns->event; @@ -288,7 +287,7 @@ static int mounts_open_common(struct inode *inode, struct file *file, static int mounts_release(struct inode *inode, struct file *file) { - struct proc_mounts *p = file->private_data; + struct proc_mounts *p = proc_mounts(file->private_data); path_put(&p->root); put_mnt_ns(p->ns); return seq_release(inode, file); -- cgit v1.2.3 From 63a44583f3a4408b902a3d7ba18b4ab13d1309ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:49:04 -0400 Subject: qnx6: don't bother with ->i_dentry in inode-freeing callback we'll initialize it in inode_init_always() when we allocate that object again. 
Signed-off-by: Al Viro --- fs/qnx6/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index e44012dc5645..2049c814bda4 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -622,7 +622,6 @@ static struct inode *qnx6_alloc_inode(struct super_block *sb) static void qnx6_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode)); } -- cgit v1.2.3 From e6f9f8d0296aad7fbaf01de38ccaa1bf654bbda4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:50:36 -0400 Subject: cifs: don't bother with ->i_dentry in ->destroy_inode() Signed-off-by: Al Viro --- fs/cifs/cifsfs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8b6e344eb0ba..bcab12c87146 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -257,7 +257,6 @@ cifs_alloc_inode(struct super_block *sb) static void cifs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); } -- cgit v1.2.3 From 7968ce12e9645c5eb5bb3f4320e43c2e402d580c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:51:12 -0400 Subject: adfs: don't bother with ->i_dentry in ->destroy_inode() Signed-off-by: Al Viro --- fs/adfs/super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 06fdcc9382c4..bdaec92353c2 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -246,7 +246,6 @@ static struct inode *adfs_alloc_inode(struct super_block *sb) static void adfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -- cgit v1.2.3 From 3084ee95f08ce353ae26c18c7627c4e9786983ca Mon 
Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:03:04 -0400 Subject: affs: get rid of open-coded list_for_each_entry() Signed-off-by: Al Viro --- fs/affs/amigaffs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 52a6407682e6..1c7fd7928d1f 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -126,18 +126,13 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) { struct inode *inode = dentry->d_inode; void *data = dentry->d_fsdata; - struct list_head *head, *next; spin_lock(&inode->i_lock); - head = &inode->i_dentry; - next = head->next; - while (next != head) { - dentry = list_entry(next, struct dentry, d_alias); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = data; break; } - next = next->next; } spin_unlock(&inode->i_lock); } -- cgit v1.2.3 From 12447c40394695c9a19920c65fea124bdf3ea034 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:06:09 -0400 Subject: affs: unobfuscate affs_fix_dcache() and add a comment on what it's doing Signed-off-by: Al Viro --- fs/affs/amigaffs.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 1c7fd7928d1f..843cdc994804 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -122,15 +122,13 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) } static void -affs_fix_dcache(struct dentry *dentry, u32 entry_ino) +affs_fix_dcache(struct inode *inode, u32 entry_ino) { - struct inode *inode = dentry->d_inode; - void *data = dentry->d_fsdata; - + struct dentry *dentry; spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { - dentry->d_fsdata = data; + dentry->d_fsdata = (void *)inode->i_ino; break; } } @@ -172,7 +170,11 @@ affs_remove_link(struct dentry 
*dentry) } affs_lock_dir(dir); - affs_fix_dcache(dentry, link_ino); + /* + * if there's a dentry for that block, make it + * refer to inode itself. + */ + affs_fix_dcache(inode, link_ino); retval = affs_remove_hash(dir, link_bh); if (retval) { affs_unlock_dir(dir); -- cgit v1.2.3 From a614a092bf28d58c742b9ec43209f3f78c3d9fb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:09:15 -0400 Subject: ocfs2: use list_for_each_entry in ocfs2_find_local_alias() Signed-off-by: Al Viro --- fs/ocfs2/dcache.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index e5ba34818332..a40edc1e1d86 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -170,13 +170,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { - struct list_head *p; - struct dentry *dentry = NULL; + struct dentry *dentry; spin_lock(&inode->i_lock); - list_for_each(p, &inode->i_dentry) { - dentry = list_entry(p, struct dentry, d_alias); - + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, @@ -184,16 +181,13 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, dget_dlock(dentry); spin_unlock(&dentry->d_lock); - break; + spin_unlock(&inode->i_lock); + return dentry; } spin_unlock(&dentry->d_lock); - - dentry = NULL; } - spin_unlock(&inode->i_lock); - - return dentry; + return NULL; } DEFINE_SPINLOCK(dentry_attach_lock); -- cgit v1.2.3 From 9f713878f22e0b2d34d62df0ca55f65166375634 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:19:12 -0400 Subject: ext4: get rid of open-coded d_find_any_alias() Signed-off-by: Al Viro --- fs/ext4/fsync.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 
bb6c7d811313..4359a4d30069 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode) inode = igrab(inode); while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); - dentry = NULL; - spin_lock(&inode->i_lock); - if (!list_empty(&inode->i_dentry)) { - dentry = list_first_entry(&inode->i_dentry, - struct dentry, d_alias); - dget(dentry); - } - spin_unlock(&inode->i_lock); + dentry = d_find_any_alias(inode); if (!dentry) break; next = igrab(dentry->d_parent->d_inode); -- cgit v1.2.3 From b3d9b7a3c752dc4b6976a4ff7b8298887a5b734d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:51:19 -0400 Subject: vfs: switch i_dentry/d_alias to hlist Signed-off-by: Al Viro --- fs/affs/amigaffs.c | 3 ++- fs/btrfs/inode.c | 2 +- fs/cifs/inode.c | 5 +++-- fs/dcache.c | 33 ++++++++++++++++++--------------- fs/exportfs/expfs.c | 3 ++- fs/ext4/fsync.c | 2 +- fs/fuse/dir.c | 2 +- fs/inode.c | 2 +- fs/nfs/getroot.c | 2 +- fs/notify/fsnotify.c | 3 ++- fs/ocfs2/dcache.c | 3 ++- include/linux/dcache.h | 2 +- include/linux/fs.h | 2 +- 13 files changed, 36 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 843cdc994804..eb82ee53ee0b 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -125,8 +125,9 @@ static void affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; + struct hlist_node *p; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a7d1921ac76b..a101572f1cea 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6987,7 +6987,7 @@ void btrfs_destroy_inode(struct inode *inode) struct btrfs_ordered_extent *ordered; struct 
btrfs_root *root = BTRFS_I(inode)->root; - WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(!hlist_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); WARN_ON(BTRFS_I(inode)->outstanding_extents); WARN_ON(BTRFS_I(inode)->reserved_extents); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 745da3d0653e..8e8bb49112ff 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -800,7 +800,7 @@ cifs_find_inode(struct inode *inode, void *opaque) return 0; /* if it's not a directory or has no dentries, then flag it */ - if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; return 1; @@ -825,9 +825,10 @@ static bool inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; + struct hlist_node *p; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/dcache.c b/fs/dcache.c index 44acb5b29ae4..015586f1ffc6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -218,7 +218,7 @@ static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!list_empty(&dentry->d_alias)); + WARN_ON(!hlist_unhashed(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); @@ -267,7 +267,7 @@ static void dentry_iput(struct dentry * dentry) struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -291,7 +291,7 @@ static void dentry_unlink_inode(struct dentry * dentry) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + 
hlist_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -699,10 +699,11 @@ EXPORT_SYMBOL(dget_parent); static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; + struct hlist_node *p; again: discon_alias = NULL; - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -737,7 +738,7 @@ struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; - if (!list_empty(&inode->i_dentry)) { + if (!hlist_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); spin_unlock(&inode->i_lock); @@ -753,9 +754,10 @@ EXPORT_SYMBOL(d_find_alias); void d_prune_aliases(struct inode *inode) { struct dentry *dentry; + struct hlist_node *p; restart: spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -977,7 +979,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_alias); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else @@ -1312,7 +1314,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_LIST_HEAD(&dentry->d_alias); + INIT_HLIST_NODE(&dentry->d_alias); INIT_LIST_HEAD(&dentry->d_u.d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); @@ -1400,7 +1402,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) { if (unlikely(IS_AUTOMOUNT(inode))) 
dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - list_add(&dentry->d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_alias, &inode->i_dentry); } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); @@ -1425,7 +1427,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); if (inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); @@ -1458,13 +1460,14 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; + struct hlist_node *p; if (!inode) { __d_instantiate(entry, NULL); return NULL; } - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or @@ -1490,7 +1493,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); if (inode) spin_lock(&inode->i_lock); @@ -1531,9 +1534,9 @@ static struct dentry * __d_find_any_alias(struct inode *inode) { struct dentry *alias; - if (list_empty(&inode->i_dentry)) + if (hlist_empty(&inode->i_dentry)) return NULL; - alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); __dget(alias); return alias; } @@ -1607,7 +1610,7 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_lock(&tmp->d_lock); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - list_add(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 
hlist_bl_unlock(&tmp->d_sb->s_anon); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b0201ca6e9c6..b42063cf1b2d 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -44,13 +44,14 @@ find_acceptable_alias(struct dentry *result, { struct dentry *dentry, *toput = NULL; struct inode *inode; + struct hlist_node *p; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 4359a4d30069..2a1dcea4f12e 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -225,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!journal) { ret = __sync_inode(inode, datasync); - if (!ret && !list_empty(&inode->i_dentry)) + if (!ret && !hlist_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); goto out; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 334e0b18a014..f7543f72897e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -249,7 +249,7 @@ static struct dentry *fuse_d_add_directory(struct dentry *entry, /* This tries to shrink the subtree below alias */ fuse_invalidate_entry(alias); dput(alias); - if (!list_empty(&inode->i_dentry)) + if (!hlist_empty(&inode->i_dentry)) return ERR_PTR(-EBUSY); } else { dput(alias); diff --git a/fs/inode.c b/fs/inode.c index c99163b1b310..775cbabd4fa5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -182,7 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) } inode->i_private = NULL; inode->i_mapping = mapping; - INIT_LIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ + INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ #ifdef CONFIG_FS_POSIX_ACL inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; #endif diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 
8abfb19bd3aa..a67990f90bd7 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - list_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b39c5c161adb..6baadb5a8430 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,6 +52,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; + struct hlist_node *p; int watched; if (!S_ISDIR(inode->i_mode)) @@ -63,7 +64,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index a40edc1e1d86..af4488268e49 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -170,10 +170,11 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { + struct hlist_node *p; struct dentry *dentry; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 094789ff3e9f..8ca255518204 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ 
-128,7 +128,7 @@ struct dentry { struct rcu_head d_rcu; } d_u; struct list_head d_subdirs; /* our children */ - struct list_head d_alias; /* inode alias list */ + struct hlist_node d_alias; /* inode alias list */ }; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 17fd887c798f..f06db6bd5a74 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -826,7 +826,7 @@ struct inode { struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; union { - struct list_head i_dentry; + struct hlist_head i_dentry; struct rcu_head i_rcu; }; u64 i_version; -- cgit v1.2.3 From 1d674107ea4b68669e012e654d64369b7f2bb250 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 19:52:19 -0400 Subject: coda: use list_for_each_entry Signed-off-by: Al Viro --- fs/coda/cache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 690157876184..958ae0e0ff8c 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -89,17 +89,13 @@ int coda_cache_check(struct inode *inode, int mask) /* this won't do any harm: just flag all children */ static void coda_flag_children(struct dentry *parent, int flag) { - struct list_head *child; struct dentry *de; spin_lock(&parent->d_lock); - list_for_each(child, &parent->d_subdirs) - { - de = list_entry(child, struct dentry, d_u.d_child); + list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) { /* don't know what to do with negative dentries */ - if ( ! de->d_inode ) - continue; - coda_flag_inode(de->d_inode, flag); + if (de->d_inode ) + coda_flag_inode(de->d_inode, flag); } spin_unlock(&parent->d_lock); return; -- cgit v1.2.3 From 6d7b5aaed7d887b34f29f900244cdbd17a86637c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 04:15:17 -0400 Subject: namei.c: let follow_link() do put_link() on failure no need for kludgy "set cookie to ERR_PTR(...) 
because we failed before we did actual ->follow_link() and want to suppress put_link()", no pointless check in put_link() itself. Callers checked if follow_link() has failed anyway; might as well break out of their loops if that happened, without bothering to call put_link() first. [AV: folded fixes from hch] Signed-off-by: Al Viro --- fs/namei.c | 74 ++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 7d694194024a..6135a14d5a84 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -605,7 +605,7 @@ static inline void path_to_nameidata(const struct path *path, static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { struct inode *inode = link->dentry->d_inode; - if (!IS_ERR(cookie) && inode->i_op->put_link) + if (inode->i_op->put_link) inode->i_op->put_link(link->dentry, nd, cookie); path_put(link); } @@ -613,19 +613,19 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki static __always_inline int follow_link(struct path *link, struct nameidata *nd, void **p) { - int error; struct dentry *dentry = link->dentry; + int error; + char *s; BUG_ON(nd->flags & LOOKUP_RCU); if (link->mnt == nd->path.mnt) mntget(link->mnt); - if (unlikely(current->total_link_count >= 40)) { - *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ - path_put(&nd->path); - return -ELOOP; - } + error = -ELOOP; + if (unlikely(current->total_link_count >= 40)) + goto out_put_nd_path; + cond_resched(); current->total_link_count++; @@ -633,30 +633,37 @@ follow_link(struct path *link, struct nameidata *nd, void **p) nd_set_link(nd, NULL); error = security_inode_follow_link(link->dentry, nd); - if (error) { - *p = ERR_PTR(error); /* no ->put_link(), please */ - path_put(&nd->path); - return error; - } + if (error) + goto out_put_nd_path; nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = 
PTR_ERR(*p); - if (!IS_ERR(*p)) { - char *s = nd_get_link(nd); - error = 0; - if (s) - error = __vfs_follow_link(nd, s); - else if (nd->last_type == LAST_BIND) { - nd->flags |= LOOKUP_JUMPED; - nd->inode = nd->path.dentry->d_inode; - if (nd->inode->i_op->follow_link) { - /* stepped on a _really_ weird one */ - path_put(&nd->path); - error = -ELOOP; - } + if (IS_ERR(*p)) + goto out_put_link; + + error = 0; + s = nd_get_link(nd); + if (s) { + error = __vfs_follow_link(nd, s); + } else if (nd->last_type == LAST_BIND) { + nd->flags |= LOOKUP_JUMPED; + nd->inode = nd->path.dentry->d_inode; + if (nd->inode->i_op->follow_link) { + /* stepped on a _really_ weird one */ + path_put(&nd->path); + error = -ELOOP; } } + if (unlikely(error)) + put_link(nd, link, *p); + + return error; + +out_put_nd_path: + path_put(&nd->path); +out_put_link: + path_put(link); return error; } @@ -1383,9 +1390,10 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) void *cookie; res = follow_link(&link, nd, &cookie); - if (!res) - res = walk_component(nd, path, &nd->last, - nd->last_type, LOOKUP_FOLLOW); + if (res) + break; + res = walk_component(nd, path, &nd->last, + nd->last_type, LOOKUP_FOLLOW); put_link(nd, &link, cookie); } while (res > 0); @@ -1777,8 +1785,9 @@ static int path_lookupat(int dfd, const char *name, struct path link = path; nd->flags |= LOOKUP_PARENT; err = follow_link(&link, nd, &cookie); - if (!err) - err = lookup_last(nd, &path); + if (err) + break; + err = lookup_last(nd, &path); put_link(nd, &link, cookie); } } @@ -2475,9 +2484,8 @@ static struct file *path_openat(int dfd, const char *pathname, nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); if (unlikely(error)) - filp = ERR_PTR(error); - else - filp = do_last(nd, &path, op, pathname); + goto out_filp; + filp = do_last(nd, &path, op, pathname); put_link(nd, &link, cookie); } out: -- cgit v1.2.3 From 37d7fffc9cafe75ded8a840fa30ba625f99ed7ae Mon Sep 17 
00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:12 +0200 Subject: vfs: do_last(): inline lookup_slow() Copy lookup_slow() into do_last(). Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 6135a14d5a84..68742e3cb98d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2254,9 +2254,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error < 0) goto exit; - error = lookup_slow(nd, &nd->last, path); - if (error < 0) + BUG_ON(nd->inode != dir->d_inode); + + mutex_lock(&dir->d_inode->i_mutex); + dentry = __lookup_hash(&nd->last, dir, nd); + mutex_unlock(&dir->d_inode->i_mutex); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) goto exit; + path->mnt = nd->path.mnt; + path->dentry = dentry; + error = follow_managed(path, nd->flags); + if (unlikely(error < 0)) + goto exit_dput; + + if (error) + nd->flags |= LOOKUP_JUMPED; inode = path->dentry->d_inode; } -- cgit v1.2.3 From b6183df7b294997a748eeb9991daa126986ead12 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:13 +0200 Subject: vfs: do_last(): separate O_CREAT specific code Check O_CREAT on the slow lookup paths where necessary. This allows the rest to be shared with plain open. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 68742e3cb98d..12ed29712b4e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2274,22 +2274,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } goto finish_lookup; - } - - /* create side of things */ - /* - * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been - * cleared when we got to the last component we are about to look up - */ - error = complete_walk(nd); - if (error) - return ERR_PTR(error); + } else { + /* create side of things */ + /* + * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED + * has been cleared when we got to the last component we are + * about to look up + */ + error = complete_walk(nd); + if (error) + return ERR_PTR(error); - audit_inode(pathname, dir); - error = -EISDIR; - /* trailing slashes? */ - if (nd->last.name[nd->last.len]) - goto exit; + audit_inode(pathname, dir); + error = -EISDIR; + /* trailing slashes? */ + if (nd->last.name[nd->last.len]) + goto exit; + } retry_lookup: mutex_lock(&dir->d_inode->i_mutex); @@ -2305,7 +2306,7 @@ retry_lookup: path->mnt = nd->path.mnt; /* Negative dentry, just create the file */ - if (!dentry->d_inode) { + if (!dentry->d_inode && (open_flag & O_CREAT)) { umode_t mode = op->mode; if (!IS_POSIXACL(dir->d_inode)) mode &= ~current_umask(); -- cgit v1.2.3 From 7157486541bffc0dfec912e21ae639b029dae3d3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:14 +0200 Subject: vfs: do_last(): common slow lookup Make the slow lookup part of O_CREAT and non-O_CREAT opens common. This allows atomic_open to be hooked into the slow lookup part. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 12ed29712b4e..285e62e925f7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2250,30 +2250,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, symlink_ok = 1; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &nd->last, path, &inode); - if (unlikely(error)) { - if (error < 0) - goto exit; - - BUG_ON(nd->inode != dir->d_inode); - - mutex_lock(&dir->d_inode->i_mutex); - dentry = __lookup_hash(&nd->last, dir, nd); - mutex_unlock(&dir->d_inode->i_mutex); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto exit; - path->mnt = nd->path.mnt; - path->dentry = dentry; - error = follow_managed(path, nd->flags); - if (unlikely(error < 0)) - goto exit_dput; + if (likely(!error)) + goto finish_lookup; - if (error) - nd->flags |= LOOKUP_JUMPED; + if (error < 0) + goto exit; - inode = path->dentry->d_inode; - } - goto finish_lookup; + BUG_ON(nd->inode != dir->d_inode); } else { /* create side of things */ /* -- cgit v1.2.3 From d58ffd35c1e595df2cf8ac4803f178c8be95ca7a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:15 +0200 Subject: vfs: add lookup_open() Split out lookup + maybe create from do_last(). This is the part under i_mutex protection. The function is called lookup_open() and returns a filp even though the open part is not used yet. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 99 ++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 285e62e925f7..fad7117dbb28 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,6 +2196,60 @@ static inline int open_to_namei_flags(int flag) return flag; } +/* + * Lookup, maybe create and open the last component + * + * Must be called with i_mutex held on parent. + * + * Returns open file or NULL on success, error otherwise. NULL means no open + * was performed, only lookup. + */ +static struct file *lookup_open(struct nameidata *nd, struct path *path, + const struct open_flags *op, + int *want_write, bool *created) +{ + struct dentry *dir = nd->path.dentry; + struct dentry *dentry; + int error; + + *created = false; + dentry = lookup_hash(nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + /* Negative dentry, just create the file */ + if (!dentry->d_inode && (op->open_flag & O_CREAT)) { + umode_t mode = op->mode; + if (!IS_POSIXACL(dir->d_inode)) + mode &= ~current_umask(); + /* + * This write is needed to ensure that a + * rw->ro transition does not occur between + * the time when the file is created and when + * a permanent write count is taken through + * the 'struct file' in nameidata_to_filp(). 
+ */ + error = mnt_want_write(nd->path.mnt); + if (error) + goto out_dput; + *want_write = 1; + *created = true; + error = security_path_mknod(&nd->path, dentry, mode, 0); + if (error) + goto out_dput; + error = vfs_create(dir->d_inode, dentry, mode, nd); + if (error) + goto out_dput; + } + path->dentry = dentry; + path->mnt = nd->path.mnt; + return NULL; + +out_dput: + dput(dentry); + return ERR_PTR(error); +} + /* * Handle the last step of open() */ @@ -2203,13 +2257,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, const struct open_flags *op, const char *pathname) { struct dentry *dir = nd->path.dentry; - struct dentry *dentry; int open_flag = op->open_flag; int will_truncate = open_flag & O_TRUNC; int want_write = 0; int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; + bool created; int symlink_ok = 0; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; @@ -2277,53 +2331,24 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); + filp = lookup_open(nd, path, op, &want_write, &created); + mutex_unlock(&dir->d_inode->i_mutex); - dentry = lookup_hash(nd); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) { - mutex_unlock(&dir->d_inode->i_mutex); - goto exit; - } - - path->dentry = dentry; - path->mnt = nd->path.mnt; + if (IS_ERR(filp)) + goto out; - /* Negative dentry, just create the file */ - if (!dentry->d_inode && (open_flag & O_CREAT)) { - umode_t mode = op->mode; - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current_umask(); - /* - * This write is needed to ensure that a - * rw->ro transition does not occur between - * the time when the file is created and when - * a permanent write count is taken through - * the 'struct file' in nameidata_to_filp(). 
- */ - error = mnt_want_write(nd->path.mnt); - if (error) - goto exit_mutex_unlock; - want_write = 1; + if (created) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = 0; acc_mode = MAY_OPEN; - error = security_path_mknod(&nd->path, dentry, mode, 0); - if (error) - goto exit_mutex_unlock; - error = vfs_create(dir->d_inode, dentry, mode, nd); - if (error) - goto exit_mutex_unlock; - mutex_unlock(&dir->d_inode->i_mutex); - dput(nd->path.dentry); - nd->path.dentry = dentry; + path_to_nameidata(path, nd); goto common; } /* * It already exists. */ - mutex_unlock(&dir->d_inode->i_mutex); audit_inode(pathname, path->dentry); error = -EEXIST; @@ -2432,8 +2457,6 @@ out: terminate_walk(nd); return filp; -exit_mutex_unlock: - mutex_unlock(&dir->d_inode->i_mutex); exit_dput: path_put_conditional(path, nd); exit: -- cgit v1.2.3 From 54ef487241e863a6046536ac5b1fcd5d7cde86e5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:16 +0200 Subject: vfs: lookup_open(): expand lookup_hash() Copy __lookup_hash() into lookup_open(). The next patch will insert the atomic open call just before the real lookup. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index fad7117dbb28..ccb0eb17f528 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2209,14 +2209,24 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, int *want_write, bool *created) { struct dentry *dir = nd->path.dentry; + struct inode *dir_inode = dir->d_inode; struct dentry *dentry; int error; + bool need_lookup; *created = false; - dentry = lookup_hash(nd); + dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) return ERR_CAST(dentry); + if (need_lookup) { + BUG_ON(dentry->d_inode); + + dentry = lookup_real(dir_inode, dentry, nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + } + /* Negative dentry, just create the file */ if (!dentry->d_inode && (op->open_flag & O_CREAT)) { umode_t mode = op->mode; -- cgit v1.2.3 From d18e9008c377dc6a6d2166a6840bf3a23a5867fd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:17 +0200 Subject: vfs: add i_op->atomic_open() Add a new inode operation which is called on the last component of an open. Using this the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning NULL instead of an open struct file pointer. i_op->atomic_open() is only called if the last component is negative or needs lookup. Handling cached positive dentries here doesn't add much value: these can be opened using f_op->open(). If the cached file turns out to be invalid, the open can be retried, this time using ->atomic_open() with a fresh dentry. For now leave the old way of using open intents in lookup and revalidate in place. This will be removed once all the users are converted. 
David Howells noticed that if ->atomic_open() opens the file but does not create it, handle_truncate() will be called on it even if it is not a regular file. Fix this by checking the file type in this case too. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 4 + Documentation/filesystems/vfs.txt | 11 +++ fs/internal.h | 5 + fs/namei.c | 203 +++++++++++++++++++++++++++++++++++++- fs/open.c | 42 ++++++++ include/linux/fs.h | 7 ++ 6 files changed, 270 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8e2da1e06e3b..8157488c3463 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,6 +62,9 @@ ata *); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); + struct file * (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, bool *created); locking rules: all may block @@ -89,6 +92,7 @@ listxattr: no removexattr: yes fiemap: no update_time: no +atomic_open: yes Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index efd23f481704..beb6e691f70a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,9 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); + struct file * (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, bool *created); }; Again, all methods are called without any locks being held, unless @@ -476,6 +479,14 @@ otherwise noted. an inode. 
If this is not defined the VFS will update the inode itself and call mark_inode_dirty_sync. + atomic_open: called on the last component of an open. Using this optional + method the filesystem can look up, possibly create and open the file in + one atomic operation. If it cannot perform this (e.g. the file type + turned out to be wrong) it may signal this by returning NULL instead of + an open struct file pointer. This method is only called if the last + component is negative or needs lookup. Cached positive dentries are + still handled by f_op->open(). + The Address Space Object ======================== diff --git a/fs/internal.h b/fs/internal.h index d2a23ff61b40..70067775df2e 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t); struct nameidata; extern struct file *nameidata_to_filp(struct nameidata *); extern void release_open_intent(struct nameidata *); +struct opendata { + struct dentry *dentry; + struct vfsmount *mnt; + struct file **filp; +}; struct open_flags { int open_flag; umode_t mode; diff --git a/fs/namei.c b/fs/namei.c index ccb0eb17f528..9e11ae83bff6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag) return flag; } +static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) +{ + int error = security_path_mknod(dir, dentry, mode, 0); + if (error) + return error; + + error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); + if (error) + return error; + + return security_inode_create(dir->dentry->d_inode, dentry, mode); +} + +static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, + struct path *path, const struct open_flags *op, + int *want_write, bool need_lookup, + bool *created) +{ + struct inode *dir = nd->path.dentry->d_inode; + unsigned open_flag = open_to_namei_flags(op->open_flag); + umode_t mode; + int error; + int acc_mode; + struct opendata od; + struct file 
*filp; + int create_error = 0; + struct dentry *const DENTRY_NOT_SET = (void *) -1UL; + + BUG_ON(dentry->d_inode); + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(dir))) { + filp = ERR_PTR(-ENOENT); + goto out; + } + + mode = op->mode & S_IALLUGO; + if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) + mode &= ~current_umask(); + + if (open_flag & O_EXCL) { + open_flag &= ~O_TRUNC; + *created = true; + } + + /* + * Checking write permission is tricky, bacuse we don't know if we are + * going to actually need it: O_CREAT opens should work as long as the + * file exists. But checking existence breaks atomicity. The trick is + * to check access and if not granted clear O_CREAT from the flags. + * + * Another problem is returing the "right" error value (e.g. for an + * O_EXCL open we want to return EEXIST not EROFS). + */ + if ((open_flag & (O_CREAT | O_TRUNC)) || + (open_flag & O_ACCMODE) != O_RDONLY) { + error = mnt_want_write(nd->path.mnt); + if (!error) { + *want_write = 1; + } else if (!(open_flag & O_CREAT)) { + /* + * No O_CREATE -> atomicity not a requirement -> fall + * back to lookup + open + */ + goto no_open; + } else if (open_flag & (O_EXCL | O_TRUNC)) { + /* Fall back and fail with the right error */ + create_error = error; + goto no_open; + } else { + /* No side effects, safe to clear O_CREAT */ + create_error = error; + open_flag &= ~O_CREAT; + } + } + + if (open_flag & O_CREAT) { + error = may_o_create(&nd->path, dentry, op->mode); + if (error) { + create_error = error; + if (open_flag & O_EXCL) + goto no_open; + open_flag &= ~O_CREAT; + } + } + + if (nd->flags & LOOKUP_DIRECTORY) + open_flag |= O_DIRECTORY; + + od.dentry = DENTRY_NOT_SET; + od.mnt = nd->path.mnt; + od.filp = &nd->intent.open.file; + filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, + created); + if (IS_ERR(filp)) { + if (WARN_ON(od.dentry != DENTRY_NOT_SET)) + dput(od.dentry); + + if (create_error && PTR_ERR(filp) == -ENOENT) + filp = 
ERR_PTR(create_error); + goto out; + } + + acc_mode = op->acc_mode; + if (*created) { + fsnotify_create(dir, dentry); + acc_mode = MAY_OPEN; + } + + if (!filp) { + if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { + filp = ERR_PTR(-EIO); + goto out; + } + if (od.dentry) { + dput(dentry); + dentry = od.dentry; + } + goto looked_up; + } + + /* + * We didn't have the inode before the open, so check open permission + * here. + */ + error = may_open(&filp->f_path, acc_mode, open_flag); + if (error) + goto out_fput; + + error = open_check_o_direct(filp); + if (error) + goto out_fput; + +out: + dput(dentry); + return filp; + +out_fput: + fput(filp); + filp = ERR_PTR(error); + goto out; + +no_open: + if (need_lookup) { + dentry = lookup_real(dir, dentry, nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + if (create_error) { + int open_flag = op->open_flag; + + filp = ERR_PTR(create_error); + if ((open_flag & O_EXCL)) { + if (!dentry->d_inode) + goto out; + } else if (!dentry->d_inode) { + goto out; + } else if ((open_flag & O_TRUNC) && + S_ISREG(dentry->d_inode->i_mode)) { + goto out; + } + /* will fail later, go on to get the right error */ + } + } +looked_up: + path->dentry = dentry; + path->mnt = nd->path.mnt; + return NULL; +} + /* * Lookup, maybe create and open the last component * @@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (IS_ERR(dentry)) return ERR_CAST(dentry); + /* Cached positive dentry: will open in f_op->open */ + if (!need_lookup && dentry->d_inode) + goto out_no_open; + + if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { + return atomic_open(nd, dentry, path, op, want_write, + need_lookup, created); + } + if (need_lookup) { BUG_ON(dentry->d_inode); @@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (error) goto out_dput; } +out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; return NULL; @@ -2344,8 +2524,16 @@ retry_lookup: filp = 
lookup_open(nd, path, op, &want_write, &created); mutex_unlock(&dir->d_inode->i_mutex); - if (IS_ERR(filp)) - goto out; + if (filp) { + if (IS_ERR(filp)) + goto out; + + if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + will_truncate = 0; + + audit_inode(pathname, filp->f_path.dentry); + goto opened; + } if (created) { /* Don't check for write permission, don't truncate */ @@ -2361,6 +2549,16 @@ retry_lookup: */ audit_inode(pathname, path->dentry); + /* + * If atomic_open() acquired write access it is dropped now due to + * possible mount and symlink following (this might be optimized away if + * necessary...) + */ + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + error = -EEXIST; if (open_flag & O_EXCL) goto exit_dput; @@ -2444,6 +2642,7 @@ common: retried = true; goto retry_lookup; } +opened: if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { diff --git a/fs/open.c b/fs/open.c index 1540632d8387..13bece4f36a4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -810,6 +810,48 @@ out_err: } EXPORT_SYMBOL_GPL(lookup_instantiate_filp); +/** + * finish_open - finish opening a file + * @od: opaque open data + * @dentry: pointer to dentry + * @open: open callback + * + * This can be used to finish opening a file passed to i_op->atomic_open(). + * + * If the open callback is set to NULL, then the standard f_op->open() + * filesystem callback is substituted. 
+ */ +struct file *finish_open(struct opendata *od, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) +{ + struct file *res; + + mntget(od->mnt); + dget(dentry); + + res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); + if (!IS_ERR(res)) + *od->filp = NULL; + + return res; +} +EXPORT_SYMBOL(finish_open); + +/** + * finish_no_open - finish ->atomic_open() without opening the file + * + * @od: opaque open data + * @dentry: dentry or NULL (as returned from ->lookup()) + * + * This can be used to set the result of a successful lookup in ->atomic_open(). + * The filesystem's atomic_open() method shall return NULL after calling this. + */ +void finish_no_open(struct opendata *od, struct dentry *dentry) +{ + od->dentry = dentry; +} +EXPORT_SYMBOL(finish_no_open); + /** * nameidata_to_filp - convert a nameidata to an open filp. * @nd: pointer to nameidata diff --git a/include/linux/fs.h b/include/linux/fs.h index f06db6bd5a74..0314635cf833 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -427,6 +427,7 @@ struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; +struct opendata; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1693,6 +1694,9 @@ struct inode_operations { int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec *, int); + struct file * (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, bool *created); } ____cacheline_aligned; struct seq_file; @@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); +extern struct file *finish_open(struct opendata *od, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); +extern void finish_no_open(struct opendata *od, struct 
dentry *dentry); /* fs/ioctl.c */ -- cgit v1.2.3 From 0dd2b474d0b69d58859399b1df7fdc699ea005d4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:18 +0200 Subject: nfs: implement i_op->atomic_open() Replace NFS4 specific ->lookup implementation with ->atomic_open implementation and use the generic nfs_lookup for other lookups. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 183 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 97 insertions(+), 86 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f430057ff3b3..0d8c71271d1a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -111,11 +111,15 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 -static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); -static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd); +static struct file *nfs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + bool *); +static int nfs4_create(struct inode *dir, struct dentry *dentry, + umode_t mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_open_create, - .lookup = nfs_atomic_lookup, + .create = nfs4_create, + .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open, .link = nfs_link, .unlink = nfs_unlink, .symlink = nfs_symlink, @@ -1403,120 +1407,132 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +static struct file *nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct opendata *od, unsigned open_flags) { struct file *filp; - int ret = 0; + int err; + + if (ctx->dentry != dentry) { + dput(ctx->dentry); + ctx->dentry = dget(dentry); + } /* If the open_intent is for execute, we have an
extra check to make */ if (ctx->mode & FMODE_EXEC) { - ret = nfs_may_open(ctx->dentry->d_inode, - ctx->cred, - nd->intent.open.flags); - if (ret < 0) + err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); + if (err < 0) { + filp = ERR_PTR(err); goto out; + } } - filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); - if (IS_ERR(filp)) - ret = PTR_ERR(filp); - else + + filp = finish_open(od, dentry, do_open); + if (!IS_ERR(filp)) nfs_file_set_open_context(filp, ctx); + out: put_nfs_open_context(ctx); - return ret; + return filp; } -static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned open_flags, + umode_t mode, bool *created) { struct nfs_open_context *ctx; - struct iattr attr; - struct dentry *res = NULL; + struct dentry *res; + struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - int open_flags; + struct file *filp; int err; - dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + /* Expect a negative dentry */ + BUG_ON(dentry->d_inode); + + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - /* Check that we are indeed trying to open this file */ - if (!is_atomic_open(nd)) + /* NFS only supports OPEN on regular files */ + if ((open_flags & O_DIRECTORY)) { + err = -ENOENT; + if (!d_unhashed(dentry)) { + /* + * Hashed negative dentry with O_DIRECTORY: dentry was + * revalidated and is fine, no need to perform lookup + * again + */ + goto out_err; + } goto no_open; - - if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { - res = ERR_PTR(-ENAMETOOLONG); - goto out; } - /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash - * the dentry. 
*/ - if (nd->flags & LOOKUP_EXCL) { - d_instantiate(dentry, NULL); - goto out; - } - - open_flags = nd->intent.open.flags; - attr.ia_valid = ATTR_OPEN; - - ctx = create_nfs_open_context(dentry, open_flags); - res = ERR_CAST(ctx); - if (IS_ERR(ctx)) - goto out; + err = -ENAMETOOLONG; + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + goto out_err; - if (nd->flags & LOOKUP_CREATE) { - attr.ia_mode = nd->intent.open.create_mode; + if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; - attr.ia_mode &= ~current_umask(); - } else - open_flags &= ~(O_EXCL | O_CREAT); - + attr.ia_mode = mode & ~current_umask(); + } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; } - /* Open the file on the server */ + ctx = create_nfs_open_context(dentry, open_flags); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err; + nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + d_drop(dentry); if (IS_ERR(inode)) { nfs_unblock_sillyrename(dentry->d_parent); put_nfs_open_context(ctx); - switch (PTR_ERR(inode)) { - /* Make a negative dentry */ - case -ENOENT: - d_add(dentry, NULL); - res = NULL; - goto out; - /* This turned out not to be a regular file */ - case -EISDIR: - case -ENOTDIR: + err = PTR_ERR(inode); + switch (err) { + case -ENOENT: + d_add(dentry, NULL); + break; + case -EISDIR: + case -ENOTDIR: + goto no_open; + case -ELOOP: + if (!(open_flags & O_NOFOLLOW)) goto no_open; - case -ELOOP: - if (!(nd->intent.open.flags & O_NOFOLLOW)) - goto no_open; + break; /* case -EINVAL: */ - default: - res = ERR_CAST(inode); - goto out; + default: + break; } + goto out_err; } res = d_add_unique(dentry, inode); - nfs_unblock_sillyrename(dentry->d_parent); - if (res != NULL) { - dput(ctx->dentry); - ctx->dentry = dget(res); + if (res != NULL) dentry = res; - } - err = nfs_intent_set_file(nd, ctx); - if (err < 0) { - if (res != NULL) - dput(res); - return ERR_PTR(err); - } -out: + + 
nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - return res; + + filp = nfs_finish_open(ctx, dentry, od, open_flags); + + dput(res); + return filp; + +out_err: + return ERR_PTR(err); + no_open: - return nfs_lookup(dir, dentry, nd); + res = nfs_lookup(dir, dentry, NULL); + err = PTR_ERR(res); + if (IS_ERR(res)) + goto out_err; + + finish_no_open(od, res); + return NULL; } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -1566,8 +1582,8 @@ no_open: return nfs_lookup_revalidate(dentry, nd); } -static int nfs_open_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) +static int nfs4_create(struct inode *dir, struct dentry *dentry, + umode_t mode, struct nameidata *nd) { struct nfs_open_context *ctx = NULL; struct iattr attr; @@ -1591,19 +1607,14 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); if (error != 0) goto out_put_ctx; - if (nd) { - error = nfs_intent_set_file(nd, ctx); - if (error < 0) - goto out_err; - } else { - put_nfs_open_context(ctx); - } + + put_nfs_open_context(ctx); + return 0; out_put_ctx: put_nfs_open_context(ctx); out_err_drop: d_drop(dentry); -out_err: return error; } -- cgit v1.2.3 From 8867fe5899010a0c0ac36dadfdacf1072b1c990c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:19 +0200 Subject: nfs: clean up ->create in nfs_rpc_ops Don't pass nfs_open_context() to ->create(). Only the NFS4 implementation needed that and only because it wanted to return an open file using open intents. That task has been replaced by ->atomic_open so it is not necessary anymore to pass the context to the create rpc operation. Despite nfs4_proc_create apparently being okay with a NULL context it Oopses somewhere down the call chain. So allocate a context here. 
Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 42 ++---------------------------------------- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 37 ++++++++++--------------------------- fs/nfs/proc.c | 2 +- include/linux/nfs_xdr.h | 2 +- 5 files changed, 15 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0d8c71271d1a..45015d32a865 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -114,10 +114,8 @@ const struct inode_operations nfs3_dir_inode_operations = { static struct file *nfs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, bool *); -static int nfs4_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs4_create, + .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, .link = nfs_link, @@ -1582,42 +1580,6 @@ no_open: return nfs_lookup_revalidate(dentry, nd); } -static int nfs4_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) -{ - struct nfs_open_context *ctx = NULL; - struct iattr attr; - int error; - int open_flags = O_CREAT|O_EXCL; - - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - - attr.ia_mode = mode; - attr.ia_valid = ATTR_MODE; - - if (nd) - open_flags = nd->intent.open.flags; - - ctx = create_nfs_open_context(dentry, open_flags); - error = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out_err_drop; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); - if (error != 0) - goto out_put_ctx; - - put_nfs_open_context(ctx); - - return 0; -out_put_ctx: - put_nfs_open_context(ctx); -out_err_drop: - d_drop(dentry); - return error; -} - #endif /* CONFIG_NFSV4 */ /* @@ -1684,7 +1646,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, if (nd) open_flags = nd->intent.open.flags; - error = 
NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; return 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2292a0fd2bff..3187e24e8f78 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs3_createdata *data; umode_t mode = sattr->ia_mode; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15fc7e4664ed..c157b2089b47 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2806,37 +2806,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, } /* - * Got race? - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this create/open method is prone to inefficiency and race - * conditions due to the lookup, create, and open VFS calls from sys_open() - * placed on the wire. - * - * Given the above sorry state of affairs, I'm simply sending an OPEN. - * The file will be opened again in the subsequent VFS open call - * (nfs4_proc_file_open). - * - * The open for read will just hang around to be used by any process that - * opens the file O_RDONLY. This will all be resolved with the VFS changes. + * This is just for mknod. open(O_CREAT) will always do ->open_context(). 
*/ - static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { - struct dentry *de = dentry; + struct nfs_open_context *ctx; struct nfs4_state *state; - struct rpc_cred *cred = NULL; - fmode_t fmode = 0; int status = 0; - if (ctx != NULL) { - cred = ctx->cred; - de = ctx->dentry; - fmode = ctx->mode; - } + ctx = alloc_nfs_open_context(dentry, FMODE_READ); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); + state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -2844,11 +2829,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - if (ctx != NULL) - ctx->state = state; - else - nfs4_close_sync(state, fmode); + ctx->state = state; out: + put_nfs_open_context(ctx); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 617c7419a08e..4433806e116f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs_createdata *data; struct rpc_message msg = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8aadd90b808a..d3b7c18b18f4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1374,7 +1374,7 @@ struct nfs_rpc_ops { int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*create) (struct inode *, struct dentry *, - struct iattr *, int, struct nfs_open_context *); + struct iattr *, int); int (*remove) (struct inode *, struct qstr *); void (*unlink_setup) (struct rpc_message *, 
struct inode *dir); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); -- cgit v1.2.3 From 50de348c3604f7684a89ce64180666d4dd74623f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:20 +0200 Subject: nfs: don't use nd->intent.open.flags Instead check LOOKUP_EXCL in nd->flags, which is basically what the open intent flags were used for. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 45015d32a865..0432f474771b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1538,7 +1538,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *parent = NULL; struct inode *inode; struct inode *dir; - int openflags, ret = 0; + int ret = 0; if (nd->flags & LOOKUP_RCU) return -ECHILD; @@ -1562,9 +1562,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open_dput; - openflags = nd->intent.open.flags; /* We cannot do exclusive creation on a positive dentry */ - if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + if (nd && nd->flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ @@ -1643,8 +1642,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd) - open_flags = nd->intent.open.flags; + if (nd && !(nd->flags & LOOKUP_EXCL)) + open_flags = O_CREAT; error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) -- cgit v1.2.3 From eda72afb9ef9f45941fb09260c0f268ff81ec40d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:21 +0200 Subject: nfs: don't use intents for checking atomic open is_atomic_open() is now only used by nfs4_lookup_revalidate() to check 
whether it's okay to skip normal revalidation. It does a racy check for mount read-onlyness and falls back to normal revalidation if the open would fail. This makes little sense now that this function isn't used for determining whether to actually open the file or not. The d_mountpoint() check still makes sense since it is an indication that we might be following a mount and so open may not revalidate the dentry. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0432f474771b..e6d55dc93ffd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1366,24 +1366,6 @@ const struct dentry_operations nfs4_dentry_operations = { .d_release = nfs_d_release, }; -/* - * Use intent information to determine whether we need to substitute - * the NFSv4-style stateful OPEN for the LOOKUP call - */ -static int is_atomic_open(struct nameidata *nd) -{ - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) - return 0; - /* NFS does not (yet) have a stateful open for directories */ - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? 
*/ - if (__mnt_is_readonly(nd->path.mnt) && - (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE))) - return 0; - return 1; -} - static fmode_t flags_to_mode(int flags) { fmode_t res = (__force fmode_t)flags & FMODE_EXEC; @@ -1543,10 +1525,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (nd->flags & LOOKUP_RCU) return -ECHILD; - inode = dentry->d_inode; - if (!is_atomic_open(nd) || d_mountpoint(dentry)) + if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY)) + goto no_open; + if (d_mountpoint(dentry)) goto no_open; + inode = dentry->d_inode; parent = dget_parent(dentry); dir = parent->d_inode; -- cgit v1.2.3 From c8ccbe032feb127a977c66865cb63d72d9a6e08b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:22 +0200 Subject: fuse: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic open+create operation implemented via ->create. No functionality is changed. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/fuse/dir.c | 94 ++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f7543f72897e..e42442f1da16 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -369,8 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. 
*/ -static int fuse_create_open(struct inode *dir, struct dentry *entry, - umode_t mode, struct nameidata *nd) +static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode) { int err; struct inode *inode; @@ -382,14 +383,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_file *ff; struct file *file; - int flags = nd->intent.open.flags; - - if (fc->no_create) - return -ENOSYS; forget = fuse_alloc_forget(); + err = -ENOMEM; if (!forget) - return -ENOMEM; + goto out_err; req = fuse_get_req(fc); err = PTR_ERR(req); @@ -428,11 +426,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, req->out.args[1].value = &outopen; fuse_request_send(fc, req); err = req->out.h.error; - if (err) { - if (err == -ENOSYS) - fc->no_create = 1; + if (err) goto out_free_ff; - } err = -EIO; if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) @@ -448,28 +443,78 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(ff, flags); fuse_queue_forget(fc, forget, outentry.nodeid, 1); - return -ENOMEM; + err = -ENOMEM; + goto out_err; } kfree(forget); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - file = lookup_instantiate_filp(nd, entry, generic_file_open); + file = finish_open(od, entry, generic_file_open); if (IS_ERR(file)) { fuse_sync_release(ff, flags); - return PTR_ERR(file); + } else { + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); } - file->private_data = fuse_file_get(ff); - fuse_finish_open(inode, file); - return 0; + return file; - out_free_ff: +out_free_ff: fuse_file_free(ff); - out_put_request: +out_put_request: fuse_put_request(fc, req); - out_put_forget_req: +out_put_forget_req: kfree(forget); - return err; +out_err: + return ERR_PTR(err); +} + +static int 
fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); +static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode, bool *created) +{ + int err; + struct fuse_conn *fc = get_fuse_conn(dir); + struct file *file; + struct dentry *res = NULL; + + if (d_unhashed(entry)) { + res = fuse_lookup(dir, entry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + entry = res; + } + + if (!(flags & O_CREAT) || entry->d_inode) + goto no_open; + + /* Only creates */ + *created = true; + + if (fc->no_create) + goto mknod; + + file = fuse_create_open(dir, entry, od, flags, mode); + if (PTR_ERR(file) == -ENOSYS) { + fc->no_create = 1; + goto mknod; + } +out_dput: + dput(res); + return file; + +mknod: + err = fuse_mknod(dir, entry, mode, 0); + if (err) { + file = ERR_PTR(err); + goto out_dput; + } +no_open: + finish_no_open(od, res); + return NULL; } /* @@ -573,12 +618,6 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, struct nameidata *nd) { - if (nd) { - int err = fuse_create_open(dir, entry, mode, nd); - if (err != -ENOSYS) - return err; - /* Fall back on mknod */ - } return fuse_mknod(dir, entry, mode, 0); } @@ -1646,6 +1685,7 @@ static const struct inode_operations fuse_dir_inode_operations = { .link = fuse_link, .setattr = fuse_setattr, .create = fuse_create, + .atomic_open = fuse_atomic_open, .mknod = fuse_mknod, .permission = fuse_permission, .getattr = fuse_getattr, -- cgit v1.2.3 From d2c127197dfc0b2bae62a52e1e0d3e3ff493919e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:23 +0200 Subject: cifs: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic lookup+open+create operation implemented via ->lookup and ->create operations. 
Signed-off-by: Miklos Szeredi CC: Steve French Signed-off-by: Al Viro --- fs/cifs/cifsfs.c | 1 + fs/cifs/cifsfs.h | 3 + fs/cifs/dir.c | 441 ++++++++++++++++++++++++++++++------------------------- 3 files changed, 247 insertions(+), 198 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index bcab12c87146..c0c2751a7573 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -777,6 +777,7 @@ struct file_system_type cifs_fs_type = { }; const struct inode_operations cifs_dir_inode_ops = { .create = cifs_create, + .atomic_open = cifs_atomic_open, .lookup = cifs_lookup, .getattr = cifs_getattr, .unlink = cifs_unlink, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 65365358c976..3a572bf5947f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -46,6 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +extern struct file *cifs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + bool *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ec4e9a2a12f8..7a3dcd15d681 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -133,108 +133,141 @@ cifs_bp_rename_retry: return full_path; } +/* + * Don't allow the separator character in a path component. + * The VFS will not allow "/", but "\" is allowed by posix. 
+ */ +static int +check_name(struct dentry *direntry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + int i; + + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + for (i = 0; i < direntry->d_name.len; i++) { + if (direntry->d_name.name[i] == '\\') { + cFYI(1, "Invalid file name"); + return -EINVAL; + } + } + } + return 0; +} + + /* Inode operations in similar order to how they appear in Linux file fs.h */ -int -cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, - struct nameidata *nd) +static int cifs_do_create(struct inode *inode, struct dentry *direntry, + int xid, struct tcon_link *tlink, unsigned oflags, + umode_t mode, __u32 *oplock, __u16 *fileHandle, + bool *created) { int rc = -ENOENT; - int xid; int create_options = CREATE_NOT_DIR; - __u32 oplock = 0; - int oflags; - /* - * BB below access is probably too much for mknod to request - * but we have to do query and setpathinfo so requesting - * less could fail (unless we want to request getatr and setatr - * permissions (only). At least for POSIX we do not have to - * request so much. 
- */ - int desiredAccess = GENERIC_READ | GENERIC_WRITE; - __u16 fileHandle; - struct cifs_sb_info *cifs_sb; - struct tcon_link *tlink; - struct cifs_tcon *tcon; + int desiredAccess; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_tcon *tcon = tlink_tcon(tlink); char *full_path = NULL; FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; - int disposition = FILE_OVERWRITE_IF; - - xid = GetXid(); - - cifs_sb = CIFS_SB(inode->i_sb); - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - FreeXid(xid); - return PTR_ERR(tlink); - } - tcon = tlink_tcon(tlink); + int disposition; + *oplock = 0; if (tcon->ses->server->oplocks) - oplock = REQ_OPLOCK; - - if (nd) - oflags = nd->intent.open.file->f_flags; - else - oflags = O_RDONLY | O_CREAT; + *oplock = REQ_OPLOCK; full_path = build_path_from_dentry(direntry); if (full_path == NULL) { rc = -ENOMEM; - goto cifs_create_out; + goto out; } if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && + !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = cifs_posix_open(full_path, &newinode, - inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); - /* EIO could indicate that (posix open) operation is not - supported, despite what server claimed in capability - negotiation. 
EREMOTE indicates DFS junction, which is not - handled in posix open */ - - if (rc == 0) { - if (newinode == NULL) /* query inode info */ + inode->i_sb, mode, oflags, oplock, fileHandle, xid); + switch (rc) { + case 0: + if (newinode == NULL) { + /* query inode info */ goto cifs_create_get_file_info; - else /* success, no need to query */ - goto cifs_create_set_dentry; - } else if ((rc != -EIO) && (rc != -EREMOTE) && - (rc != -EOPNOTSUPP) && (rc != -EINVAL)) - goto cifs_create_out; - /* else fallthrough to retry, using older open call, this is - case where server does not support this SMB level, and - falsely claims capability (also get here for DFS case - which should be rare for path not covered on files) */ - } + } - if (nd) { - /* if the file is going to stay open, then we - need to set the desired access properly */ - desiredAccess = 0; - if (OPEN_FMODE(oflags) & FMODE_READ) - desiredAccess |= GENERIC_READ; /* is this too little? */ - if (OPEN_FMODE(oflags) & FMODE_WRITE) - desiredAccess |= GENERIC_WRITE; - - if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - disposition = FILE_CREATE; - else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) - disposition = FILE_OVERWRITE_IF; - else if ((oflags & O_CREAT) == O_CREAT) - disposition = FILE_OPEN_IF; - else - cFYI(1, "Create flag not set in create function"); + if (!S_ISREG(newinode->i_mode)) { + /* + * The server may allow us to open things like + * FIFOs, but the client isn't set up to deal + * with that. If it's not a regular file, just + * close it and proceed as if it were a normal + * lookup. + */ + CIFSSMBClose(xid, tcon, *fileHandle); + goto cifs_create_get_file_info; + } + /* success, no need to query */ + goto cifs_create_set_dentry; + + case -ENOENT: + goto cifs_create_get_file_info; + + case -EIO: + case -EINVAL: + /* + * EIO could indicate that (posix open) operation is not + * supported, despite what server claimed in capability + * negotiation. 
+ * + * POSIX open in samba versions 3.3.1 and earlier could + * incorrectly fail with invalid parameter. + */ + tcon->broken_posix_open = true; + break; + + case -EREMOTE: + case -EOPNOTSUPP: + /* + * EREMOTE indicates DFS junction, which is not handled + * in posix open. If either that or op not supported + * returned, follow the normal lookup. + */ + break; + + default: + goto out; + } + /* + * fallthrough to retry, using older open call, this is case + * where server does not support this SMB level, and falsely + * claims capability (also get here for DFS case which should be + * rare for path not covered on files) + */ } + desiredAccess = 0; + if (OPEN_FMODE(oflags) & FMODE_READ) + desiredAccess |= GENERIC_READ; /* is this too little? */ + if (OPEN_FMODE(oflags) & FMODE_WRITE) + desiredAccess |= GENERIC_WRITE; + + disposition = FILE_OVERWRITE_IF; + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; + else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; + else if ((oflags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else + cFYI(1, "Create flag not set in create function"); + /* BB add processing to set equivalent of mode - e.g. 
via CreateX with ACLs */ buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { rc = -ENOMEM; - goto cifs_create_out; + goto out; } /* @@ -250,7 +283,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, if (tcon->ses->capabilities & CAP_NT_SMBS) rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, create_options, - &fileHandle, &oplock, buf, cifs_sb->local_nls, + fileHandle, oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else rc = -EIO; /* no NT SMB support fall into legacy open below */ @@ -259,17 +292,17 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, /* old server, retry the open legacy style */ rc = SMBLegacyOpen(xid, tcon, full_path, disposition, desiredAccess, create_options, - &fileHandle, &oplock, buf, cifs_sb->local_nls, + fileHandle, oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } if (rc) { cFYI(1, "cifs_create returned 0x%x", rc); - goto cifs_create_out; + goto out; } /* If Open reported that we actually created a file then we now have to set the mode if possible */ - if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { + if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { struct cifs_unix_set_info_args args = { .mode = mode, .ctime = NO_CHANGE_64, @@ -278,6 +311,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, .device = 0, }; + *created = true; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = (__u64) current_fsuid(); if (inode->i_mode & S_ISGID) @@ -288,7 +322,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle, + CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle, current->tgid); } else { /* BB implement mode setting via Windows security @@ -305,11 +339,11 @@ cifs_create_get_file_info: inode->i_sb, xid); else { 
rc = cifs_get_inode_info(&newinode, full_path, buf, - inode->i_sb, xid, &fileHandle); + inode->i_sb, xid, fileHandle); if (newinode) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; - if ((oplock & CIFS_CREATE_ACTION) && + if ((*oplock & CIFS_CREATE_ACTION) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { newinode->i_uid = current_fsuid(); if (inode->i_mode & S_ISGID) @@ -321,37 +355,139 @@ cifs_create_get_file_info: } cifs_create_set_dentry: - if (rc == 0) - d_instantiate(direntry, newinode); - else + if (rc != 0) { cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); + goto out; + } + d_drop(direntry); + d_add(direntry, newinode); - if (newinode && nd) { - struct cifsFileInfo *pfile_info; - struct file *filp; + /* ENOENT for create? How weird... */ + rc = -ENOENT; + if (!newinode) { + CIFSSMBClose(xid, tcon, *fileHandle); + goto out; + } + rc = 0; - filp = lookup_instantiate_filp(nd, direntry, generic_file_open); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - CIFSSMBClose(xid, tcon, fileHandle); - goto cifs_create_out; - } +out: + kfree(buf); + kfree(full_path); + return rc; +} - pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); - if (pfile_info == NULL) { - fput(filp); - CIFSSMBClose(xid, tcon, fileHandle); - rc = -ENOMEM; - } - } else { +struct file * +cifs_atomic_open(struct inode *inode, struct dentry *direntry, + struct opendata *od, unsigned oflags, umode_t mode, + bool *created) +{ + int rc; + int xid; + struct tcon_link *tlink; + struct cifs_tcon *tcon; + __u16 fileHandle; + __u32 oplock; + struct file *filp; + struct cifsFileInfo *pfile_info; + + /* Posix open is only called (at lookup time) for file create now. For + * opens (rather than creates), because we do not know if it is a file + * or directory yet, and current Samba no longer allows us to do posix + * open on dirs, we could end up wasting an open call on what turns out + * to be a dir. 
For file opens, we wait to call posix open till + * cifs_open. It could be added to atomic_open in the future but the + * performance tradeoff of the extra network request when EISDIR or + * EACCES is returned would have to be weighed against the 50% reduction + * in network traffic in the other paths. + */ + if (!(oflags & O_CREAT)) { + struct dentry *res = cifs_lookup(inode, direntry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + finish_no_open(od, res); + return NULL; + } + + rc = check_name(direntry); + if (rc) + return ERR_PTR(rc); + + xid = GetXid(); + + cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p", + inode, direntry->d_name.name, direntry); + + tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); + filp = ERR_CAST(tlink); + if (IS_ERR(tlink)) + goto free_xid; + + tcon = tlink_tcon(tlink); + + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, + &oplock, &fileHandle, created); + + if (rc) { + filp = ERR_PTR(rc); + goto out; + } + + filp = finish_open(od, direntry, generic_file_open); + if (IS_ERR(filp)) { CIFSSMBClose(xid, tcon, fileHandle); + goto out; } -cifs_create_out: - kfree(buf); - kfree(full_path); + pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); + if (pfile_info == NULL) { + CIFSSMBClose(xid, tcon, fileHandle); + fput(filp); + filp = ERR_PTR(-ENOMEM); + } + +out: + cifs_put_tlink(tlink); +free_xid: + FreeXid(xid); + return filp; +} + +int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, + struct nameidata *nd) +{ + int rc; + int xid = GetXid(); + /* + * BB below access is probably too much for mknod to request + * but we have to do query and setpathinfo so requesting + * less could fail (unless we want to request getatr and setatr + * permissions (only). At least for POSIX we do not have to + * request so much. 
+ */ + unsigned oflags = O_EXCL | O_CREAT | O_RDWR; + struct tcon_link *tlink; + __u16 fileHandle; + __u32 oplock; + bool created = true; + + cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p", + inode, direntry->d_name.name, direntry); + + tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); + rc = PTR_ERR(tlink); + if (IS_ERR(tlink)) + goto free_xid; + + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, + &oplock, &fileHandle, &created); + if (!rc) + CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); + cifs_put_tlink(tlink); +free_xid: FreeXid(xid); + return rc; } @@ -492,16 +628,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock; - __u16 fileHandle = 0; - bool posix_open = false; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; - struct cifsFileInfo *cfile; struct inode *newInode = NULL; char *full_path = NULL; - struct file *filp; xid = GetXid(); @@ -518,31 +649,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0; - - /* - * Don't allow the separator character in a path component. - * The VFS will not allow "/", but "\" is allowed by posix. - */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { - int i; - for (i = 0; i < direntry->d_name.len; i++) - if (direntry->d_name.name[i] == '\\') { - cFYI(1, "Invalid file name"); - rc = -EINVAL; - goto lookup_out; - } - } - - /* - * O_EXCL: optimize away the lookup, but don't hash the dentry. Let - * the VFS handle the create. 
- */ - if (nd && (nd->flags & LOOKUP_EXCL)) { - d_instantiate(direntry, NULL); - rc = 0; + rc = check_name(direntry); + if (rc) goto lookup_out; - } /* can not grab the rename sem here since it would deadlock in the cases (beginning of sys_rename itself) @@ -560,80 +669,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode); - /* Posix open is only called (at lookup time) for file create now. - * For opens (rather than creates), because we do not know if it - * is a file or directory yet, and current Samba no longer allows - * us to do posix open on dirs, we could end up wasting an open call - * on what turns out to be a dir. For file opens, we wait to call posix - * open till cifs_open. It could be added here (lookup) in the future - * but the performance tradeoff of the extra network request when EISDIR - * or EACCES is returned would have to be weighed against the 50% - * reduction in network traffic in the other paths. - */ if (pTcon->unix_ext) { - if (nd && !(nd->flags & LOOKUP_DIRECTORY) && - (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && - (nd->intent.open.file->f_flags & O_CREAT)) { - rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, - nd->intent.open.create_mode, - nd->intent.open.file->f_flags, &oplock, - &fileHandle, xid); - /* - * The check below works around a bug in POSIX - * open in samba versions 3.3.1 and earlier where - * open could incorrectly fail with invalid parameter. - * If either that or op not supported returned, follow - * the normal lookup. - */ - switch (rc) { - case 0: - /* - * The server may allow us to open things like - * FIFOs, but the client isn't set up to deal - * with that. If it's not a regular file, just - * close it and proceed as if it were a normal - * lookup. 
- */ - if (newInode && !S_ISREG(newInode->i_mode)) { - CIFSSMBClose(xid, pTcon, fileHandle); - break; - } - case -ENOENT: - posix_open = true; - case -EOPNOTSUPP: - break; - default: - pTcon->broken_posix_open = true; - } - } - if (!posix_open) - rc = cifs_get_inode_info_unix(&newInode, full_path, - parent_dir_inode->i_sb, xid); - } else + rc = cifs_get_inode_info_unix(&newInode, full_path, + parent_dir_inode->i_sb, xid); + } else { rc = cifs_get_inode_info(&newInode, full_path, NULL, parent_dir_inode->i_sb, xid, NULL); + } if ((rc == 0) && (newInode != NULL)) { d_add(direntry, newInode); - if (posix_open) { - filp = lookup_instantiate_filp(nd, direntry, - generic_file_open); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - CIFSSMBClose(xid, pTcon, fileHandle); - goto lookup_out; - } - - cfile = cifs_new_fileinfo(fileHandle, filp, tlink, - oplock); - if (cfile == NULL) { - fput(filp); - CIFSSMBClose(xid, pTcon, fileHandle); - rc = -ENOMEM; - goto lookup_out; - } - } /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); -- cgit v1.2.3 From 3819219b592159725069eb16a7a46f58e4ecef32 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:24 +0200 Subject: ceph: remove unused arg from ceph_lookup_open() What was the purpose of this? 
Signed-off-by: Miklos Szeredi CC: Sage Weil Signed-off-by: Al Viro --- fs/ceph/dir.c | 4 ++-- fs/ceph/file.c | 3 +-- fs/ceph/super.h | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3e8094be4604..c4b7832c38b5 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -599,7 +599,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, (nd->flags & LOOKUP_OPEN) && !(nd->intent.open.flags & O_CREAT)) { int mode = nd->intent.open.create_mode & ~current->fs->umask; - return ceph_lookup_open(dir, dentry, nd, mode, 1); + return ceph_lookup_open(dir, dentry, nd, mode); } /* can we conclude ENOENT locally? */ @@ -710,7 +710,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (nd) { BUG_ON((nd->flags & LOOKUP_OPEN) == 0); - dentry = ceph_lookup_open(dir, dentry, nd, mode, 0); + dentry = ceph_lookup_open(dir, dentry, nd, mode); /* hrm, what should i do here if we get aliased? 
*/ if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 988d4f302e48..4bf9773e6a36 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -219,8 +219,7 @@ out: * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE */ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode, - int locked_dir) + struct nameidata *nd, int mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fc35036d258d..8471db98b62c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -807,8 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages, extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode, - int locked_dir); + struct nameidata *nd, int mode); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ -- cgit v1.2.3 From 2d83bde9a16e18eafdc73a3a1f4a8eb110e49672 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:25 +0200 Subject: ceph: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic lookup+open+create operation implemented via ->lookup and ->create operations. Signed-off-by: Miklos Szeredi CC: Sage Weil Signed-off-by: Al Viro --- fs/ceph/dir.c | 68 ++++++++++++++++++++++++++++++++++++--------------------- fs/ceph/file.c | 21 +++++++++--------- fs/ceph/super.h | 5 +++-- 3 files changed, 56 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c4b7832c38b5..75df600ec9b4 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -594,14 +594,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, if (err < 0) return ERR_PTR(err); - /* open (but not create!) 
intent? */ - if (nd && - (nd->flags & LOOKUP_OPEN) && - !(nd->intent.open.flags & O_CREAT)) { - int mode = nd->intent.open.create_mode & ~current->fs->umask; - return ceph_lookup_open(dir, dentry, nd, mode); - } - /* can we conclude ENOENT locally? */ if (dentry->d_inode == NULL) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -642,6 +634,47 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } +struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + bool *created) +{ + int err; + struct dentry *res = NULL; + struct file *filp; + + if (!(flags & O_CREAT)) { + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + + err = ceph_init_dentry(dentry); + if (err < 0) + return ERR_PTR(err); + + return ceph_lookup_open(dir, dentry, od, flags, mode); + } + + if (d_unhashed(dentry)) { + res = ceph_lookup(dir, dentry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + dentry = res; + } + + /* We don't deal with positive dentries here */ + if (dentry->d_inode) { + finish_no_open(od, res); + return NULL; + } + + *created = true; + filp = ceph_lookup_open(dir, dentry, od, flags, mode); + dput(res); + + return filp; +} + /* * If we do a create but get no trace back from the MDS, follow up with * a lookup (the VFS expects us to link up the provided dentry). @@ -702,23 +735,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { - dout("create in dir %p dentry %p name '%.*s'\n", - dir, dentry, dentry->d_name.len, dentry->d_name.name); - - if (ceph_snap(dir) != CEPH_NOSNAP) - return -EROFS; - - if (nd) { - BUG_ON((nd->flags & LOOKUP_OPEN) == 0); - dentry = ceph_lookup_open(dir, dentry, nd, mode); - /* hrm, what should i do here if we get aliased? 
*/ - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - return 0; - } - - /* fall back to mknod */ - return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0); + return ceph_mknod(dir, dentry, mode, 0); } static int ceph_symlink(struct inode *dir, struct dentry *dentry, @@ -1357,6 +1374,7 @@ const struct inode_operations ceph_dir_iops = { .rmdir = ceph_unlink, .rename = ceph_rename, .create = ceph_create, + .atomic_open = ceph_atomic_open, }; const struct dentry_operations ceph_dentry_ops = { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4bf9773e6a36..e34dc22e75a9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -213,21 +213,15 @@ out: * may_open() fails, the struct *file gets cleaned up (i.e. * ceph_release gets called). So fear not! */ -/* - * flags - * path_lookup_open -> LOOKUP_OPEN - * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE - */ -struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode) +struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file; + struct file *file = NULL; struct ceph_mds_request *req; struct dentry *ret; int err; - int flags = nd->intent.open.flags; dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); @@ -253,14 +247,19 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open); + file = finish_open(od, req->r_dentry, ceph_open); if (IS_ERR(file)) err = PTR_ERR(file); out: ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); dout("ceph_lookup_open result=%p\n", ret); - return ret; + + if (IS_ERR(ret)) + return ERR_CAST(ret); + + dput(ret); + return err ? 
ERR_PTR(err) : file; } int ceph_release(struct inode *inode, struct file *file) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8471db98b62c..e61e54673e56 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -806,8 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); -extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode); +extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, + umode_t mode); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ -- cgit v1.2.3 From e43ae79c540270865918dab5ac914c74f43101e2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:26 +0200 Subject: 9p: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic open+create operation implemented via ->create. No functionality is changed. Signed-off-by: Miklos Szeredi CC: Eric Van Hensbergen Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 169 +++++++++++++++++++++++++++++-------------------- fs/9p/vfs_inode_dotl.c | 52 ++++++++++----- 2 files changed, 137 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 57ccb7537dae..e8c42ceb89ba 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -712,11 +712,14 @@ error: } /** - * v9fs_vfs_create - VFS hook to create files + * v9fs_vfs_create - VFS hook to create a regular file + * + * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open(). This is only called + * for mknod(2). 
+ * * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions - * @nd: path information * */ @@ -724,76 +727,19 @@ static int v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { - int err; - u32 perm; - int flags; - struct file *filp; - struct v9fs_inode *v9inode; - struct v9fs_session_info *v9ses; - struct p9_fid *fid, *inode_fid; - - err = 0; - fid = NULL; - v9ses = v9fs_inode2v9ses(dir); - perm = unixmode2p9mode(v9ses, mode); - if (nd) - flags = nd->intent.open.flags; - else - flags = O_RDWR; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); + u32 perm = unixmode2p9mode(v9ses, mode); + struct p9_fid *fid; - fid = v9fs_create(v9ses, dir, dentry, NULL, perm, - v9fs_uflags2omode(flags, - v9fs_proto_dotu(v9ses))); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - fid = NULL; - goto error; - } + /* P9_OEXCL? */ + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR); + if (IS_ERR(fid)) + return PTR_ERR(fid); v9fs_invalidate_inode_attr(dir); - /* if we are opening a file, assign the open fid to the file */ - if (nd) { - v9inode = V9FS_I(dentry->d_inode); - mutex_lock(&v9inode->v_mutex); - if (v9ses->cache && !v9inode->writeback_fid && - ((flags & O_ACCMODE) != O_RDONLY)) { - /* - * clone a fid and add it to writeback_fid - * we do it during open time instead of - * page dirty time via write_begin/page_mkwrite - * because we want write after unlink usecase - * to work. 
- */ - inode_fid = v9fs_writeback_fid(dentry); - if (IS_ERR(inode_fid)) { - err = PTR_ERR(inode_fid); - mutex_unlock(&v9inode->v_mutex); - goto error; - } - v9inode->writeback_fid = (void *) inode_fid; - } - mutex_unlock(&v9inode->v_mutex); - filp = lookup_instantiate_filp(nd, dentry, generic_file_open); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); - goto error; - } - - filp->private_data = fid; -#ifdef CONFIG_9P_FSCACHE - if (v9ses->cache) - v9fs_cache_inode_set_cookie(dentry->d_inode, filp); -#endif - } else - p9_client_clunk(fid); + p9_client_clunk(fid); return 0; - -error: - if (fid) - p9_client_clunk(fid); - - return err; } /** @@ -910,6 +856,93 @@ error: return ERR_PTR(result); } +static struct file * +v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + bool *created) +{ + int err; + u32 perm; + struct file *filp; + struct v9fs_inode *v9inode; + struct v9fs_session_info *v9ses; + struct p9_fid *fid, *inode_fid; + struct dentry *res = NULL; + + if (d_unhashed(dentry)) { + res = v9fs_vfs_lookup(dir, dentry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + dentry = res; + } + + /* Only creates */ + if (!(flags & O_CREAT) || dentry->d_inode) { + finish_no_open(od, res); + return NULL; + } + + err = 0; + fid = NULL; + v9ses = v9fs_inode2v9ses(dir); + perm = unixmode2p9mode(v9ses, mode); + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, + v9fs_uflags2omode(flags, + v9fs_proto_dotu(v9ses))); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + v9fs_invalidate_inode_attr(dir); + v9inode = V9FS_I(dentry->d_inode); + mutex_lock(&v9inode->v_mutex); + if (v9ses->cache && !v9inode->writeback_fid && + ((flags & O_ACCMODE) != O_RDONLY)) { + /* + * clone a fid and add it to writeback_fid + * we do it during open time instead of + * page dirty time via write_begin/page_mkwrite + * because we want write after unlink usecase + * to work. 
+ */ + inode_fid = v9fs_writeback_fid(dentry); + if (IS_ERR(inode_fid)) { + err = PTR_ERR(inode_fid); + mutex_unlock(&v9inode->v_mutex); + goto error; + } + v9inode->writeback_fid = (void *) inode_fid; + } + mutex_unlock(&v9inode->v_mutex); + filp = finish_open(od, dentry, generic_file_open); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto error; + } + + filp->private_data = fid; +#ifdef CONFIG_9P_FSCACHE + if (v9ses->cache) + v9fs_cache_inode_set_cookie(dentry->d_inode, filp); +#endif + + *created = true; +out: + dput(res); + return filp; + +error: + if (fid) + p9_client_clunk(fid); + + filp = ERR_PTR(err); + goto out; +} + /** * v9fs_vfs_unlink - VFS unlink hook to delete an inode * @i: inode that is being unlinked @@ -1488,6 +1521,7 @@ out: static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, + .atomic_open = v9fs_vfs_atomic_open, .symlink = v9fs_vfs_symlink, .link = v9fs_vfs_link, .unlink = v9fs_vfs_unlink, @@ -1502,6 +1536,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { static const struct inode_operations v9fs_dir_inode_operations = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, + .atomic_open = v9fs_vfs_atomic_open, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index e3dd2a1e2bfc..a354fe2cb234 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -230,17 +230,23 @@ int v9fs_open_to_dotl_flags(int flags) * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions - * @nd: path information * */ static int v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, struct nameidata *nd) +{ + return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); +} + +static struct file * +v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, 
umode_t omode, + bool *created) { int err = 0; gid_t gid; - int flags; umode_t mode; char *name = NULL; struct file *filp; @@ -251,19 +257,25 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, struct p9_fid *dfid, *ofid, *inode_fid; struct v9fs_session_info *v9ses; struct posix_acl *pacl = NULL, *dacl = NULL; + struct dentry *res = NULL; - v9ses = v9fs_inode2v9ses(dir); - if (nd) - flags = nd->intent.open.flags; - else { - /* - * create call without LOOKUP_OPEN is due - * to mknod of regular files. So use mknod - * operation. - */ - return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); + if (d_unhashed(dentry)) { + res = v9fs_vfs_lookup(dir, dentry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + dentry = res; + } + + /* Only creates */ + if (!(flags & O_CREAT) || dentry->d_inode) { + finish_no_open(od, res); + return NULL; } + v9ses = v9fs_inode2v9ses(dir); + name = (char *) dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", name, flags, omode); @@ -272,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); - return err; + goto err_return; } /* clone a fid to use for creation */ @@ -280,7 +292,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); - return err; + goto err_return; } gid = v9fs_get_fsgid_for_create(dir); @@ -345,7 +357,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - filp = lookup_instantiate_filp(nd, dentry, generic_file_open); + filp = finish_open(od, dentry, generic_file_open); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto err_clunk_old_fid; @@ -355,7 +367,10 @@ 
v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, filp); #endif - return 0; + *created = true; +out: + dput(res); + return filp; error: if (fid) @@ -364,7 +379,9 @@ err_clunk_old_fid: if (ofid) p9_client_clunk(ofid); v9fs_set_create_acl(NULL, &dacl, &pacl); - return err; +err_return: + filp = ERR_PTR(err); + goto out; } /** @@ -982,6 +999,7 @@ out: const struct inode_operations v9fs_dir_inode_operations_dotl = { .create = v9fs_vfs_create_dotl, + .atomic_open = v9fs_vfs_atomic_open_dotl, .lookup = v9fs_vfs_lookup, .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, -- cgit v1.2.3 From 015c3bbcd88df2305aae5b017a9c91c08b380aa1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:27 +0200 Subject: vfs: remove open intents from nameidata All users of open intents have been converted to use ->atomic_{open,create}. This patch gets rid of nd->intent.open and related infrastructure. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/internal.h | 5 +-- fs/namei.c | 95 ++++++++++++++++++++++++--------------------------- fs/open.c | 87 ++-------------------------------------------- include/linux/namei.h | 14 -------- 4 files changed, 48 insertions(+), 153 deletions(-) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index 70067775df2e..ae69a3b150d7 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -82,13 +82,10 @@ extern struct super_block *user_get_super(dev_t); /* * open.c */ -struct nameidata; -extern struct file *nameidata_to_filp(struct nameidata *); -extern void release_open_intent(struct nameidata *); struct opendata { struct dentry *dentry; struct vfsmount *mnt; - struct file **filp; + struct file *filp; }; struct open_flags { int open_flag; diff --git a/fs/namei.c b/fs/namei.c index 9e11ae83bff6..0ed876259f8b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -463,22 +463,6 @@ err_root: return -ECHILD; } -/** - * release_open_intent - free up 
open intent resources - * @nd: pointer to nameidata - */ -void release_open_intent(struct nameidata *nd) -{ - struct file *file = nd->intent.open.file; - - if (file && !IS_ERR(file)) { - if (file->f_path.dentry == NULL) - put_filp(file); - else - fput(file); - } -} - static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { return dentry->d_op->d_revalidate(dentry, nd); @@ -2210,7 +2194,8 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) } static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, const struct open_flags *op, + struct path *path, struct opendata *od, + const struct open_flags *op, int *want_write, bool need_lookup, bool *created) { @@ -2219,7 +2204,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, umode_t mode; int error; int acc_mode; - struct opendata od; struct file *filp; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2285,14 +2269,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od.dentry = DENTRY_NOT_SET; - od.mnt = nd->path.mnt; - od.filp = &nd->intent.open.file; - filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, + od->dentry = DENTRY_NOT_SET; + od->mnt = nd->path.mnt; + filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, created); if (IS_ERR(filp)) { - if (WARN_ON(od.dentry != DENTRY_NOT_SET)) - dput(od.dentry); + if (WARN_ON(od->dentry != DENTRY_NOT_SET)) + dput(od->dentry); if (create_error && PTR_ERR(filp) == -ENOENT) filp = ERR_PTR(create_error); @@ -2306,13 +2289,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (!filp) { - if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { + if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od.dentry) { + if (od->dentry) { dput(dentry); - dentry = od.dentry; + 
dentry = od->dentry; } goto looked_up; } @@ -2375,6 +2358,7 @@ looked_up: * was performed, only lookup. */ static struct file *lookup_open(struct nameidata *nd, struct path *path, + struct opendata *od, const struct open_flags *op, int *want_write, bool *created) { @@ -2394,7 +2378,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, op, want_write, + return atomic_open(nd, dentry, path, od, op, want_write, need_lookup, created); } @@ -2416,7 +2400,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, * rw->ro transition does not occur between * the time when the file is created and when * a permanent write count is taken through - * the 'struct file' in nameidata_to_filp(). + * the 'struct file' in finish_open(). */ error = mnt_want_write(nd->path.mnt); if (error) @@ -2444,7 +2428,8 @@ out_dput: * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, - const struct open_flags *op, const char *pathname) + struct opendata *od, const struct open_flags *op, + const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2521,7 +2506,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, op, &want_write, &created); + filp = lookup_open(nd, path, od, op, &want_write, &created); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { @@ -2627,7 +2612,8 @@ common: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - filp = nameidata_to_filp(nd); + od->mnt = nd->path.mnt; + filp = finish_open(od, nd->path.dentry, NULL); if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { BUG_ON(save_parent.dentry != dir); path_put(&nd->path); @@ -2642,6 +2628,11 @@ common: retried = true; goto retry_lookup; } 
+ if (IS_ERR(filp)) + goto out; + error = open_check_o_direct(filp); + if (error) + goto exit_fput; opened: if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); @@ -2671,24 +2662,26 @@ exit_dput: exit: filp = ERR_PTR(error); goto out; +exit_fput: + fput(filp); + goto exit; + } static struct file *path_openat(int dfd, const char *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; - struct file *filp; + struct opendata od; + struct file *res; struct path path; int error; - filp = get_empty_filp(); - if (!filp) + od.filp = get_empty_filp(); + if (!od.filp) return ERR_PTR(-ENFILE); - filp->f_flags = op->open_flag; - nd->intent.open.file = filp; - nd->intent.open.flags = open_to_namei_flags(op->open_flag); - nd->intent.open.create_mode = op->mode; + od.filp->f_flags = op->open_flag; error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) @@ -2699,14 +2692,14 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - filp = do_last(nd, &path, op, pathname); - while (unlikely(!filp)) { /* trailing symlink */ + res = do_last(nd, &path, &od, op, pathname); + while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; if (!(nd->flags & LOOKUP_FOLLOW)) { path_put_conditional(&path, nd); path_put(&nd->path); - filp = ERR_PTR(-ELOOP); + res = ERR_PTR(-ELOOP); break; } nd->flags |= LOOKUP_PARENT; @@ -2714,7 +2707,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - filp = do_last(nd, &path, op, pathname); + res = do_last(nd, &path, &od, op, pathname); put_link(nd, &link, cookie); } out: @@ -2722,17 +2715,20 @@ out: path_put(&nd->root); if (base) fput(base); - release_open_intent(nd); - if (filp == ERR_PTR(-EOPENSTALE)) { + if (od.filp) { + BUG_ON(od.filp->f_path.dentry); + put_filp(od.filp); + } + if (res == 
ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) - filp = ERR_PTR(-ECHILD); + res = ERR_PTR(-ECHILD); else - filp = ERR_PTR(-ESTALE); + res = ERR_PTR(-ESTALE); } - return filp; + return res; out_filp: - filp = ERR_PTR(error); + res = ERR_PTR(error); goto out; } @@ -2788,7 +2784,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path goto out; nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; - nd.intent.open.flags = O_EXCL; /* * Do the final lookup. diff --git a/fs/open.c b/fs/open.c index 13bece4f36a4..937f4ec20180 100644 --- a/fs/open.c +++ b/fs/open.c @@ -770,46 +770,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, return res; } -/** - * lookup_instantiate_filp - instantiates the open intent filp - * @nd: pointer to nameidata - * @dentry: pointer to dentry - * @open: open callback - * - * Helper for filesystems that want to use lookup open intents and pass back - * a fully instantiated struct file to the caller. - * This function is meant to be called from within a filesystem's - * lookup method. - * Beware of calling it for non-regular files! Those ->open methods might block - * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo, - * leading to a deadlock, as nobody can open that fifo anymore, because - * another process to open fifo will block on locked parent when doing lookup). - * Note that in case of error, nd->intent.open.file is destroyed, but the - * path information remains valid. - * If the open callback is set to NULL, then the standard f_op->open() - * filesystem callback is substituted. 
- */ -struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) -{ - const struct cred *cred = current_cred(); - - if (IS_ERR(nd->intent.open.file)) - goto out; - if (IS_ERR(dentry)) - goto out_err; - nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), - nd->intent.open.file, - open, cred); -out: - return nd->intent.open.file; -out_err: - release_open_intent(nd); - nd->intent.open.file = ERR_CAST(dentry); - goto out; -} -EXPORT_SYMBOL_GPL(lookup_instantiate_filp); - /** * finish_open - finish opening a file * @od: opaque open data @@ -829,9 +789,9 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, mntget(od->mnt); dget(dentry); - res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); + res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); if (!IS_ERR(res)) - *od->filp = NULL; + od->filp = NULL; return res; } @@ -852,49 +812,6 @@ void finish_no_open(struct opendata *od, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); -/** - * nameidata_to_filp - convert a nameidata to an open filp. - * @nd: pointer to nameidata - * @flags: open flags - * - * Note that this function destroys the original nameidata - */ -struct file *nameidata_to_filp(struct nameidata *nd) -{ - const struct cred *cred = current_cred(); - struct file *filp; - - /* Pick up the filp from the open intent */ - filp = nd->intent.open.file; - - /* Has the filesystem initialised the file for us? 
*/ - if (filp->f_path.dentry != NULL) { - nd->intent.open.file = NULL; - } else { - struct file *res; - - path_get(&nd->path); - res = do_dentry_open(nd->path.dentry, nd->path.mnt, - filp, NULL, cred); - if (!IS_ERR(res)) { - int error; - - nd->intent.open.file = NULL; - BUG_ON(res != filp); - - error = open_check_o_direct(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } else { - /* Allow nd->intent.open.file to be recycled */ - filp = res; - } - } - return filp; -} - /* * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an * error. diff --git a/include/linux/namei.h b/include/linux/namei.h index ffc02135c483..23d859879210 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -7,12 +7,6 @@ struct vfsmount; -struct open_intent { - int flags; - int create_mode; - struct file *file; -}; - enum { MAX_NESTED_LINKS = 8 }; struct nameidata { @@ -25,11 +19,6 @@ struct nameidata { int last_type; unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; - - /* Intent data */ - union { - struct open_intent open; - } intent; }; /* @@ -82,9 +71,6 @@ extern int kern_path_parent(const char *, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)); - extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern int follow_down_one(struct path *); -- cgit v1.2.3 From aa4caadb70b782999ce5d150ac2f4b1d18e2fc75 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:28 +0200 Subject: vfs: do_last(): clean up error handling Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0ed876259f8b..044215a7bb0c 100644 --- a/fs/namei.c +++ 
b/fs/namei.c @@ -2634,21 +2634,14 @@ common: if (error) goto exit_fput; opened: - if (!IS_ERR(filp)) { - error = ima_file_check(filp, op->acc_mode); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } - if (!IS_ERR(filp)) { - if (will_truncate) { - error = handle_truncate(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } + error = ima_file_check(filp, op->acc_mode); + if (error) + goto exit_fput; + + if (will_truncate) { + error = handle_truncate(filp); + if (error) + goto exit_fput; } out: if (want_write) -- cgit v1.2.3 From e83db167229702da0f48957641e0dbf36b2644aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:29 +0200 Subject: vfs: do_last(): clean up labels Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 044215a7bb0c..ea24376cfa94 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2463,13 +2463,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -EISDIR; goto exit; } - goto ok; + goto finish_open; case LAST_BIND: error = complete_walk(nd); if (error) return ERR_PTR(error); audit_inode(pathname, dir); - goto ok; + goto finish_open; } if (!(open_flag & O_CREAT)) { @@ -2526,7 +2526,7 @@ retry_lookup: will_truncate = 0; acc_mode = MAY_OPEN; path_to_nameidata(path, nd); - goto common; + goto finish_open_created; } /* @@ -2598,7 +2598,7 @@ finish_lookup: if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto exit; audit_inode(pathname, nd->path.dentry); -ok: +finish_open: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; @@ -2608,7 +2608,7 @@ ok: goto exit; want_write = 1; } -common: +finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; -- cgit v1.2.3 From 77d660a8a83036432dc33f092a367d06563d233e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 
15:10:30 +0200 Subject: vfs: do_last(): clean up bool Consistently use bool for boolean values in do_last(). Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index ea24376cfa94..6bdb8d732538 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,7 +2196,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct opendata *od, const struct open_flags *op, - int *want_write, bool need_lookup, + bool *want_write, bool need_lookup, bool *created) { struct inode *dir = nd->path.dentry->d_inode; @@ -2238,7 +2238,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, (open_flag & O_ACCMODE) != O_RDONLY) { error = mnt_want_write(nd->path.mnt); if (!error) { - *want_write = 1; + *want_write = true; } else if (!(open_flag & O_CREAT)) { /* * No O_CREATE -> atomicity not a requirement -> fall @@ -2360,7 +2360,7 @@ looked_up: static struct file *lookup_open(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - int *want_write, bool *created) + bool *want_write, bool *created) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2405,7 +2405,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, error = mnt_want_write(nd->path.mnt); if (error) goto out_dput; - *want_write = 1; + *want_write = true; *created = true; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) @@ -2433,13 +2433,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; - int will_truncate = open_flag & O_TRUNC; - int want_write = 0; + bool will_truncate = (open_flag & O_TRUNC) != 0; + 
bool want_write = false; int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; bool created; - int symlink_ok = 0; + bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; int error; @@ -2476,7 +2476,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) - symlink_ok = 1; + symlink_ok = true; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &nd->last, path, &inode); if (likely(!error)) @@ -2514,7 +2514,7 @@ retry_lookup: goto out; if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) - will_truncate = 0; + will_truncate = false; audit_inode(pathname, filp->f_path.dentry); goto opened; @@ -2523,7 +2523,7 @@ retry_lookup: if (created) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; - will_truncate = 0; + will_truncate = false; acc_mode = MAY_OPEN; path_to_nameidata(path, nd); goto finish_open_created; @@ -2541,7 +2541,7 @@ retry_lookup: */ if (want_write) { mnt_drop_write(nd->path.mnt); - want_write = 0; + want_write = false; } error = -EEXIST; @@ -2600,13 +2600,13 @@ finish_lookup: audit_inode(pathname, nd->path.dentry); finish_open: if (!S_ISREG(nd->inode->i_mode)) - will_truncate = 0; + will_truncate = false; if (will_truncate) { error = mnt_want_write(nd->path.mnt); if (error) goto exit; - want_write = 1; + want_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); @@ -2623,7 +2623,7 @@ finish_open_created: save_parent.dentry = NULL; if (want_write) { mnt_drop_write(nd->path.mnt); - want_write = 0; + want_write = false; } retried = true; goto retry_lookup; -- cgit v1.2.3 From f60dc3db6e24b7c36445cf1feb56b34c799074b3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:31 +0200 Subject: vfs: do_last(): clean up retry Move the lookup retry logic 
to the bottom of the function to make the normal case simpler to read. Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 6bdb8d732538..183a769537fe 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2614,22 +2614,11 @@ finish_open_created: goto exit; od->mnt = nd->path.mnt; filp = finish_open(od, nd->path.dentry, NULL); - if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (want_write) { - mnt_drop_write(nd->path.mnt); - want_write = false; - } - retried = true; - goto retry_lookup; - } - if (IS_ERR(filp)) + if (IS_ERR(filp)) { + if (filp == ERR_PTR(-EOPENSTALE)) + goto stale_open; goto out; + } error = open_check_o_direct(filp); if (error) goto exit_fput; @@ -2659,6 +2648,23 @@ exit_fput: fput(filp); goto exit; +stale_open: + /* If no saved parent or already retried then can't retry */ + if (!save_parent.dentry || retried) + goto out; + + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = false; + } + retried = true; + goto retry_lookup; } static struct file *path_openat(int dfd, const char *pathname, -- cgit v1.2.3 From a8277b9baa6268de386529a33061775bc716198b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:32 +0200 Subject: vfs: move O_DIRECT check to common code Perform open_check_o_direct() in a common place in do_last after opening the file. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 183a769537fe..4bc4bc6a6938 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2305,22 +2305,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * here. */ error = may_open(&filp->f_path, acc_mode, open_flag); - if (error) - goto out_fput; - - error = open_check_o_direct(filp); - if (error) - goto out_fput; + if (error) { + fput(filp); + filp = ERR_PTR(error); + } out: dput(dentry); return filp; -out_fput: - fput(filp); - filp = ERR_PTR(error); - goto out; - no_open: if (need_lookup) { dentry = lookup_real(dir, dentry, nd); @@ -2619,10 +2612,10 @@ finish_open_created: goto stale_open; goto out; } +opened: error = open_check_o_direct(filp); if (error) goto exit_fput; -opened: error = ima_file_check(filp, op->acc_mode); if (error) goto exit_fput; -- cgit v1.2.3 From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:01:45 -0400 Subject: ->atomic_open() prototype change - pass int * instead of bool * ... and let finish_open() report having opened the file via that sucker. Next step: don't modify od->filp at all. 
[AV: FILE_CREATE was already used by cifs; Miklos' fix folded] Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- fs/9p/vfs_inode.c | 6 +++--- fs/9p/vfs_inode_dotl.c | 6 +++--- fs/ceph/dir.c | 8 ++++---- fs/ceph/file.c | 5 +++-- fs/ceph/super.h | 2 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/dir.c | 12 ++++++------ fs/fuse/dir.c | 10 +++++----- fs/namei.c | 33 +++++++++++++++++---------------- fs/nfs/dir.c | 11 ++++++----- fs/open.c | 7 +++++-- include/linux/fs.h | 9 +++++++-- 14 files changed, 63 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8157488c3463..af4e45bd6cfa 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -64,7 +64,7 @@ ata *); void (*update_time)(struct inode *, struct timespec *, int); struct file * (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, - umode_t create_mode, bool *created); + umode_t create_mode, int *opened); locking rules: all may block diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index beb6e691f70a..d7121051afcd 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -366,7 +366,7 @@ struct inode_operations { void (*update_time)(struct inode *, struct timespec *, int); struct file * (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, - umode_t create_mode, bool *created); + umode_t create_mode, int *opened); }; Again, all methods are called without any locks being held, unless diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index e8c42ceb89ba..de626b3b342f 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -859,7 +859,7 @@ error: static struct file * v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t mode, - bool *created) + int *opened) { int err; 
u32 perm; @@ -918,7 +918,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9inode->writeback_fid = (void *) inode_fid; } mutex_unlock(&v9inode->v_mutex); - filp = finish_open(od, dentry, generic_file_open); + filp = finish_open(od, dentry, generic_file_open, opened); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto error; @@ -930,7 +930,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9fs_cache_inode_set_cookie(dentry->d_inode, filp); #endif - *created = true; + *opened |= FILE_CREATED; out: dput(res); return filp; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a354fe2cb234..3db55471bc93 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -243,7 +243,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, static struct file * v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t omode, - bool *created) + int *opened) { int err = 0; gid_t gid; @@ -357,7 +357,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - filp = finish_open(od, dentry, generic_file_open); + filp = finish_open(od, dentry, generic_file_open, opened); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto err_clunk_old_fid; @@ -367,7 +367,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, filp); #endif - *created = true; + *opened |= FILE_CREATED; out: dput(res); return filp; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 75df600ec9b4..81e5e908df9d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -636,7 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t mode, - bool *created) + int *opened) { int err; 
struct dentry *res = NULL; @@ -650,7 +650,7 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (err < 0) return ERR_PTR(err); - return ceph_lookup_open(dir, dentry, od, flags, mode); + return ceph_lookup_open(dir, dentry, od, flags, mode, opened); } if (d_unhashed(dentry)) { @@ -668,8 +668,8 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, return NULL; } - *created = true; - filp = ceph_lookup_open(dir, dentry, od, flags, mode); + *opened |= FILE_CREATED; + filp = ceph_lookup_open(dir, dentry, od, flags, mode, opened); dput(res); return filp; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e34dc22e75a9..4c304a90d046 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -214,7 +214,8 @@ out: * ceph_release gets called). So fear not! */ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode) + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -247,7 +248,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = finish_open(od, req->r_dentry, ceph_open); + file = finish_open(od, req->r_dentry, ceph_open, opened); if (IS_ERR(file)) err = PTR_ERR(file); out: diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e61e54673e56..f9a325108b49 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -808,7 +808,7 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, - umode_t mode); + umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 
3a572bf5947f..92a7c3d8a031 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -48,7 +48,7 @@ extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); extern struct file *cifs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, - bool *); + int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 7a3dcd15d681..6cdf23fd70ee 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -160,7 +160,7 @@ check_name(struct dentry *direntry) static int cifs_do_create(struct inode *inode, struct dentry *direntry, int xid, struct tcon_link *tlink, unsigned oflags, umode_t mode, __u32 *oplock, __u16 *fileHandle, - bool *created) + int *created) { int rc = -ENOENT; int create_options = CREATE_NOT_DIR; @@ -311,7 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, .device = 0, }; - *created = true; + *created |= FILE_CREATED; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = (__u64) current_fsuid(); if (inode->i_mode & S_ISGID) @@ -379,7 +379,7 @@ out: struct file * cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct opendata *od, unsigned oflags, umode_t mode, - bool *created) + int *opened) { int rc; int xid; @@ -426,14 +426,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, tcon = tlink_tcon(tlink); rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fileHandle, created); + &oplock, &fileHandle, opened); if (rc) { filp = ERR_PTR(rc); goto out; } - filp = finish_open(od, direntry, generic_file_open); + filp = finish_open(od, direntry, generic_file_open, opened); if (IS_ERR(filp)) { CIFSSMBClose(xid, tcon, fileHandle); goto out; @@ -469,7 +469,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, struct tcon_link *tlink; __u16 fileHandle; __u32 
oplock; - bool created = true; + int created = FILE_CREATED; cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p", inode, direntry->d_name.name, direntry); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e42442f1da16..345f78ee5c9d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -371,7 +371,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, */ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, struct opendata *od, unsigned flags, - umode_t mode) + umode_t mode, int *opened) { int err; struct inode *inode; @@ -450,7 +450,7 @@ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - file = finish_open(od, entry, generic_file_open); + file = finish_open(od, entry, generic_file_open, opened); if (IS_ERR(file)) { fuse_sync_release(ff, flags); } else { @@ -472,7 +472,7 @@ out_err: static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, struct opendata *od, unsigned flags, - umode_t mode, bool *created) + umode_t mode, int *opened) { int err; struct fuse_conn *fc = get_fuse_conn(dir); @@ -492,12 +492,12 @@ static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, goto no_open; /* Only creates */ - *created = true; + *opened |= FILE_CREATED; if (fc->no_create) goto mknod; - file = fuse_create_open(dir, entry, od, flags, mode); + file = fuse_create_open(dir, entry, od, flags, mode, opened); if (PTR_ERR(file) == -ENOSYS) { fc->no_create = 1; goto mknod; diff --git a/fs/namei.c b/fs/namei.c index 4bc4bc6a6938..7a33f074e5bd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2197,7 +2197,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct opendata *od, const struct open_flags *op, bool *want_write, bool need_lookup, - 
bool *created) + int *opened) { struct inode *dir = nd->path.dentry->d_inode; unsigned open_flag = open_to_namei_flags(op->open_flag); @@ -2222,7 +2222,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (open_flag & O_EXCL) { open_flag &= ~O_TRUNC; - *created = true; + *opened |= FILE_CREATED; } /* @@ -2272,7 +2272,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, od->dentry = DENTRY_NOT_SET; od->mnt = nd->path.mnt; filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, - created); + opened); if (IS_ERR(filp)) { if (WARN_ON(od->dentry != DENTRY_NOT_SET)) dput(od->dentry); @@ -2283,7 +2283,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } acc_mode = op->acc_mode; - if (*created) { + if (*opened & FILE_CREATED) { fsnotify_create(dir, dentry); acc_mode = MAY_OPEN; } @@ -2353,7 +2353,7 @@ looked_up: static struct file *lookup_open(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - bool *want_write, bool *created) + bool *want_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2361,7 +2361,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, int error; bool need_lookup; - *created = false; + *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) return ERR_CAST(dentry); @@ -2372,7 +2372,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { return atomic_open(nd, dentry, path, od, op, want_write, - need_lookup, created); + need_lookup, opened); } if (need_lookup) { @@ -2399,7 +2399,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (error) goto out_dput; *want_write = true; - *created = true; + *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if 
(error) goto out_dput; @@ -2422,7 +2422,7 @@ out_dput: */ static struct file *do_last(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - const char *pathname) + int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2431,7 +2431,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; - bool created; bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; @@ -2499,21 +2498,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, od, op, &want_write, &created); + filp = lookup_open(nd, path, od, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { if (IS_ERR(filp)) goto out; - if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + if ((*opened & FILE_CREATED) || + !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) will_truncate = false; audit_inode(pathname, filp->f_path.dentry); goto opened; } - if (created) { + if (*opened & FILE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; @@ -2606,7 +2606,7 @@ finish_open_created: if (error) goto exit; od->mnt = nd->path.mnt; - filp = finish_open(od, nd->path.dentry, NULL); + filp = finish_open(od, nd->path.dentry, NULL, opened); if (IS_ERR(filp)) { if (filp == ERR_PTR(-EOPENSTALE)) goto stale_open; @@ -2667,6 +2667,7 @@ static struct file *path_openat(int dfd, const char *pathname, struct opendata od; struct file *res; struct path path; + int opened = 0; int error; od.filp = get_empty_filp(); @@ -2684,7 +2685,7 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, pathname); + res = do_last(nd, &path, &od, op, &opened, 
pathname); while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; @@ -2699,7 +2700,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, pathname); + res = do_last(nd, &path, &od, op, &opened, pathname); put_link(nd, &link, cookie); } out: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e6d55dc93ffd..6deb2549ead5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -113,7 +113,7 @@ const struct inode_operations nfs3_dir_inode_operations = { static struct file *nfs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, - bool *); + int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -1389,7 +1389,8 @@ static int do_open(struct inode *inode, struct file *filp) static struct file *nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct opendata *od, unsigned open_flags) + struct opendata *od, unsigned open_flags, + int *opened) { struct file *filp; int err; @@ -1408,7 +1409,7 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx, } } - filp = finish_open(od, dentry, do_open); + filp = finish_open(od, dentry, do_open, opened); if (!IS_ERR(filp)) nfs_file_set_open_context(filp, ctx); @@ -1419,7 +1420,7 @@ out: static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned open_flags, - umode_t mode, bool *created) + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct dentry *res; @@ -1497,7 +1498,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - filp = nfs_finish_open(ctx, dentry, od, open_flags); + filp = nfs_finish_open(ctx, dentry, od, open_flags, opened); dput(res); return filp; diff --git 
a/fs/open.c b/fs/open.c index 937f4ec20180..89589bd3993c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -782,7 +782,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, * filesystem callback is substituted. */ struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) + int (*open)(struct inode *, struct file *), + int *opened) { struct file *res; @@ -790,8 +791,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, dget(dentry); res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) + if (!IS_ERR(res)) { + *opened |= FILE_OPENED; od->filp = NULL; + } return res; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 0314635cf833..a7618cf28d0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1696,7 +1696,7 @@ struct inode_operations { int (*update_time)(struct inode *, struct timespec *, int); struct file * (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, - umode_t create_mode, bool *created); + umode_t create_mode, int *opened); } ____cacheline_aligned; struct seq_file; @@ -2065,8 +2065,13 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); +enum { + FILE_CREATED = 1, + FILE_OPENED = 2 +}; extern struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *)); + int (*open)(struct inode *, struct file *), + int *opened); extern void finish_no_open(struct opendata *od, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v1.2.3 From 3d8a00d2099ebc6d5a6e95fadaf861709d9919a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:04:43 -0400 Subject: don't modify od->filp at all make put_filp() conditional on flag set by finish_open() Signed-off-by: Al Viro --- fs/namei.c | 4 +--- 
fs/open.c | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 7a33f074e5bd..18b9326d951f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2708,10 +2708,8 @@ out: path_put(&nd->root); if (base) fput(base); - if (od.filp) { - BUG_ON(od.filp->f_path.dentry); + if (!(opened & FILE_OPENED)) put_filp(od.filp); - } if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) res = ERR_PTR(-ECHILD); diff --git a/fs/open.c b/fs/open.c index 89589bd3993c..c87f98201c29 100644 --- a/fs/open.c +++ b/fs/open.c @@ -786,15 +786,14 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, int *opened) { struct file *res; + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(od->mnt); dget(dentry); res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) { + if (!IS_ERR(res)) *opened |= FILE_OPENED; - od->filp = NULL; - } return res; } -- cgit v1.2.3 From d95852777bc8ba6b3ad3397d495c5f9dd8ca8383 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:39:14 +0400 Subject: make ->atomic_open() return int Change of calling conventions (old -> new): NULL -> 1; file -> 0; ERR_PTR(-ve) -> -ve. Caller *knows* that struct file *; no need to return it.
Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 6 ++--- fs/9p/vfs_inode.c | 10 +++---- fs/9p/vfs_inode_dotl.c | 14 +++++----- fs/ceph/dir.c | 19 +++++++------ fs/ceph/file.c | 12 ++++----- fs/ceph/super.h | 6 ++--- fs/cifs/cifsfs.h | 6 ++--- fs/cifs/dir.c | 17 ++++++------ fs/fuse/dir.c | 33 +++++++++++------------ fs/namei.c | 14 +++++----- fs/nfs/dir.c | 57 +++++++++++++++++++-------------------- include/linux/fs.h | 6 ++--- 13 files changed, 97 insertions(+), 105 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index af4e45bd6cfa..46a24a6ed095 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,7 +62,7 @@ ata *); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); - struct file * (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, umode_t create_mode, int *opened); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d7121051afcd..d0d690bbc4c7 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,7 +364,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); - struct file * (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, umode_t create_mode, int *opened); }; @@ -482,8 +482,8 @@ otherwise noted. atomic_open: called on the last component of an open. Using this optional method the filesystem can look up, possibly create and open the file in one atomic operation. 
If it cannot perform this (e.g. the file type - turned out to be wrong) it may signal this by returning NULL instead of - an open struct file pointer. This method is only called if the last + turned out to be wrong) it may signal this by returning 1 instead of + usual 0 or -ve . This method is only called if the last component is negative or needs lookup. Cached positive dentries are still handled by f_op->open(). diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index de626b3b342f..62ce8daefa95 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -856,7 +856,7 @@ error: return ERR_PTR(result); } -static struct file * +static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t mode, int *opened) @@ -872,7 +872,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, if (d_unhashed(dentry)) { res = v9fs_vfs_lookup(dir, dentry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) dentry = res; @@ -881,7 +881,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { finish_no_open(od, res); - return NULL; + return 1; } err = 0; @@ -933,13 +933,11 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, *opened |= FILE_CREATED; out: dput(res); - return filp; + return err; error: if (fid) p9_client_clunk(fid); - - filp = ERR_PTR(err); goto out; } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 3db55471bc93..69f05109f75d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -240,7 +240,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); } -static struct file * +static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t omode, int *opened) @@ -262,7 +262,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct 
dentry *dentry, if (d_unhashed(dentry)) { res = v9fs_vfs_lookup(dir, dentry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) dentry = res; @@ -271,7 +271,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { finish_no_open(od, res); - return NULL; + return 1; } v9ses = v9fs_inode2v9ses(dir); @@ -284,7 +284,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); - goto err_return; + goto out; } /* clone a fid to use for creation */ @@ -292,7 +292,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); - goto err_return; + goto out; } gid = v9fs_get_fsgid_for_create(dir); @@ -370,7 +370,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, *opened |= FILE_CREATED; out: dput(res); - return filp; + return err; error: if (fid) @@ -379,8 +379,6 @@ err_clunk_old_fid: if (ofid) p9_client_clunk(ofid); v9fs_set_create_acl(NULL, &dacl, &pacl); -err_return: - filp = ERR_PTR(err); goto out; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 81e5e908df9d..d8bfabeeaa25 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -634,21 +634,20 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } -struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, - int *opened) +int ceph_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { int err; struct dentry *res = NULL; - struct file *filp; if (!(flags & O_CREAT)) { if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); + return -ENAMETOOLONG; err = ceph_init_dentry(dentry); if (err < 0) - return 
ERR_PTR(err); + return err; return ceph_lookup_open(dir, dentry, od, flags, mode, opened); } @@ -656,7 +655,7 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (d_unhashed(dentry)) { res = ceph_lookup(dir, dentry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) dentry = res; @@ -665,14 +664,14 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, /* We don't deal with positive dentries here */ if (dentry->d_inode) { finish_no_open(od, res); - return NULL; + return 1; } *opened |= FILE_CREATED; - filp = ceph_lookup_open(dir, dentry, od, flags, mode, opened); + err = ceph_lookup_open(dir, dentry, od, flags, mode, opened); dput(res); - return filp; + return err; } /* diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4c304a90d046..b8cc3ee5401e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -213,9 +213,9 @@ out: * may_open() fails, the struct *file gets cleaned up (i.e. * ceph_release gets called). So fear not! */ -struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, - int *opened) +int ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -230,7 +230,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_CAST(req); + return PTR_ERR(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { @@ -257,10 +257,10 @@ out: dout("ceph_lookup_open result=%p\n", ret); if (IS_ERR(ret)) - return ERR_CAST(ret); + return PTR_ERR(ret); dput(ret); - return err ? 
ERR_PTR(err) : file; + return err; } int ceph_release(struct inode *inode, struct file *file) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f9a325108b49..f7e8e82ec47f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); -extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, - umode_t mode, int *opened); +extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, + umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 92a7c3d8a031..58d9aca46a40 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -46,9 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); -extern struct file *cifs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, - int *); +extern int cifs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6cdf23fd70ee..8ca70b102b95 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -376,7 +376,7 @@ out: return rc; } -struct file * +int cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct opendata *od, unsigned oflags, umode_t mode, int *opened) @@ -403,15 +403,15 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (!(oflags & O_CREAT)) { struct 
dentry *res = cifs_lookup(inode, direntry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); finish_no_open(od, res); - return NULL; + return 1; } rc = check_name(direntry); if (rc) - return ERR_PTR(rc); + return rc; xid = GetXid(); @@ -428,13 +428,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, &oplock, &fileHandle, opened); - if (rc) { - filp = ERR_PTR(rc); + if (rc) goto out; - } filp = finish_open(od, direntry, generic_file_open, opened); if (IS_ERR(filp)) { + rc = PTR_ERR(filp); CIFSSMBClose(xid, tcon, fileHandle); goto out; } @@ -443,14 +442,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (pfile_info == NULL) { CIFSSMBClose(xid, tcon, fileHandle); fput(filp); - filp = ERR_PTR(-ENOMEM); + rc = -ENOMEM; } out: cifs_put_tlink(tlink); free_xid: FreeXid(xid); - return filp; + return rc; } int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 345f78ee5c9d..8a9ca09e87d4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -369,9 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. 
*/ -static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, - umode_t mode, int *opened) +static int fuse_create_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode, int *opened) { int err; struct inode *inode; @@ -452,12 +452,14 @@ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, fuse_invalidate_attr(dir); file = finish_open(od, entry, generic_file_open, opened); if (IS_ERR(file)) { + err = PTR_ERR(file); fuse_sync_release(ff, flags); } else { file->private_data = fuse_file_get(ff); fuse_finish_open(inode, file); + err = 0; } - return file; + return err; out_free_ff: fuse_file_free(ff); @@ -466,23 +468,22 @@ out_put_request: out_put_forget_req: kfree(forget); out_err: - return ERR_PTR(err); + return err; } static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); -static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, - umode_t mode, int *opened) +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode, int *opened) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct file *file; struct dentry *res = NULL; if (d_unhashed(entry)) { res = fuse_lookup(dir, entry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) entry = res; @@ -497,24 +498,22 @@ static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fc->no_create) goto mknod; - file = fuse_create_open(dir, entry, od, flags, mode, opened); - if (PTR_ERR(file) == -ENOSYS) { + err = fuse_create_open(dir, entry, od, flags, mode, opened); + if (err == -ENOSYS) { fc->no_create = 1; goto mknod; } out_dput: dput(res); - return file; + return err; mknod: err = fuse_mknod(dir, entry, mode, 0); - if (err) { - file = ERR_PTR(err); + if (err) goto out_dput; - } no_open: 
finish_no_open(od, res); - return NULL; + return 1; } /* diff --git a/fs/namei.c b/fs/namei.c index 18b9326d951f..f0dae0057ec9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2204,7 +2204,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, umode_t mode; int error; int acc_mode; - struct file *filp; + struct file *filp = NULL; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2271,14 +2271,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, od->dentry = DENTRY_NOT_SET; od->mnt = nd->path.mnt; - filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, + error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, opened); - if (IS_ERR(filp)) { + if (error < 0) { if (WARN_ON(od->dentry != DENTRY_NOT_SET)) dput(od->dentry); - if (create_error && PTR_ERR(filp) == -ENOENT) - filp = ERR_PTR(create_error); + if (create_error && error == -ENOENT) + error = create_error; + filp = ERR_PTR(error); goto out; } @@ -2288,7 +2289,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, acc_mode = MAY_OPEN; } - if (!filp) { + if (error) { /* returned 1, that is */ if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; @@ -2304,6 +2305,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. 
*/ + filp = od->filp; error = may_open(&filp->f_path, acc_mode, open_flag); if (error) { fput(filp); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6deb2549ead5..b56f4b36ed41 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -111,9 +111,9 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 -static struct file *nfs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, - int *); +static int nfs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -1387,10 +1387,10 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static struct file *nfs_finish_open(struct nfs_open_context *ctx, - struct dentry *dentry, - struct opendata *od, unsigned open_flags, - int *opened) +static int nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct opendata *od, unsigned open_flags, + int *opened) { struct file *filp; int err; @@ -1403,30 +1403,31 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx, /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); - if (err < 0) { - filp = ERR_PTR(err); + if (err < 0) goto out; - } } filp = finish_open(od, dentry, do_open, opened); - if (!IS_ERR(filp)) - nfs_file_set_open_context(filp, ctx); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + nfs_file_set_open_context(filp, ctx); + err = 0; out: put_nfs_open_context(ctx); - return filp; + return err; } -static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned open_flags, - umode_t mode, int *opened) +static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned open_flags, + umode_t mode, int *opened) { struct 
nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - struct file *filp; int err; /* Expect a negative dentry */ @@ -1437,21 +1438,19 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { - err = -ENOENT; if (!d_unhashed(dentry)) { /* * Hashed negative dentry with O_DIRECTORY: dentry was * revalidated and is fine, no need to perform lookup * again */ - goto out_err; + return -ENOENT; } goto no_open; } - err = -ENAMETOOLONG; if (dentry->d_name.len > NFS_SERVER(dir)->namelen) - goto out_err; + return -ENAMETOOLONG; if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; @@ -1465,7 +1464,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, ctx = create_nfs_open_context(dentry, open_flags); err = PTR_ERR(ctx); if (IS_ERR(ctx)) - goto out_err; + goto out; nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); @@ -1489,7 +1488,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, default: break; } - goto out_err; + goto out; } res = d_add_unique(dentry, inode); if (res != NULL) @@ -1498,22 +1497,20 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - filp = nfs_finish_open(ctx, dentry, od, open_flags, opened); + err = nfs_finish_open(ctx, dentry, od, open_flags, opened); dput(res); - return filp; - -out_err: - return ERR_PTR(err); +out: + return err; no_open: res = nfs_lookup(dir, dentry, NULL); err = PTR_ERR(res); if (IS_ERR(res)) - goto out_err; + goto out; finish_no_open(od, res); - return NULL; + return 1; } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/include/linux/fs.h b/include/linux/fs.h index 
a7618cf28d0e..33bda922988a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1694,9 +1694,9 @@ struct inode_operations { int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec *, int); - struct file * (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, - umode_t create_mode, int *opened); + int (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, int *opened); } ____cacheline_aligned; struct seq_file; -- cgit v1.2.3 From a4a3bdd778715999ddfeefdc52ab76254580fa76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:55:37 -0400 Subject: kill opendata->{mnt,dentry} ->filp->f_path is there for purpose... Signed-off-by: Al Viro --- fs/internal.h | 2 -- fs/namei.c | 15 ++++++--------- fs/open.c | 6 +++--- 3 files changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index ae69a3b150d7..09003a02292d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -83,8 +83,6 @@ extern struct super_block *user_get_super(dev_t); * open.c */ struct opendata { - struct dentry *dentry; - struct vfsmount *mnt; struct file *filp; }; struct open_flags { diff --git a/fs/namei.c b/fs/namei.c index f0dae0057ec9..af83ede92a4f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2269,14 +2269,11 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od->dentry = DENTRY_NOT_SET; - od->mnt = nd->path.mnt; + od->filp->f_path.dentry = DENTRY_NOT_SET; + od->filp->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, opened); if (error < 0) { - if (WARN_ON(od->dentry != DENTRY_NOT_SET)) - dput(od->dentry); - if (create_error && error == -ENOENT) error = create_error; filp = ERR_PTR(error); @@ -2290,13 +2287,13 @@ static struct file *atomic_open(struct 
nameidata *nd, struct dentry *dentry, } if (error) { /* returned 1, that is */ - if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { + if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od->dentry) { + if (od->filp->f_path.dentry) { dput(dentry); - dentry = od->dentry; + dentry = od->filp->f_path.dentry; } goto looked_up; } @@ -2607,7 +2604,7 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - od->mnt = nd->path.mnt; + od->filp->f_path.mnt = nd->path.mnt; filp = finish_open(od, nd->path.dentry, NULL, opened); if (IS_ERR(filp)) { if (filp == ERR_PTR(-EOPENSTALE)) diff --git a/fs/open.c b/fs/open.c index c87f98201c29..2b1654d8bfbd 100644 --- a/fs/open.c +++ b/fs/open.c @@ -788,10 +788,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, struct file *res; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(od->mnt); + mntget(od->filp->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); + res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred()); if (!IS_ERR(res)) *opened |= FILE_OPENED; @@ -810,7 +810,7 @@ EXPORT_SYMBOL(finish_open); */ void finish_no_open(struct opendata *od, struct dentry *dentry) { - od->dentry = dentry; + od->filp->f_path.dentry = dentry; } EXPORT_SYMBOL(finish_no_open); -- cgit v1.2.3 From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:40:19 +0400 Subject: kill struct opendata Just pass struct file *. Methods are happier that way... There's no need to return struct file * from finish_open() now, so let it return int. 
Next: saner prototypes for parts in namei.c Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- fs/9p/vfs_inode.c | 15 +++++------- fs/9p/vfs_inode_dotl.c | 15 +++++------- fs/ceph/dir.c | 8 +++---- fs/ceph/file.c | 7 ++---- fs/ceph/super.h | 2 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/dir.c | 9 ++++---- fs/fuse/dir.c | 15 +++++------- fs/internal.h | 3 --- fs/namei.c | 48 ++++++++++++++++++++------------------- fs/nfs/dir.c | 20 +++++++--------- fs/open.c | 20 ++++++++-------- include/linux/fs.h | 11 ++++----- 15 files changed, 81 insertions(+), 98 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 46a24a6ed095..33e5243948f0 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -63,7 +63,7 @@ ata *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, + struct file *, unsigned open_flag, umode_t create_mode, int *opened); locking rules: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d0d690bbc4c7..279de2190365 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -365,7 +365,7 @@ struct inode_operations { int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, + struct file *, unsigned open_flag, umode_t create_mode, int *opened); }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 62ce8daefa95..2b05651e0c3d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -858,12 +858,11 @@ error: static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, 
umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; u32 perm; - struct file *filp; struct v9fs_inode *v9inode; struct v9fs_session_info *v9ses; struct p9_fid *fid, *inode_fid; @@ -880,7 +879,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(od, res); + finish_no_open(file, res); return 1; } @@ -918,16 +917,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9inode->writeback_fid = (void *) inode_fid; } mutex_unlock(&v9inode->v_mutex); - filp = finish_open(od, dentry, generic_file_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, generic_file_open, opened); + if (err) goto error; - } - filp->private_data = fid; + file->private_data = fid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache) - v9fs_cache_inode_set_cookie(dentry->d_inode, filp); + v9fs_cache_inode_set_cookie(dentry->d_inode, file); #endif *opened |= FILE_CREATED; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 69f05109f75d..cfaebdef9743 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -242,14 +242,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t omode, + struct file *file, unsigned flags, umode_t omode, int *opened) { int err = 0; gid_t gid; umode_t mode; char *name = NULL; - struct file *filp; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; @@ -270,7 +269,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(od, res); + finish_no_open(file, res); return 1; } @@ -357,15 +356,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening 
a file, assign the open fid to the file */ - filp = finish_open(od, dentry, generic_file_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, generic_file_open, opened); + if (err) goto err_clunk_old_fid; - } - filp->private_data = ofid; + file->private_data = ofid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache) - v9fs_cache_inode_set_cookie(inode, filp); + v9fs_cache_inode_set_cookie(inode, file); #endif *opened |= FILE_CREATED; out: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d8bfabeeaa25..80c848e05390 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -635,7 +635,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, } int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; @@ -649,7 +649,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (err < 0) return err; - return ceph_lookup_open(dir, dentry, od, flags, mode, opened); + return ceph_lookup_open(dir, dentry, file, flags, mode, opened); } if (d_unhashed(dentry)) { @@ -663,12 +663,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, /* We don't deal with positive dentries here */ if (dentry->d_inode) { - finish_no_open(od, res); + finish_no_open(file, res); return 1; } *opened |= FILE_CREATED; - err = ceph_lookup_open(dir, dentry, od, flags, mode, opened); + err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); dput(res); return err; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b8cc3ee5401e..1b81d6c31878 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -214,12 +214,11 @@ out: * ceph_release gets called). So fear not! 
*/ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file = NULL; struct ceph_mds_request *req; struct dentry *ret; int err; @@ -248,9 +247,7 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = finish_open(od, req->r_dentry, ceph_open, opened); - if (IS_ERR(file)) - err = PTR_ERR(file); + err = finish_open(file, req->r_dentry, ceph_open, opened); out: ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f7e8e82ec47f..f4d5522cb619 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -807,7 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages, extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, + struct file *od, unsigned flags, umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 58d9aca46a40..48bb474ce294 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -47,7 +47,7 @@ extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); extern int cifs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, + struct file *, unsigned, umode_t, int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 8ca70b102b95..c00c192f17e9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c 
@@ -378,7 +378,7 @@ out: int cifs_atomic_open(struct inode *inode, struct dentry *direntry, - struct opendata *od, unsigned oflags, umode_t mode, + struct file *file, unsigned oflags, umode_t mode, int *opened) { int rc; @@ -405,7 +405,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (IS_ERR(res)) return PTR_ERR(res); - finish_no_open(od, res); + finish_no_open(file, res); return 1; } @@ -431,9 +431,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (rc) goto out; - filp = finish_open(od, direntry, generic_file_open, opened); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); + rc = finish_open(file, direntry, generic_file_open, opened); + if (rc) { CIFSSMBClose(xid, tcon, fileHandle); goto out; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8a9ca09e87d4..110db5425dc1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -370,7 +370,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * 'mknod' + 'open' requests. */ static int fuse_create_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; @@ -382,7 +382,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; - struct file *file; forget = fuse_alloc_forget(); err = -ENOMEM; @@ -450,14 +449,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - file = finish_open(od, entry, generic_file_open, opened); - if (IS_ERR(file)) { - err = PTR_ERR(file); + err = finish_open(file, entry, generic_file_open, opened); + if (err) { fuse_sync_release(ff, flags); } else { file->private_data = fuse_file_get(ff); fuse_finish_open(inode, file); - err = 0; } return err; @@ -473,7 +470,7 @@ out_err: static int fuse_mknod(struct inode *, struct 
dentry *, umode_t, dev_t); static int fuse_atomic_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; @@ -498,7 +495,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fc->no_create) goto mknod; - err = fuse_create_open(dir, entry, od, flags, mode, opened); + err = fuse_create_open(dir, entry, file, flags, mode, opened); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; @@ -512,7 +509,7 @@ mknod: if (err) goto out_dput; no_open: - finish_no_open(od, res); + finish_no_open(file, res); return 1; } diff --git a/fs/internal.h b/fs/internal.h index 09003a02292d..8a9f5fa840f1 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -82,9 +82,6 @@ extern struct super_block *user_get_super(dev_t); /* * open.c */ -struct opendata { - struct file *filp; -}; struct open_flags { int open_flag; umode_t mode; diff --git a/fs/namei.c b/fs/namei.c index af83ede92a4f..aaff8a862151 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2194,7 +2194,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) } static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct opendata *od, + struct path *path, struct file *file, const struct open_flags *op, bool *want_write, bool need_lookup, int *opened) @@ -2269,9 +2269,9 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od->filp->f_path.dentry = DENTRY_NOT_SET; - od->filp->f_path.mnt = nd->path.mnt; - error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, + file->f_path.dentry = DENTRY_NOT_SET; + file->f_path.mnt = nd->path.mnt; + error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode, opened); if (error < 0) { if (create_error && error == -ENOENT) @@ -2287,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry 
*dentry, } if (error) { /* returned 1, that is */ - if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) { + if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od->filp->f_path.dentry) { + if (file->f_path.dentry) { dput(dentry); - dentry = od->filp->f_path.dentry; + dentry = file->f_path.dentry; } goto looked_up; } @@ -2302,7 +2302,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ - filp = od->filp; + filp = file; error = may_open(&filp->f_path, acc_mode, open_flag); if (error) { fput(filp); @@ -2350,7 +2350,7 @@ looked_up: * was performed, only lookup. */ static struct file *lookup_open(struct nameidata *nd, struct path *path, - struct opendata *od, + struct file *file, const struct open_flags *op, bool *want_write, int *opened) { @@ -2370,7 +2370,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, od, op, want_write, + return atomic_open(nd, dentry, path, file, op, want_write, need_lookup, opened); } @@ -2420,7 +2420,7 @@ out_dput: * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, - struct opendata *od, const struct open_flags *op, + struct file *file, const struct open_flags *op, int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; @@ -2497,7 +2497,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, od, op, &want_write, opened); + filp = lookup_open(nd, path, file, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { @@ -2604,13 +2604,15 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - od->filp->f_path.mnt = 
nd->path.mnt; - filp = finish_open(od, nd->path.dentry, NULL, opened); - if (IS_ERR(filp)) { - if (filp == ERR_PTR(-EOPENSTALE)) + file->f_path.mnt = nd->path.mnt; + error = finish_open(file, nd->path.dentry, NULL, opened); + if (error) { + filp = ERR_PTR(error); + if (error == -EOPENSTALE) goto stale_open; goto out; } + filp = file; opened: error = open_check_o_direct(filp); if (error) @@ -2663,17 +2665,17 @@ static struct file *path_openat(int dfd, const char *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; - struct opendata od; + struct file *file; struct file *res; struct path path; int opened = 0; int error; - od.filp = get_empty_filp(); - if (!od.filp) + file = get_empty_filp(); + if (!file) return ERR_PTR(-ENFILE); - od.filp->f_flags = op->open_flag; + file->f_flags = op->open_flag; error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) @@ -2684,7 +2686,7 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, &opened, pathname); + res = do_last(nd, &path, file, op, &opened, pathname); while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; @@ -2699,7 +2701,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, &opened, pathname); + res = do_last(nd, &path, file, op, &opened, pathname); put_link(nd, &link, cookie); } out: @@ -2708,7 +2710,7 @@ out: if (base) fput(base); if (!(opened & FILE_OPENED)) - put_filp(od.filp); + put_filp(file); if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) res = ERR_PTR(-ECHILD); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b56f4b36ed41..dafc86c1c35e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 
static int nfs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, + struct file *, unsigned, umode_t, int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, @@ -1389,10 +1389,9 @@ static int do_open(struct inode *inode, struct file *filp) static int nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct opendata *od, unsigned open_flags, + struct file *file, unsigned open_flags, int *opened) { - struct file *filp; int err; if (ctx->dentry != dentry) { @@ -1407,13 +1406,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx, goto out; } - filp = finish_open(od, dentry, do_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, do_open, opened); + if (err) goto out; - } - nfs_file_set_open_context(filp, ctx); - err = 0; + nfs_file_set_open_context(file, ctx); out: put_nfs_open_context(ctx); @@ -1421,7 +1417,7 @@ out: } static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned open_flags, + struct file *file, unsigned open_flags, umode_t mode, int *opened) { struct nfs_open_context *ctx; @@ -1497,7 +1493,7 @@ static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - err = nfs_finish_open(ctx, dentry, od, open_flags, opened); + err = nfs_finish_open(ctx, dentry, file, open_flags, opened); dput(res); out: @@ -1509,7 +1505,7 @@ no_open: if (IS_ERR(res)) goto out; - finish_no_open(od, res); + finish_no_open(file, res); return 1; } diff --git a/fs/open.c b/fs/open.c index 2b1654d8bfbd..fc829d6c3a4b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -781,21 +781,23 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. 
*/ -struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened) +int finish_open(struct file *file, struct dentry *dentry, + int (*open)(struct inode *, struct file *), + int *opened) { struct file *res; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(od->filp->f_path.mnt); + mntget(file->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) + res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + if (!IS_ERR(res)) { *opened |= FILE_OPENED; + return 0; + } - return res; + return PTR_ERR(res); } EXPORT_SYMBOL(finish_open); @@ -808,9 +810,9 @@ EXPORT_SYMBOL(finish_open); * This can be used to set the result of a successful lookup in ->atomic_open(). * The filesystem's atomic_open() method shall return NULL after calling this. */ -void finish_no_open(struct opendata *od, struct dentry *dentry) +void finish_no_open(struct file *file, struct dentry *dentry) { - od->filp->f_path.dentry = dentry; + file->f_path.dentry = dentry; } EXPORT_SYMBOL(finish_no_open); diff --git a/include/linux/fs.h b/include/linux/fs.h index 33bda922988a..1dcc75c95763 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -427,7 +427,6 @@ struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; -struct opendata; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1695,7 +1694,7 @@ struct inode_operations { u64 len); int (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, + struct file *, unsigned open_flag, umode_t create_mode, int *opened); } ____cacheline_aligned; @@ -2069,10 +2068,10 @@ enum { FILE_CREATED = 1, FILE_OPENED = 2 }; -extern struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - 
int *opened); -extern void finish_no_open(struct opendata *od, struct dentry *dentry); +extern int finish_open(struct file *file, struct dentry *dentry, + int (*open)(struct inode *, struct file *), + int *opened); +extern void finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v1.2.3 From 2675a4eb6a9f1240098721c8a84ede28abd9d7b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:41:10 +0400 Subject: fs/namei.c: get do_last() and friends return int Same conventions as for ->atomic_open(). Trimmed the forest of labels a bit, while we are at it... Signed-off-by: Al Viro --- fs/namei.c | 150 +++++++++++++++++++++++++++++-------------------------------- 1 file changed, 70 insertions(+), 80 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index aaff8a862151..16256d915cb8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2193,18 +2193,17 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) return security_inode_create(dir->dentry->d_inode, dentry, mode); } -static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct file *file, - const struct open_flags *op, - bool *want_write, bool need_lookup, - int *opened) +static int atomic_open(struct nameidata *nd, struct dentry *dentry, + struct path *path, struct file *file, + const struct open_flags *op, + bool *want_write, bool need_lookup, + int *opened) { struct inode *dir = nd->path.dentry->d_inode; unsigned open_flag = open_to_namei_flags(op->open_flag); umode_t mode; int error; int acc_mode; - struct file *filp = NULL; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2212,7 +2211,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, /* Don't create child dentry for a dead directory. 
*/ if (unlikely(IS_DEADDIR(dir))) { - filp = ERR_PTR(-ENOENT); + error = -ENOENT; goto out; } @@ -2276,7 +2275,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (error < 0) { if (create_error && error == -ENOENT) error = create_error; - filp = ERR_PTR(error); goto out; } @@ -2288,7 +2286,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (error) { /* returned 1, that is */ if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { - filp = ERR_PTR(-EIO); + error = -EIO; goto out; } if (file->f_path.dentry) { @@ -2302,27 +2300,24 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ - filp = file; - error = may_open(&filp->f_path, acc_mode, open_flag); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } + error = may_open(&file->f_path, acc_mode, open_flag); + if (error) + fput(file); out: dput(dentry); - return filp; + return error; no_open: if (need_lookup) { dentry = lookup_real(dir, dentry, nd); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); if (create_error) { int open_flag = op->open_flag; - filp = ERR_PTR(create_error); + error = create_error; if ((open_flag & O_EXCL)) { if (!dentry->d_inode) goto out; @@ -2338,7 +2333,7 @@ no_open: looked_up: path->dentry = dentry; path->mnt = nd->path.mnt; - return NULL; + return 1; } /* @@ -2349,10 +2344,10 @@ looked_up: * Returns open file or NULL on success, error otherwise. NULL means no open * was performed, only lookup. 
*/ -static struct file *lookup_open(struct nameidata *nd, struct path *path, - struct file *file, - const struct open_flags *op, - bool *want_write, int *opened) +static int lookup_open(struct nameidata *nd, struct path *path, + struct file *file, + const struct open_flags *op, + bool *want_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2363,7 +2358,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); /* Cached positive dentry: will open in f_op->open */ if (!need_lookup && dentry->d_inode) @@ -2379,7 +2374,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, dentry = lookup_real(dir_inode, dentry, nd); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); } /* Negative dentry, just create the file */ @@ -2409,26 +2404,25 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; - return NULL; + return 1; out_dput: dput(dentry); - return ERR_PTR(error); + return error; } /* * Handle the last step of open() */ -static struct file *do_last(struct nameidata *nd, struct path *path, - struct file *file, const struct open_flags *op, - int *opened, const char *pathname) +static int do_last(struct nameidata *nd, struct path *path, + struct file *file, const struct open_flags *op, + int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool want_write = false; int acc_mode = op->acc_mode; - struct file *filp; struct inode *inode; bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; @@ -2443,22 +2437,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, case LAST_DOT: 
error = handle_dots(nd, nd->last_type); if (error) - return ERR_PTR(error); + return error; /* fallthrough */ case LAST_ROOT: error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, nd->path.dentry); if (open_flag & O_CREAT) { error = -EISDIR; - goto exit; + goto out; } goto finish_open; case LAST_BIND: error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, dir); goto finish_open; } @@ -2474,7 +2468,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, goto finish_lookup; if (error < 0) - goto exit; + goto out; BUG_ON(nd->inode != dir->d_inode); } else { @@ -2486,29 +2480,29 @@ static struct file *do_last(struct nameidata *nd, struct path *path, */ error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, dir); error = -EISDIR; /* trailing slashes? */ if (nd->last.name[nd->last.len]) - goto exit; + goto out; } retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, file, op, &want_write, opened); + error = lookup_open(nd, path, file, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); - if (filp) { - if (IS_ERR(filp)) + if (error <= 0) { + if (error) goto out; if ((*opened & FILE_CREATED) || - !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + !S_ISREG(file->f_path.dentry->d_inode->i_mode)) will_truncate = false; - audit_inode(pathname, filp->f_path.dentry); + audit_inode(pathname, file->f_path.dentry); goto opened; } @@ -2554,18 +2548,18 @@ finish_lookup: error = -ENOENT; if (!inode) { path_to_nameidata(path, nd); - goto exit; + goto out; } if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; - goto exit; + goto out; } } BUG_ON(inode != path->dentry->d_inode); - return NULL; + return 1; } if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { @@ -2581,14 +2575,14 @@ finish_lookup: error 
= complete_walk(nd); if (error) { path_put(&save_parent); - return ERR_PTR(error); + return error; } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) - goto exit; + goto out; error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) - goto exit; + goto out; audit_inode(pathname, nd->path.dentry); finish_open: if (!S_ISREG(nd->inode->i_mode)) @@ -2597,32 +2591,30 @@ finish_open: if (will_truncate) { error = mnt_want_write(nd->path.mnt); if (error) - goto exit; + goto out; want_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) - goto exit; + goto out; file->f_path.mnt = nd->path.mnt; error = finish_open(file, nd->path.dentry, NULL, opened); if (error) { - filp = ERR_PTR(error); if (error == -EOPENSTALE) goto stale_open; goto out; } - filp = file; opened: - error = open_check_o_direct(filp); + error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(filp, op->acc_mode); + error = ima_file_check(file, op->acc_mode); if (error) goto exit_fput; if (will_truncate) { - error = handle_truncate(filp); + error = handle_truncate(file); if (error) goto exit_fput; } @@ -2631,16 +2623,14 @@ out: mnt_drop_write(nd->path.mnt); path_put(&save_parent); terminate_walk(nd); - return filp; + return error; exit_dput: path_put_conditional(path, nd); -exit: - filp = ERR_PTR(error); goto out; exit_fput: - fput(filp); - goto exit; + fput(file); + goto out; stale_open: /* If no saved parent or already retried then can't retry */ @@ -2666,7 +2656,6 @@ static struct file *path_openat(int dfd, const char *pathname, { struct file *base = NULL; struct file *file; - struct file *res; struct path path; int opened = 0; int error; @@ -2679,29 +2668,29 @@ static struct file *path_openat(int dfd, const char *pathname, error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) - goto out_filp; + goto out; current->total_link_count = 0; error = 
link_path_walk(pathname, nd); if (unlikely(error)) - goto out_filp; + goto out; - res = do_last(nd, &path, file, op, &opened, pathname); - while (unlikely(!res)) { /* trailing symlink */ + error = do_last(nd, &path, file, op, &opened, pathname); + while (unlikely(error > 0)) { /* trailing symlink */ struct path link = path; void *cookie; if (!(nd->flags & LOOKUP_FOLLOW)) { path_put_conditional(&path, nd); path_put(&nd->path); - res = ERR_PTR(-ELOOP); + error = -ELOOP; break; } nd->flags |= LOOKUP_PARENT; nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); if (unlikely(error)) - goto out_filp; - res = do_last(nd, &path, file, op, &opened, pathname); + break; + error = do_last(nd, &path, file, op, &opened, pathname); put_link(nd, &link, cookie); } out: @@ -2709,19 +2698,20 @@ out: path_put(&nd->root); if (base) fput(base); - if (!(opened & FILE_OPENED)) + if (!(opened & FILE_OPENED)) { + BUG_ON(!error); put_filp(file); - if (res == ERR_PTR(-EOPENSTALE)) { - if (flags & LOOKUP_RCU) - res = ERR_PTR(-ECHILD); - else - res = ERR_PTR(-ESTALE); } - return res; - -out_filp: - res = ERR_PTR(error); - goto out; + if (unlikely(error)) { + if (error == -EOPENSTALE) { + if (flags & LOOKUP_RCU) + error = -ECHILD; + else + error = -ESTALE; + } + file = ERR_PTR(error); + } + return file; } struct file *do_filp_open(int dfd, const char *pathname, -- cgit v1.2.3 From e45198a6ac24bd2c4ad4a43b670c2f1a23dd2df3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 06:48:09 -0400 Subject: make finish_no_open() return int namely, 1 ;-) That's what we want to return from ->atomic_open() instances after finish_no_open(). 
Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 6 ++---- fs/9p/vfs_inode_dotl.c | 6 ++---- fs/ceph/dir.c | 6 ++---- fs/cifs/dir.c | 3 +-- fs/fuse/dir.c | 3 +-- fs/nfs/dir.c | 3 +-- fs/open.c | 3 ++- include/linux/fs.h | 2 +- 8 files changed, 12 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 2b05651e0c3d..eae476fb401c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -878,10 +878,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(file, res); - return 1; - } + if (!(flags & O_CREAT) || dentry->d_inode) + return finish_no_open(file, res); err = 0; fid = NULL; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index cfaebdef9743..1ee10c89df97 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -268,10 +268,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(file, res); - return 1; - } + if (!(flags & O_CREAT) || dentry->d_inode) + return finish_no_open(file, res); v9ses = v9fs_inode2v9ses(dir); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 80c848e05390..d42eee1c5de3 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -662,10 +662,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, } /* We don't deal with positive dentries here */ - if (dentry->d_inode) { - finish_no_open(file, res); - return 1; - } + if (dentry->d_inode) + return finish_no_open(file, res); *opened |= FILE_CREATED; err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c00c192f17e9..e8c53c80dbd5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -405,8 +405,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (IS_ERR(res)) return PTR_ERR(res); - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); 
} rc = check_name(direntry); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 110db5425dc1..ccdab3ac4223 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -509,8 +509,7 @@ mknod: if (err) goto out_dput; no_open: - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); } /* diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dafc86c1c35e..f167c7a1d67b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1505,8 +1505,7 @@ no_open: if (IS_ERR(res)) goto out; - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/open.c b/fs/open.c index fc829d6c3a4b..d51c1b71b062 100644 --- a/fs/open.c +++ b/fs/open.c @@ -810,9 +810,10 @@ EXPORT_SYMBOL(finish_open); * This can be used to set the result of a successful lookup in ->atomic_open(). * The filesystem's atomic_open() method shall return NULL after calling this. */ -void finish_no_open(struct file *file, struct dentry *dentry) +int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; + return 1; } EXPORT_SYMBOL(finish_no_open); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1dcc75c95763..17ee20dba86c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2071,7 +2071,7 @@ enum { extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened); -extern void finish_no_open(struct file *file, struct dentry *dentry); +extern int finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v1.2.3 From 96b7e579addd3cdc806c1667bf5b6b126070827c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:22:04 -0400 Subject: switch do_dentry_open() to returning int Signed-off-by: Al Viro --- fs/open.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 
d51c1b71b062..1241c597d317 100644 --- a/fs/open.c +++ b/fs/open.c @@ -667,10 +667,10 @@ int open_check_o_direct(struct file *f) return 0; } -static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) +static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) { static const struct file_operations empty_fops = {}; struct inode *inode; @@ -699,7 +699,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; - return f; + return 0; } f->f_op = fops_get(inode->i_fop); @@ -726,7 +726,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); - return f; + return 0; cleanup_all: fops_put(f->f_op); @@ -749,7 +749,7 @@ cleanup_all: cleanup_file: dput(dentry); mntput(mnt); - return ERR_PTR(error); + return error; } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, @@ -757,17 +757,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int (*open)(struct inode *, struct file *), const struct cred *cred) { - struct file *res = do_dentry_open(dentry, mnt, f, open, cred); - if (!IS_ERR(res)) { - int error = open_check_o_direct(f); + int error; + error = do_dentry_open(dentry, mnt, f, open, cred); + if (!error) { + error = open_check_o_direct(f); if (error) { - fput(res); - res = ERR_PTR(error); + fput(f); + f = ERR_PTR(error); } - } else { + } else { put_filp(f); + f = ERR_PTR(error); } - return res; + return f; } /** @@ -785,19 +787,17 @@ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened) { - struct file *res; + int error; BUG_ON(*opened & FILE_OPENED); /* once it's 
opened, it's opened */ mntget(file->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); - if (!IS_ERR(res)) { + error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + if (!error) *opened |= FILE_OPENED; - return 0; - } - return PTR_ERR(res); + return error; } EXPORT_SYMBOL(finish_open); -- cgit v1.2.3 From 2a027e7a1873812240cbdac0f55c4734ff0042a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:24:38 -0400 Subject: fold __dentry_open() into its sole caller Signed-off-by: Al Viro --- fs/open.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 1241c597d317..28fbacbd5e31 100644 --- a/fs/open.c +++ b/fs/open.c @@ -752,26 +752,6 @@ cleanup_file: return error; } -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) -{ - int error; - error = do_dentry_open(dentry, mnt, f, open, cred); - if (!error) { - error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); - f = ERR_PTR(error); - } - return f; -} - /** * finish_open - finish opening a file * @od: opaque open data @@ -841,7 +821,18 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, } f->f_flags = flags; - return __dentry_open(dentry, mnt, f, NULL, cred); + error = do_dentry_open(dentry, mnt, f, NULL, cred); + if (!error) { + error = open_check_o_direct(f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } else { + put_filp(f); + f = ERR_PTR(error); + } + return f; } EXPORT_SYMBOL(dentry_open); -- cgit v1.2.3 From 02e5180d991f203441687cecd0b7e6a2ba0a34d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:32:45 -0400 Subject: do_dentry_open(): take initialization of file->f_path to caller ... 
and get rid of a couple of arguments and a pointless reassignment in finish_open() case. Signed-off-by: Al Viro --- fs/open.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 28fbacbd5e31..124ccb1d38a0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -667,8 +667,7 @@ int open_check_o_direct(struct file *f) return 0; } -static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, +static int do_dentry_open(struct file *f, int (*open)(struct inode *, struct file *), const struct cred *cred) { @@ -682,9 +681,9 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(f->f_flags & O_PATH)) f->f_mode = FMODE_PATH; - inode = dentry->d_inode; + inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { - error = __get_file_write_access(inode, mnt); + error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; if (!special_file(inode->i_mode)) @@ -692,8 +691,6 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, } f->f_mapping = inode->i_mapping; - f->f_path.dentry = dentry; - f->f_path.mnt = mnt; f->f_pos = 0; file_sb_list_add(f, inode->i_sb); @@ -740,15 +737,14 @@ cleanup_all: * here, so just reset the state. 
*/ file_reset_write(f); - mnt_drop_write(mnt); + mnt_drop_write(f->f_path.mnt); } } file_sb_list_del(f); - f->f_path.dentry = NULL; - f->f_path.mnt = NULL; cleanup_file: - dput(dentry); - mntput(mnt); + path_put(&f->f_path); + f->f_path.mnt = NULL; + f->f_path.dentry = NULL; return error; } @@ -771,9 +767,9 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(file->f_path.mnt); - dget(dentry); + file->f_path.dentry = dget(dentry); - error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + error = do_dentry_open(file, open, current_cred()); if (!error) *opened |= FILE_OPENED; @@ -821,7 +817,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, } f->f_flags = flags; - error = do_dentry_open(dentry, mnt, f, NULL, cred); + f->f_path.mnt = mnt; + f->f_path.dentry = dentry; + error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); if (error) { -- cgit v1.2.3 From 93420b40bb19433c3bc01c37c6c908ae7ce13228 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:18:15 -0400 Subject: switch nfs_lookup_check_intent() away from nameidata just pass the flags Signed-off-by: Al Viro --- fs/nfs/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f167c7a1d67b..48485f1f0bda 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1037,10 +1037,10 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * component of the path and none of them is set before that last * component. 
*/ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, +static inline unsigned int nfs_lookup_check_intent(unsigned int flags, unsigned int mask) { - return nd->flags & mask; + return flags & mask; } /* @@ -1051,7 +1051,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); + return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL); } /* @@ -1074,7 +1074,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) if (nd->flags & LOOKUP_REVAL) goto out_force; /* This is an open(2) */ - if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && + if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && !(server->flags & NFS_MOUNT_NOCTO) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) @@ -1098,7 +1098,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) + if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; -- cgit v1.2.3 From facc3530fb5c89a40bc83045422add392b8db4a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:33:51 -0400 Subject: nfs_lookup_verify_inode() - nd is *always* non-NULL here Signed-off-by: Al Viro --- fs/nfs/dir.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 48485f1f0bda..ad5aef4995aa 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1069,19 +1069,16 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) if (IS_AUTOMOUNT(inode)) return 0; - if (nd != NULL) { - /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) - goto out_force; - /* This is an 
open(2) */ - if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) - goto out_force; - return 0; - } - return nfs_revalidate_inode(server, inode); + /* VFS wants an on-the-wire revalidation */ + if (nd->flags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && + !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || + S_ISDIR(inode->i_mode))) + goto out_force; + return 0; out_force: return __nfs_revalidate_inode(server, inode); } -- cgit v1.2.3 From fa3c56bbda6c2ac2a57d96ba501dbe85cccd312b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:36:40 -0400 Subject: fs/nfs/dir.c: switch to passing nd->flags instead of nd wherever possible Signed-off-by: Al Viro --- fs/nfs/dir.c | 51 +++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ad5aef4995aa..71a199435ca9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1030,28 +1030,15 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) return 1; } -/* - * Return the intent data that applies to this particular path component - * - * Note that the current set of intents only apply to the very last - * component of the path and none of them is set before that last - * component. - */ -static inline unsigned int nfs_lookup_check_intent(unsigned int flags, - unsigned int mask) -{ - return flags & mask; -} - /* * Use intent information to check whether or not we're going to do * an O_EXCL create using this path component. 
*/ -static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL); + return flags & LOOKUP_EXCL; } /* @@ -1063,20 +1050,18 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) * */ static inline -int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) +int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) { struct nfs_server *server = NFS_SERVER(inode); if (IS_AUTOMOUNT(inode)) return 0; /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) + if (flags & LOOKUP_REVAL) goto out_force; /* This is an open(2) */ - if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) + if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) goto out_force; return 0; out_force: @@ -1092,10 +1077,10 @@ out_force: */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0) + if (flags & LOOKUP_CREATE) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; @@ -1115,6 +1100,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, */ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { + unsigned int flags = nd->flags; struct inode *dir; struct inode *inode; struct dentry *parent; @@ -1122,7 +1108,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return 
-ECHILD; parent = dget_parent(dentry); @@ -1131,7 +1117,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) inode = dentry->d_inode; if (!inode) { - if (nfs_neg_need_reval(dir, dentry, nd)) + if (nfs_neg_need_reval(dir, dentry, flags)) goto out_bad; goto out_valid_noent; } @@ -1147,8 +1133,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, nd)) + if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, flags)) goto out_zap_parent; goto out_valid; } @@ -1306,7 +1292,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nfs_is_exclusive_create(dir, nd)) { + if (nd && nfs_is_exclusive_create(dir, nd->flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1507,15 +1493,16 @@ no_open: static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { + unsigned int flags = nd->flags; struct dentry *parent = NULL; struct inode *inode; struct inode *dir; int ret = 0; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; - if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY)) + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto no_open; if (d_mountpoint(dentry)) goto no_open; @@ -1528,7 +1515,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) * optimize away revalidation of negative dentries. 
*/ if (inode == NULL) { - if (!nfs_neg_need_reval(dir, dentry, nd)) + if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; goto out; } @@ -1537,7 +1524,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (!S_ISREG(inode->i_mode)) goto no_open_dput; /* We cannot do exclusive creation on a positive dentry */ - if (nd && nd->flags & LOOKUP_EXCL) + if (flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ -- cgit v1.2.3 From 0b728e1911cbe6e24020727c3870628b9653f32a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:03:43 -0400 Subject: stop passing nameidata * to ->d_revalidate() Just the lookup flags. Die, bastard, die... Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/porting | 5 +++++ Documentation/filesystems/vfs.txt | 8 ++++---- fs/9p/vfs_dentry.c | 4 ++-- fs/afs/dir.c | 6 +++--- fs/ceph/dir.c | 6 +++--- fs/cifs/dir.c | 8 ++++---- fs/coda/dir.c | 6 +++--- fs/ecryptfs/dentry.c | 20 ++++---------------- fs/fat/namei_vfat.c | 12 ++++++------ fs/fuse/dir.c | 4 ++-- fs/gfs2/dentry.c | 6 +++--- fs/hfs/sysdep.c | 4 ++-- fs/jfs/namei.c | 6 +++--- fs/namei.c | 2 +- fs/ncpfs/dir.c | 6 +++--- fs/nfs/dir.c | 10 ++++------ fs/ocfs2/dcache.c | 5 ++--- fs/proc/base.c | 22 +++++++++++----------- fs/proc/internal.h | 2 +- fs/proc/namespaces.c | 2 +- fs/proc/proc_sysctl.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/sysfs/dir.c | 4 ++-- include/linux/dcache.h | 2 +- 25 files changed, 74 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 33e5243948f0..52a057367f6f 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -9,7 +9,7 @@ be able to use diff(1). 
--------------------------- dentry_operations -------------------------- prototypes: - int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index ed9fbc23ece0..56750b714d1e 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -431,3 +431,8 @@ release it yourself. d_alloc_root() is gone, along with a lot of bugs caused by code misusing it. Replacement: d_make_root(inode). The difference is, d_make_root() drops the reference to inode if dentry allocation fails. + +-- +[mandatory] + The witch is dead! Well, 1/3 of it, anyway. ->d_revalidate() does *not* +take struct nameidata anymore; just the flags. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 279de2190365..b9a406b2ed0f 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -902,7 +902,7 @@ the VFS uses a default. As of kernel 2.6.22, the following members are defined: struct dentry_operations { - int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, @@ -921,11 +921,11 @@ struct dentry_operations { dcache. Most filesystems leave this as NULL, because all their dentries in the dcache are valid - d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU). + d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU). 
If in rcu-walk mode, the filesystem must revalidate the dentry without blocking or storing to the dentry, d_parent and d_inode should not be - used without care (because they can go NULL), instead nd->inode should - be used. + used without care (because they can change and, in d_inode case, even + become NULL under us). If a situation is encountered that rcu-walk cannot handle, return -ECHILD and it will be called again in ref-walk mode. diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index d529437ff442..64600b5d0522 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -100,13 +100,13 @@ static void v9fs_dentry_release(struct dentry *dentry) } } -static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct p9_fid *fid; struct inode *inode; struct v9fs_inode *v9inode; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e22dc4b4a503..65c54ab04733 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); -static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); +static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_release(struct dentry *dentry); static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, @@ -598,7 +598,7 @@ success: * - NOTE! 
the hit can be a negative hit too, so we can't assume we have an * inode */ -static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); @@ -607,7 +607,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) void *dir_version; int ret; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; vnode = AFS_FS_I(dentry->d_inode); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d42eee1c5de3..8898eef8bca9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1042,12 +1042,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) /* * Check if cached dentry can be trusted. */ -static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) { int valid = 0; struct inode *dir; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, @@ -1094,7 +1094,7 @@ static void ceph_d_release(struct dentry *dentry) } static int ceph_snapdir_d_revalidate(struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { /* * Eventually, we'll want to revalidate snapped metadata diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e8c53c80dbd5..b97ff48b7df6 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -700,9 +700,9 @@ lookup_out: } static int -cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) +cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { - if (nd && (nd->flags & LOOKUP_RCU)) + if (flags & LOOKUP_RCU) return -ECHILD; if (direntry->d_inode) { @@ -731,7 +731,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) * This may be nfsd (or something), anyway, we can't see the * intent of this. So, since this can be for creation, drop it. 
*/ - if (!nd) + if (!flags) return 0; /* @@ -739,7 +739,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 177515829062..7f8f1a7c6d87 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -46,7 +46,7 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, static int coda_readdir(struct file *file, void *buf, filldir_t filldir); /* dentry ops */ -static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); +static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); static int coda_dentry_delete(const struct dentry *); /* support routines */ @@ -536,12 +536,12 @@ out: } /* called when a cache lookup succeeds */ -static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) +static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) { struct inode *inode; struct coda_inode_info *cii; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = de->d_inode; diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 534c1d46e69e..1b5d9af937df 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -32,7 +32,7 @@ /** * ecryptfs_d_revalidate - revalidate an ecryptfs dentry * @dentry: The ecryptfs dentry - * @nd: The associated nameidata + * @flags: lookup flags * * Called when the VFS needs to revalidate a dentry. This * is called whenever a name lookup finds a dentry in the @@ -42,32 +42,20 @@ * Returns 1 if valid, 0 otherwise. 
* */ -static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry; struct vfsmount *lower_mnt; - struct dentry *dentry_save = NULL; - struct vfsmount *vfsmount_save = NULL; int rc = 1; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; lower_dentry = ecryptfs_dentry_to_lower(dentry); lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; - if (nd) { - dentry_save = nd->path.dentry; - vfsmount_save = nd->path.mnt; - nd->path.dentry = lower_dentry; - nd->path.mnt = lower_mnt; - } - rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); - if (nd) { - nd->path.dentry = dentry_save; - nd->path.mnt = vfsmount_save; - } + rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); if (dentry->d_inode) { struct inode *lower_inode = ecryptfs_inode_to_lower(dentry->d_inode); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 98ae804f5273..0bbdf3990060 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -41,9 +41,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry) return ret; } -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +static int vfat_revalidate(struct dentry *dentry, unsigned int flags) { - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; /* This is not negative dentry. Always valid. 
*/ @@ -52,9 +52,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) return vfat_revalidate_shortname(dentry); } -static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) { - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; /* @@ -74,7 +74,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) * This may be nfsd (or something), anyway, we can't see the * intent of this. So, since this can be for creation, drop it. */ - if (!nd) + if (!flags) return 0; /* @@ -82,7 +82,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; return vfat_revalidate_shortname(dentry); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ccdab3ac4223..eba30bd9ba2b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc) * the lookup once more. If the lookup results in the same inode, * then refresh the attributes, timeouts and mark the dentry valid. 
*/ -static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) +static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) { struct inode *inode; @@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!inode) return 0; - if (nd && (nd->flags & LOOKUP_RCU)) + if (flags & LOOKUP_RCU) return -ECHILD; fc = get_fuse_conn(inode); diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 0da8da2c991d..4fddb3c22d25 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -25,7 +25,7 @@ /** * gfs2_drevalidate - Check directory lookup consistency * @dentry: the mapping to check - * @nd: + * @flags: lookup flags * * Check to make sure the lookup necessary to arrive at this inode from its * parent is still good. @@ -33,7 +33,7 @@ * Returns: 1 if the dentry is ok, 0 if it isn't */ -static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) +static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) { struct dentry *parent; struct gfs2_sbd *sdp; @@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) int error; int had_lock = 0; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 19cf291eb91f..91b91fd3a901 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -13,12 +13,12 @@ /* dentry case-handling: just lowercase everything */ -static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) +static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags) { struct inode *inode; int diff; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 07c91ca6017d..f37977fb0871 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1570,7 +1570,7 @@ out: return result; } -static int jfs_ci_revalidate(struct dentry *dentry, 
struct nameidata *nd) +static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags) { /* * This is not negative dentry. Always valid. @@ -1589,7 +1589,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) * This may be nfsd (or something), anyway, we can't see the * intent of this. So, since this can be for creation, drop it. */ - if (!nd) + if (!flags) return 0; /* @@ -1597,7 +1597,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; return 1; } diff --git a/fs/namei.c b/fs/namei.c index 16256d915cb8..1a5707aaed36 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -465,7 +465,7 @@ err_root: static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { - return dentry->d_op->d_revalidate(dentry, nd); + return dentry->d_op->d_revalidate(dentry, nd ? 
nd->flags : 0); } /** diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index aeed93a6bde0..32607f749588 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -72,7 +72,7 @@ const struct inode_operations ncp_dir_inode_operations = /* * Dentry operations routines */ -static int ncp_lookup_validate(struct dentry *, struct nameidata *); +static int ncp_lookup_validate(struct dentry *, unsigned int); static int ncp_hash_dentry(const struct dentry *, const struct inode *, struct qstr *); static int ncp_compare_dentry(const struct dentry *, const struct inode *, @@ -290,7 +290,7 @@ leave_me:; static int -ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) +ncp_lookup_validate(struct dentry *dentry, unsigned int flags) { struct ncp_server *server; struct dentry *parent; @@ -302,7 +302,7 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) if (dentry == dentry->d_sb->s_root) return 1; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 71a199435ca9..656f52e9aa2e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1098,9 +1098,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. 
*/ -static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - unsigned int flags = nd->flags; struct inode *dir; struct inode *inode; struct dentry *parent; @@ -1339,7 +1338,7 @@ out: } #ifdef CONFIG_NFS_V4 -static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); +static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -1491,9 +1490,8 @@ no_open: return finish_no_open(file, res); } -static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - unsigned int flags = nd->flags; struct dentry *parent = NULL; struct inode *inode; struct inode *dir; @@ -1537,7 +1535,7 @@ out: no_open_dput: dput(parent); no_open: - return nfs_lookup_revalidate(dentry, nd); + return nfs_lookup_revalidate(dentry, flags); } #endif /* CONFIG_NFSV4 */ diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index af4488268e49..8db4b58b2e4b 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -49,14 +49,13 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) } -static int ocfs2_dentry_revalidate(struct dentry *dentry, - struct nameidata *nd) +static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; /* if all else fails, just return false */ struct ocfs2_super *osb; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; diff --git a/fs/proc/base.c b/fs/proc/base.c index 437195f204e1..bf749cca4cc6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1601,13 +1601,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * made this apply to all per process world readable and executable * directories. 
*/ -int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; const struct cred *cred; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; @@ -1781,7 +1781,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) return proc_fd_info(dentry->d_inode, path, NULL); } -static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) +static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; @@ -1789,7 +1789,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) struct files_struct *files; const struct cred *cred; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; @@ -1868,7 +1868,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) + if (tid_fd_revalidate(dentry, 0)) error = NULL; out: @@ -2003,7 +2003,7 @@ static int dname_to_vma_addr(struct dentry *dentry, return 0; } -static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; @@ -2013,7 +2013,7 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode; int status = 0; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; if (!capable(CAP_SYS_ADMIN)) { @@ -2371,7 +2371,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if 
(tid_fd_revalidate(dentry, NULL)) + if (tid_fd_revalidate(dentry, 0)) error = NULL; out: @@ -2430,7 +2430,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; @@ -3237,7 +3237,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; @@ -3508,7 +3508,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index eca4aca5b6e2..e0c2a48dab73 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -142,7 +142,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr); -int pid_revalidate(struct dentry *dentry, struct nameidata *nd); +int pid_revalidate(struct dentry *dentry, unsigned int flags); struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); extern const struct dentry_operations pid_dentry_operations; int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 0d9e23a39e49..40ceb40f9853 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -56,7 +56,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, d_set_d_op(dentry, 
&pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3476bca8f7af..fda69fa39099 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -794,9 +794,9 @@ static const struct inode_operations proc_sys_dir_operations = { .getattr = proc_sys_getattr, }; -static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) +static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) { - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; return !PROC_I(dentry->d_inode)->sysctl->unregistering; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46fc1c20a6b1..e6ad8d7dea64 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -942,7 +942,7 @@ int reiserfs_permission(struct inode *inode, int mask) return generic_permission(inode, mask); } -static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) +static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags) { return -EPERM; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e6bb9b2a4cbe..038e74b3af87 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -303,12 +303,12 @@ static int sysfs_dentry_delete(const struct dentry *dentry) return !!(sd->s_flags & SYSFS_FLAG_REMOVED); } -static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) +static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct sysfs_dirent *sd; int is_dir; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; sd = dentry->d_fsdata; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8ca255518204..caa34e50537e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -144,7 +144,7 @@ enum dentry_d_lock_class }; struct dentry_operations { - 
int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, -- cgit v1.2.3 From 4ce16ef3fed92c627b4b0136c02c85c81ee105e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:10:59 -0400 Subject: fs/namei.c: don't pass nameidata to d_revalidate() since the method wrapped by it doesn't need that anymore... Signed-off-by: Al Viro --- fs/namei.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 1a5707aaed36..91c637b68984 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -463,9 +463,9 @@ err_root: return -ECHILD; } -static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) +static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0); + return dentry->d_op->d_revalidate(dentry, flags); } /** @@ -511,7 +511,7 @@ static int complete_walk(struct nameidata *nd) return 0; /* Note: we do not d_invalidate() */ - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (status > 0) return 0; @@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (d_need_lookup(dentry)) { *need_lookup = true; } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, nd); + error = d_revalidate(dentry, nd ? 
nd->flags : 0); if (unlikely(error <= 0)) { if (error < 0) { dput(dentry); @@ -1158,7 +1158,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, if (unlikely(d_need_lookup(dentry))) goto unlazy; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { if (status != -ECHILD) need_reval = 0; @@ -1188,7 +1188,7 @@ unlazy: } if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { if (status < 0) { dput(dentry); -- cgit v1.2.3 From 201f956e43d4542723514e024d948011dd766d43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:42:10 +0400 Subject: fs/namei.c: don't pass nameidata to lookup_dcache() just the flags... Signed-off-by: Al Viro --- fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 91c637b68984..2e943ab04f32 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1039,7 +1039,7 @@ static void follow_dotdot(struct nameidata *nd) * dir->d_inode->i_mutex must be held */ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, - struct nameidata *nd, bool *need_lookup) + unsigned int flags, bool *need_lookup) { struct dentry *dentry; int error; @@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (d_need_lookup(dentry)) { *need_lookup = true; } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, nd ? nd->flags : 0); + error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (error < 0) { dput(dentry); @@ -1104,7 +1104,7 @@ static struct dentry *__lookup_hash(struct qstr *name, bool need_lookup; struct dentry *dentry; - dentry = lookup_dcache(name, base, nd, &need_lookup); + dentry = lookup_dcache(name, base, nd ? 
nd->flags : 0, &need_lookup); if (!need_lookup) return dentry; @@ -2356,7 +2356,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, bool need_lookup; *opened &= ~FILE_CREATED; - dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); + dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup); if (IS_ERR(dentry)) return PTR_ERR(dentry); -- cgit v1.2.3 From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:13:09 -0400 Subject: stop passing nameidata to ->lookup() Just the flags; only NFS cares even about that, but there are legitimate uses for such argument. And getting rid of that completely would require splitting ->lookup() into a couple of methods (at least), so let's leave that alone for now... Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +-- Documentation/filesystems/porting | 4 ++-- Documentation/filesystems/vfs.txt | 2 +- fs/9p/v9fs.h | 2 +- fs/9p/vfs_inode.c | 8 ++++---- fs/9p/vfs_inode_dotl.c | 2 +- fs/adfs/dir.c | 2 +- fs/affs/affs.h | 2 +- fs/affs/namei.c | 2 +- fs/afs/dir.c | 4 ++-- fs/afs/mntpt.c | 4 ++-- fs/autofs4/root.c | 4 ++-- fs/bad_inode.c | 2 +- fs/befs/linuxvfs.c | 4 ++-- fs/bfs/dir.c | 2 +- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 6 +++--- fs/cifs/cifsfs.h | 2 +- fs/cifs/dir.c | 4 ++-- fs/coda/dir.c | 4 ++-- fs/configfs/dir.c | 2 +- fs/cramfs/inode.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/efs/efs.h | 2 +- fs/efs/namei.c | 3 ++- fs/exofs/namei.c | 2 +- fs/ext2/namei.c | 2 +- fs/ext3/namei.c | 2 +- fs/ext4/namei.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 2 +- fs/freevxfs/vxfs_lookup.c | 4 ++-- fs/fuse/dir.c | 4 ++-- fs/gfs2/inode.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dir.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/hppfs/hppfs.c | 2 +- fs/isofs/isofs.h | 2 +- fs/isofs/namei.c | 2 +- fs/jffs2/dir.c | 4 ++-- fs/jfs/namei.c | 2 +- fs/libfs.c | 
2 +- fs/logfs/dir.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 2 +- fs/ncpfs/dir.c | 4 ++-- fs/nfs/dir.c | 8 ++++---- fs/nilfs2/namei.c | 2 +- fs/ntfs/namei.c | 2 +- fs/ocfs2/namei.c | 2 +- fs/omfs/dir.c | 2 +- fs/openpromfs/inode.c | 4 ++-- fs/proc/base.c | 18 ++++++++++-------- fs/proc/generic.c | 2 +- fs/proc/internal.h | 4 ++-- fs/proc/namespaces.c | 2 +- fs/proc/proc_net.c | 2 +- fs/proc/proc_sysctl.c | 2 +- fs/proc/root.c | 7 +++---- fs/qnx4/namei.c | 2 +- fs/qnx4/qnx4.h | 2 +- fs/qnx6/namei.c | 2 +- fs/qnx6/qnx6.h | 2 +- fs/reiserfs/namei.c | 2 +- fs/romfs/super.c | 2 +- fs/squashfs/namei.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysv/namei.c | 2 +- fs/ubifs/dir.c | 2 +- fs/udf/namei.c | 2 +- fs/ufs/namei.c | 2 +- fs/xfs/xfs_iops.c | 4 ++-- include/linux/fs.h | 4 ++-- kernel/cgroup.c | 4 ++-- 79 files changed, 115 insertions(+), 114 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 52a057367f6f..33f2c8f1db81 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -38,8 +38,7 @@ d_manage: no no yes (ref-walk) maybe --------------------------- inode_operations --------------------------- prototypes: int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid -ata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 56750b714d1e..690f573928b9 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -434,5 +434,5 @@ d_make_root() drops the reference to inode if dentry allocation fails. -- [mandatory] - The witch is dead! Well, 1/3 of it, anyway. 
->d_revalidate() does *not* -take struct nameidata anymore; just the flags. + The witch is dead! Well, 2/3 of it, anyway. ->d_revalidate() and +->lookup() do *not* take struct nameidata anymore; just the flags. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b9a406b2ed0f..ee786354946c 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -342,7 +342,7 @@ filesystem. As of kernel 2.6.22, the following members are defined: struct inode_operations { int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index e78956cbd702..34c59f14a1c9 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -144,7 +144,7 @@ extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nameidata); + unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index eae476fb401c..bb0d7627f95b 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -785,7 +785,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode */ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nameidata) + unsigned int flags) { struct dentry 
*res; struct super_block *sb; @@ -795,8 +795,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, char *name; int result = 0; - p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", - dir, dentry->d_name.name, dentry, nameidata); + p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n", + dir, dentry->d_name.name, dentry, flags); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -869,7 +869,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *res = NULL; if (d_unhashed(dentry)) { - res = v9fs_vfs_lookup(dir, dentry, NULL); + res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 1ee10c89df97..b97619fed196 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -259,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct dentry *res = NULL; if (d_unhashed(dentry)) { - res = v9fs_vfs_lookup(dir, dentry, NULL); + res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 3d83075aaa2e..b3be2e7c5643 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -266,7 +266,7 @@ const struct dentry_operations adfs_dentry_operations = { }; static struct dentry * -adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct object_info obj; diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 3a130e27eb15..49e4e3457bfd 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -153,7 +153,7 @@ extern void affs_free_bitmap(struct super_block *sb); /* namei.c */ extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); -extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); +extern struct dentry *affs_lookup(struct 
inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 47806940aac0..7f9721be709f 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -211,7 +211,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry) } struct dentry * -affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct super_block *sb = dir->i_sb; struct buffer_head *bh; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 65c54ab04733..ffb33e36ea72 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -20,7 +20,7 @@ #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); @@ -516,7 +516,7 @@ out: * look up an entry in a directory */ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct afs_vnode *vnode; struct afs_fid fid; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 298cf8919ec7..9682c33d5daf 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -22,7 +22,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); static int afs_mntpt_open(struct inode *inode, struct file *file); static void afs_mntpt_expiry_timed_out(struct work_struct *work); @@ -104,7 +104,7 @@ out: */ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata 
*nd) + unsigned int flags) { _enter("%p,%p{%p{%s},%s}", dir, diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 75e5f1c8e028..e7396cfdb109 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -32,7 +32,7 @@ static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); #endif static int autofs4_dir_open(struct inode *inode, struct file *file); -static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); +static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int); static struct vfsmount *autofs4_d_automount(struct path *); static int autofs4_d_manage(struct dentry *, bool); static void autofs4_dentry_release(struct dentry *); @@ -458,7 +458,7 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) } /* Lookups in the root directory */ -static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct autofs_sb_info *sbi; struct autofs_info *ino; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 1b35d6bd06b0..d27e73c69ba4 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -179,7 +179,7 @@ static int bad_inode_create (struct inode *dir, struct dentry *dentry, } static struct dentry *bad_inode_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { return ERR_PTR(-EIO); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e18da23d42b5..cf7f3c67c8b7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -34,7 +34,7 @@ static int befs_readdir(struct file *, void *, filldir_t); static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); 
-static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int); static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); @@ -159,7 +159,7 @@ befs_get_block(struct inode *inode, sector_t block, } static struct dentry * -befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct super_block *sb = dir->i_sb; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index d12c7966db27..3f1cd3b71681 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -133,7 +133,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; struct buffer_head *bh; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a101572f1cea..e5f1f81b2d65 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4247,7 +4247,7 @@ static void btrfs_dentry_release(struct dentry *dentry) } static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *ret; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8898eef8bca9..74b2f3c54fe7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -576,7 +576,7 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) * the MDS so that it gets our 'caps wanted' value in a single op. 
*/ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -653,7 +653,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, } if (d_unhashed(dentry)) { - res = ceph_lookup(dir, dentry, NULL); + res = ceph_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); @@ -678,7 +678,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, */ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) { - struct dentry *result = ceph_lookup(dir, dentry, NULL); + struct dentry *result = ceph_lookup(dir, dentry, 0); if (result && !IS_ERR(result)) { /* diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 48bb474ce294..1abd31fd5bf0 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -50,7 +50,7 @@ extern int cifs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, - struct nameidata *); + unsigned int); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index b97ff48b7df6..2d732b9276ee 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -401,7 +401,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, * in network traffic in the other paths. 
*/ if (!(oflags & O_CREAT)) { - struct dentry *res = cifs_lookup(inode, direntry, NULL); + struct dentry *res = cifs_lookup(inode, direntry, 0); if (IS_ERR(res)) return PTR_ERR(res); @@ -621,7 +621,7 @@ mknod_out: struct dentry * cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, - struct nameidata *nd) + unsigned int flags) { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 7f8f1a7c6d87..da35e965861d 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -31,7 +31,7 @@ /* dir inode-ops */ static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd); -static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); +static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, struct dentry *entry); static int coda_unlink(struct inode *dir_inode, struct dentry *entry); @@ -94,7 +94,7 @@ const struct file_operations coda_dir_operations = { /* inode operations for directories */ /* access routines: lookup, readlink, permission */ -static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) +static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) { struct super_block *sb = dir->i_sb; const char *name = entry->d_name.name; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7e6c52d8a207..7414ae24a79b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den static struct dentry * configfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct configfs_dirent * sd; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c 
index d013c46402ed..28cca01ca9c9 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -417,7 +417,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Lookup and fill in the inode data.. */ -static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned int offset = 0; struct inode *inode = NULL; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a07441a0a878..4ab50c3f5ab2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -374,7 +374,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, */ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct dentry *ecryptfs_dentry, - struct nameidata *ecryptfs_nd) + unsigned int flags) { char *encrypted_and_encoded_name = NULL; size_t encrypted_and_encoded_name_size; diff --git a/fs/efs/efs.h b/fs/efs/efs.h index d8305b582ab0..5528926ac7f6 100644 --- a/fs/efs/efs.h +++ b/fs/efs/efs.h @@ -129,7 +129,7 @@ extern struct inode *efs_iget(struct super_block *, unsigned long); extern efs_block_t efs_map_block(struct inode *, efs_block_t); extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *efs_lookup(struct inode *, struct dentry *, unsigned int); extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 832b10ded82f..96f66d213a19 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -58,7 +58,8 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) return(0); } -struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { 
+struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ efs_ino_t inodenum; struct inode *inode = NULL; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index fc7161d6bf6b..909ed6ea4cf6 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -46,7 +46,7 @@ static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode) } static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode; ino_t ino; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index f663a67d7bf0..b3e6778cd1e7 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -55,7 +55,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) * Methods themselves. */ -static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode; ino_t ino; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index eeb63dfc5d20..86d25f3f6043 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1011,7 +1011,7 @@ errout: return NULL; } -static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode; struct ext3_dir_entry_2 * de; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5845cd97bf8b..4fba3cd42e2b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1312,7 +1312,7 @@ errout: return NULL; } -static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; struct ext4_dir_entry_2 *de; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index c5938c9084b9..47c608b05294 100644 --- a/fs/fat/namei_msdos.c +++ 
b/fs/fat/namei_msdos.c @@ -201,7 +201,7 @@ static const struct dentry_operations msdos_dentry_operations = { /***** Get inode using directory and name */ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 0bbdf3990060..44152571524e 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -714,7 +714,7 @@ static int vfat_d_anon_disconn(struct dentry *dentry) } static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 3360f1e678ad..bd447e88f208 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -48,7 +48,7 @@ #define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize)) -static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); static int vxfs_readdir(struct file *, void *, filldir_t); const struct inode_operations vxfs_dir_inode_ops = { @@ -203,7 +203,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) * in the return pointer. 
*/ static struct dentry * -vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) +vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags) { struct inode *ip = NULL; ino_t ino; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index eba30bd9ba2b..385235ac137d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -316,7 +316,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, } static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) + unsigned int flags) { int err; struct fuse_entry_out outarg; @@ -478,7 +478,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct dentry *res = NULL; if (d_unhashed(entry)) { - res = fuse_lookup(dir, entry, NULL); + res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9ba2444e077..19e443b73354 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -775,7 +775,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, */ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && !IS_ERR(inode)) { diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 62fc14ea4b73..617b1ed71f52 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -18,7 +18,7 @@ * hfs_lookup() */ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { hfs_cat_rec rec; struct hfs_find_data fd; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 761ec06354b4..451c97281b83 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -489,7 +489,7 @@ out: } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; hfs_cat_rec rec; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 
26b53fb09f68..90c2f78b2c79 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -25,7 +25,7 @@ static inline void hfsplus_instantiate(struct dentry *dentry, /* Find the entry inside dir named dentry->d_name */ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; struct hfs_find_data fd; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 82b69ee4dacc..7009265b746f 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -168,7 +168,7 @@ const struct dentry_operations hfsplus_dentry_operations = { }; static struct dentry *hfsplus_file_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct hfs_find_data fd; struct super_block *sb = dir->i_sb; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2afa5bbccf9b..0ea005228e1b 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -595,7 +595,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode; char *name; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index b8472f803f4e..78e12b2e0ea2 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -189,7 +189,7 @@ out: * to tell read_inode to read fnode or not. 
*/ -struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { const unsigned char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c07ef1f1ced6..ac1ead194db5 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -220,7 +220,7 @@ extern const struct dentry_operations hpfs_dentry_operations; /* dir.c */ -struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *); +struct dentry *hpfs_lookup(struct inode *, struct dentry *, unsigned int); extern const struct file_operations hpfs_dir_ops; /* dnode.c */ diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index d4f93b52cec5..e5c06531dcc4 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -138,7 +138,7 @@ static int file_removed(struct dentry *dentry, const char *file) } static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *proc_dentry, *parent; struct qstr *name = &dentry->d_name; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 0e73f63d9274..3620ad1ea9bc 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -114,7 +114,7 @@ extern int isofs_name_translate(struct iso_directory_record *, char *, struct in int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *); int get_acorn_filename(struct iso_directory_record *, char *, struct inode *); -extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int flags); extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 1e2946f2a69e..c167028844ed 100644 --- 
a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -163,7 +163,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, return 0; } -struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int found; unsigned long uninitialized_var(block); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index b56018896d5e..6a601673f89f 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -27,7 +27,7 @@ static int jffs2_readdir (struct file *, void *, filldir_t); static int jffs2_create (struct inode *,struct dentry *,umode_t, struct nameidata *); static struct dentry *jffs2_lookup (struct inode *,struct dentry *, - struct nameidata *); + unsigned int); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); static int jffs2_unlink (struct inode *,struct dentry *); static int jffs2_symlink (struct inode *,struct dentry *,const char *); @@ -74,7 +74,7 @@ const struct inode_operations jffs2_dir_inode_operations = nice and simple */ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, - struct nameidata *nd) + unsigned int flags) { struct jffs2_inode_info *dir_f; struct jffs2_full_dirent *fd = NULL, *fd_list; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index f37977fb0871..34fe85555caf 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1436,7 +1436,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, return rc; } -static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) { struct btstack btstack; ino_t inum; diff --git a/fs/libfs.c b/fs/libfs.c index f86ec27a4230..ebd03f6910d5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -53,7 +53,7 @@ static int simple_delete_dentry(const struct dentry *dentry) * Lookup the data. 
This is trivial - if the dentry didn't already * exist, we know it is negative. Set d_op to delete negative dentries. */ -struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { static const struct dentry_operations simple_dentry_operations = { .d_delete = simple_delete_dentry, diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index bea5d1b9954b..8a3dcc615b39 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -349,7 +349,7 @@ static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) } static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct page *page; struct logfs_disk_dentry *dd; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 2d0ee1786305..1f245240ea08 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -18,7 +18,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/namei.c b/fs/namei.c index 2e943ab04f32..175e81b8f261 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } - old = dir->i_op->lookup(dir, dentry, nd); + old = dir->i_op->lookup(dir, dentry, nd ? 
nd->flags : 0); if (unlikely(old)) { dput(dentry); dentry = old; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 32607f749588..a0cff22bfc9b 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -31,7 +31,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t, static int ncp_readdir(struct file *, void *, filldir_t); static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *); -static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, umode_t); static int ncp_rmdir(struct inode *, struct dentry *); @@ -836,7 +836,7 @@ out: return result; } -static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct ncp_server *server = NCP_SERVER(dir); struct inode *inode = NULL; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 656f52e9aa2e..8f21205c5896 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -46,7 +46,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); static int nfs_mkdir(struct inode *, struct dentry *, umode_t); static int nfs_rmdir(struct inode *, struct dentry *); @@ -1270,7 +1270,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_release = nfs_d_release, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *nfs_lookup(struct 
inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct dentry *parent; @@ -1291,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nd && nfs_is_exclusive_create(dir, nd->flags)) { + if (nfs_is_exclusive_create(dir, flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1482,7 +1482,7 @@ out: return err; no_open: - res = nfs_lookup(dir, dentry, NULL); + res = nfs_lookup(dir, dentry, 0); err = PTR_ERR(res); if (IS_ERR(res)) goto out; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index b72847988b78..5e5f779db76f 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -63,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) */ static struct dentry * -nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 358273e59ade..436f36037e09 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -101,7 +101,7 @@ * Locking: Caller must hold i_mutex on the directory. 
*/ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, - struct nameidata *nd) + unsigned int flags) { ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); struct inode *dent_inode; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 9f39c640cddf..fd71f6e5841f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -98,7 +98,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int status; u64 blkno; diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index f00576ec320f..3d254872e641 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -291,7 +291,7 @@ static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct buffer_head *bh; struct inode *inode = NULL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index bc49c975d501..4a3477949bca 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -170,13 +170,13 @@ static const struct file_operations openprom_operations = { .llseek = generic_file_llseek, }; -static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, unsigned int); static const struct inode_operations openprom_inode_operations = { .lookup = openpromfs_lookup, }; -static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct op_inode_info *ent_oi, *oi = OP_I(dir); struct device_node *dp, *child; diff --git a/fs/proc/base.c b/fs/proc/base.c index bf749cca4cc6..8eaa5ea1c613 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1956,7 
+1956,7 @@ out_no_task: } static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); } @@ -2145,7 +2145,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, } static struct dentry *proc_map_files_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; @@ -2380,7 +2380,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, static struct dentry *proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); } @@ -2630,7 +2630,7 @@ static const struct file_operations proc_attr_dir_operations = { }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); @@ -3114,7 +3114,8 @@ static const struct file_operations proc_tgid_base_operations = { .llseek = default_llseek, }; -static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } @@ -3243,7 +3244,7 @@ out: return error; } -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *result; struct task_struct *task; @@ -3470,7 +3471,8 @@ static int proc_tid_base_readdir(struct file * filp, tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); } -static struct dentry 
*proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } @@ -3514,7 +3516,7 @@ out: return error; } -static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2edf34f2eb61..b3647fe6a608 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -446,7 +446,7 @@ out_unlock: } struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookup_de(PDE(dir), dir, dentry); } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index e0c2a48dab73..e1167a1c9126 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -106,7 +106,7 @@ void pde_users_dec(struct proc_dir_entry *pde); extern spinlock_t proc_subdir_lock; -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); unsigned long task_vsize(struct mm_struct *); unsigned long task_statm(struct mm_struct *, @@ -132,7 +132,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data); * of the /proc/ subdirectories. 
*/ int proc_readdir(struct file *, void *, filldir_t); -struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); +struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 40ceb40f9853..b178ed733c36 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -140,7 +140,7 @@ const struct file_operations proc_ns_dir_operations = { }; static struct dentry *proc_ns_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct dentry *error; struct task_struct *task = get_proc_task(dir); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 06e1cc17caf6..fe72cd073dea 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir) } static struct dentry *proc_tgid_net_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct dentry *de; struct net *net; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fda69fa39099..dfafeb2b05a0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -433,7 +433,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; diff --git a/fs/proc/root.c b/fs/proc/root.c index 7c30fce037c0..568b20290c75 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -200,13 +200,12 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct return 0; } -static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) { - if 
(!proc_lookup(dir, dentry, nd)) { + if (!proc_lookup(dir, dentry, flags)) return NULL; - } - return proc_pid_lookup(dir, dentry, nd); + return proc_pid_lookup(dir, dentry, flags); } static int proc_root_readdir(struct file * filp, diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index a512c0b30e8e..d024505ba007 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -95,7 +95,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, return NULL; } -struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int ino; struct qnx4_inode_entry *de; diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h index 244d4620189b..34e2d329c97e 100644 --- a/fs/qnx4/qnx4.h +++ b/fs/qnx4/qnx4.h @@ -23,7 +23,7 @@ struct qnx4_inode_info { }; extern struct inode *qnx4_iget(struct super_block *, unsigned long); -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern unsigned long qnx4_count_free_blocks(struct super_block *sb); extern unsigned long qnx4_block_map(struct inode *inode, long iblock); diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c index 8a97289e04ad..0561326a94f5 100644 --- a/fs/qnx6/namei.c +++ b/fs/qnx6/namei.c @@ -13,7 +13,7 @@ #include "qnx6.h" struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { unsigned ino; struct page *page; diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h index 6c5e02a0b6a8..b00fcc960d37 100644 --- a/fs/qnx6/qnx6.h +++ b/fs/qnx6/qnx6.h @@ -45,7 +45,7 @@ struct qnx6_inode_info { extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino); extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); #ifdef CONFIG_QNX6FS_DEBUG extern void 
qnx6_superblock_debug(struct qnx6_super_block *, diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 84e8a69cee9d..1d9cf248c471 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -322,7 +322,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, } static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int retval; int lock_depth; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index e64f6b5f7ae5..77c5f2173983 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -210,7 +210,7 @@ out: * look up an entry in a directory */ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { unsigned long offset, maxoff; struct inode *inode; diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index abcc58f3c152..7834a517f7f4 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -134,7 +134,7 @@ out: static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { const unsigned char *name = dentry->d_name.name; int len = dentry->d_name.len; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 038e74b3af87..efd373e3e0aa 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -764,7 +764,7 @@ int sysfs_create_dir(struct kobject * kobj) } static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *ret = NULL; struct dentry *parent = dentry->d_parent; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index d7466e293614..a8c4359cd0e1 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -43,7 +43,7 @@ const struct dentry_operations sysv_dentry_operations = { .d_hash = sysv_hash, }; -static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *sysv_lookup(struct inode * dir, struct dentry * 
dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index a6d42efc76d2..845b2df08317 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -184,7 +184,7 @@ static int dbg_check_name(const struct ubifs_info *c, } static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int err; union ubifs_key key; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 18024178ac4c..929cc205985a 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -251,7 +251,7 @@ out_ok: } static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; struct fileIdentDesc cfi; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index a2281cadefa1..bc77fa170b9d 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -46,7 +46,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1a25fd802798..b41cfba14faf 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -197,7 +197,7 @@ STATIC struct dentry * xfs_vn_lookup( struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct xfs_inode *cip; struct xfs_name name; @@ -222,7 +222,7 @@ STATIC struct dentry * xfs_vn_ci_lookup( struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct xfs_inode *ip; struct xfs_name xname; diff --git a/include/linux/fs.h b/include/linux/fs.h index 17ee20dba86c..7a71709b7fa7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1666,7 +1666,7 @@ struct file_operations { }; struct inode_operations { - struct dentry 
* (*lookup) (struct inode *,struct dentry *, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); void * (*follow_link) (struct dentry *, struct nameidata *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); @@ -2571,7 +2571,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); -extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b303dfc7dce0..0cd1314acdaf 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); -static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); static const struct inode_operations cgroup_dir_inode_operations; @@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = { .rename = cgroup_rename, }; -static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); -- cgit v1.2.3 From 72bd866a01fc62ccbc466f3eb7599b14c937e96b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 
Jun 2012 17:17:17 -0400 Subject: fs/namei.c: don't pass nameidata to __lookup_hash() and lookup_real() Signed-off-by: Al Viro --- fs/namei.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 175e81b8f261..fc01090a96c1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1080,7 +1080,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, * dir->d_inode->i_mutex must be held */ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *old; @@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } - old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0); + old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { dput(dentry); dentry = old; @@ -1099,16 +1099,16 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, } static struct dentry *__lookup_hash(struct qstr *name, - struct dentry *base, struct nameidata *nd) + struct dentry *base, unsigned int flags) { bool need_lookup; struct dentry *dentry; - dentry = lookup_dcache(name, base, nd ? 
nd->flags : 0, &need_lookup); + dentry = lookup_dcache(name, base, flags, &need_lookup); if (!need_lookup) return dentry; - return lookup_real(base->d_inode, dentry, nd); + return lookup_real(base->d_inode, dentry, flags); } /* @@ -1227,7 +1227,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name, BUG_ON(nd->inode != parent->d_inode); mutex_lock(&parent->d_inode->i_mutex); - dentry = __lookup_hash(name, parent, nd); + dentry = __lookup_hash(name, parent, nd->flags); mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1859,7 +1859,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, */ static struct dentry *lookup_hash(struct nameidata *nd) { - return __lookup_hash(&nd->last, nd->path.dentry, nd); + return __lookup_hash(&nd->last, nd->path.dentry, nd->flags); } /** @@ -1906,7 +1906,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (err) return ERR_PTR(err); - return __lookup_hash(&this, base, NULL); + return __lookup_hash(&this, base, 0); } int user_path_at_empty(int dfd, const char __user *name, unsigned flags, @@ -2310,7 +2310,7 @@ out: no_open: if (need_lookup) { - dentry = lookup_real(dir, dentry, nd); + dentry = lookup_real(dir, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -2372,7 +2372,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (need_lookup) { BUG_ON(dentry->d_inode); - dentry = lookup_real(dir_inode, dentry, nd); + dentry = lookup_real(dir_inode, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); } -- cgit v1.2.3 From ebfc3b49a7ac25920cb5be5445f602e51d2ea559 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 18:05:36 -0400 Subject: don't pass nameidata to ->create() boolean "does it have to be exclusive?" flag is passed instead; Local filesystem should just ignore it - the object is guaranteed not to be there yet. 
Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/porting | 6 ++++++ Documentation/filesystems/vfs.txt | 2 +- fs/9p/vfs_inode.c | 2 +- fs/9p/vfs_inode_dotl.c | 2 +- fs/affs/affs.h | 2 +- fs/affs/namei.c | 2 +- fs/afs/dir.c | 4 ++-- fs/bad_inode.c | 2 +- fs/bfs/dir.c | 2 +- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 2 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/dir.c | 2 +- fs/coda/dir.c | 4 ++-- fs/ecryptfs/inode.c | 3 +-- fs/exofs/namei.c | 2 +- fs/ext2/namei.c | 2 +- fs/ext3/namei.c | 2 +- fs/ext4/namei.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 2 +- fs/fuse/dir.c | 2 +- fs/gfs2/inode.c | 5 +---- fs/hfs/dir.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/namei.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/jffs2/dir.c | 4 ++-- fs/jfs/namei.c | 2 +- fs/logfs/dir.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 3 +-- fs/ncpfs/dir.c | 4 ++-- fs/nfs/dir.c | 9 +++------ fs/nilfs2/namei.c | 2 +- fs/ocfs2/dlmfs/dlmfs.c | 2 +- fs/ocfs2/namei.c | 2 +- fs/omfs/dir.c | 2 +- fs/ramfs/inode.c | 2 +- fs/reiserfs/namei.c | 2 +- fs/reiserfs/xattr.c | 2 +- fs/sysv/namei.c | 2 +- fs/ubifs/dir.c | 2 +- fs/udf/namei.c | 2 +- fs/ufs/namei.c | 2 +- fs/xfs/xfs_iops.c | 2 +- include/linux/fs.h | 2 +- ipc/mqueue.c | 2 +- mm/shmem.c | 2 +- 51 files changed, 62 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 33f2c8f1db81..e0cce2a5f820 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -37,7 +37,7 @@ d_manage: no no yes (ref-walk) maybe --------------------------- inode_operations --------------------------- prototypes: - int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *); + int (*create) (struct inode *,struct dentry *,umode_t, bool); struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int 
(*unlink) (struct inode *,struct dentry *); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 690f573928b9..2bef2b3843d1 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -436,3 +436,9 @@ d_make_root() drops the reference to inode if dentry allocation fails. [mandatory] The witch is dead! Well, 2/3 of it, anyway. ->d_revalidate() and ->lookup() do *not* take struct nameidata anymore; just the flags. +-- +[mandatory] + ->create() doesn't take struct nameidata *; unlike the previous +two, it gets "is it an O_EXCL or equivalent?" boolean argument. Note that +local filesystems can ignore that argument - they are guaranteed that the +object doesn't exist. It's remote/distributed ones that might care... diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ee786354946c..aa754e01464e 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -341,7 +341,7 @@ This describes how the VFS can manipulate an inode in your filesystem.
As of kernel 2.6.22, the following members are defined: struct inode_operations { - int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *); + int (*create) (struct inode *,struct dentry *, umode_t, bool); struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index bb0d7627f95b..cbf9dbb1b2a2 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -725,7 +725,7 @@ error: static int v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); u32 perm = unixmode2p9mode(v9ses, mode); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index b97619fed196..40895546e103 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -235,7 +235,7 @@ int v9fs_open_to_dotl_flags(int flags) static int v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, - struct nameidata *nd) + bool excl) { return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 49e4e3457bfd..6e216419f340 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -155,7 +155,7 @@ extern void affs_free_bitmap(struct super_block *sb); extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); -extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *); +extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); extern int affs_rmdir(struct inode *dir, struct dentry *dentry); extern int affs_link(struct 
dentry *olddentry, struct inode *dir, diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 7f9721be709f..ff65884a7839 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -255,7 +255,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry) } int -affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ffb33e36ea72..db477906ba4f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -29,7 +29,7 @@ static void afs_d_release(struct dentry *dentry); static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd); + bool excl); static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static int afs_rmdir(struct inode *dir, struct dentry *dentry); static int afs_unlink(struct inode *dir, struct dentry *dentry); @@ -949,7 +949,7 @@ error: * create a regular file on an AFS filesystem */ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct afs_file_status status; struct afs_callback cb; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index d27e73c69ba4..b1342ffb3cf6 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops = }; static int bad_inode_create (struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { return -EIO; } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 3f1cd3b71681..2785ef91191a 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -85,7 +85,7 @@ const struct file_operations bfs_dir_operations = { extern void dump_imap(const char *, struct super_block *); static int bfs_create(struct inode *dir, struct 
dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int err; struct inode *inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e5f1f81b2d65..fb8d671d00e6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4893,7 +4893,7 @@ out_unlock: } static int btrfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 74b2f3c54fe7..00894ff9246c 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -730,7 +730,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, } static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return ceph_mknod(dir, dentry, mode, 0); } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 1abd31fd5bf0..1c49c5a9b27a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -45,7 +45,7 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf; extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, - struct nameidata *); + bool excl); extern int cifs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2d732b9276ee..a180265a10b5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -451,7 +451,7 @@ free_xid: } int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, - struct nameidata *nd) + bool excl) { int rc; int xid = GetXid(); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index da35e965861d..49fe52d25600 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -30,7 +30,7 @@ #include "coda_int.h" /* dir inode-ops */ -static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd); +static int 
coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl); static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, struct dentry *entry); @@ -188,7 +188,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir) } /* creation routines: create, mknod, mkdir, link, symlink */ -static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd) +static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl) { int error; const char *name=de->d_name.name; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4ab50c3f5ab2..f079dafea75a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -240,7 +240,6 @@ out: * @dir: The inode of the directory in which to create the file. * @dentry: The eCryptfs dentry * @mode: The mode of the new file. - * @nd: nameidata * * Creates a new file. * @@ -248,7 +247,7 @@ out: */ static int ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct inode *ecryptfs_inode; int rc; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 909ed6ea4cf6..4731fd991efe 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -60,7 +60,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, } static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode = exofs_new_inode(dir, mode); int err = PTR_ERR(inode); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index b3e6778cd1e7..9ba7de0e5903 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). 
*/ -static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd) +static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { struct inode *inode; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 86d25f3f6043..85286dbe2753 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1690,7 +1690,7 @@ static int ext3_add_nondir(handle_t *handle, * with d_instantiate(). */ static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, - struct nameidata *nd) + bool excl) { handle_t *handle; struct inode * inode; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4fba3cd42e2b..eca3e48a62f8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle, * with d_instantiate(). */ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { handle_t *handle; struct inode *inode; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 47c608b05294..70d993a93805 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -265,7 +265,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, /***** Create a file */ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 44152571524e..6cc480652433 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -772,7 +772,7 @@ error: } static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 385235ac137d..8964cf3999b2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -611,7 +611,7 @@ static int fuse_mknod(struct inode *dir, struct dentry 
*entry, umode_t mode, } static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, - struct nameidata *nd) + bool excl) { return fuse_mknod(dir, entry, mode, 0); } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 19e443b73354..867674785fcf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -755,11 +755,8 @@ fail: */ static int gfs2_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { - int excl = 0; - if (nd && (nd->flags & LOOKUP_EXCL)) - excl = 1; return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); } diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 617b1ed71f52..422dde2ec0a1 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -187,7 +187,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file) * the directory and the name (and its length) of the new file. */ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; int res; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 90c2f78b2c79..378ea0c43f19 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -465,7 +465,7 @@ out: } static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return hfsplus_mknod(dir, dentry, mode, 0); } diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 0ea005228e1b..124146543aa7 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -553,7 +553,7 @@ static int read_name(struct inode *ino, char *name) } int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; char *name; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9083ef8af58c..bc9082482f68 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -115,7 +115,7 @@ bail: return err; } -static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 
struct nameidata *nd) +static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { const unsigned char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index cc9281b6c628..e13e9bdb0bf5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -565,7 +565,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mod return retval; } -static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 6a601673f89f..23245191c5b5 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -25,7 +25,7 @@ static int jffs2_readdir (struct file *, void *, filldir_t); static int jffs2_create (struct inode *,struct dentry *,umode_t, - struct nameidata *); + bool); static struct dentry *jffs2_lookup (struct inode *,struct dentry *, unsigned int); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); @@ -175,7 +175,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) static int jffs2_create(struct inode *dir_i, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct jffs2_raw_inode *ri; struct jffs2_inode_info *f, *dir_f; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 34fe85555caf..c426293e16c1 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -73,7 +73,7 @@ static inline void free_ea_wmap(struct inode *inode) * */ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int rc = 0; tid_t tid; /* transaction id */ diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 8a3dcc615b39..26e4a941532f 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -502,7 +502,7 @@ 
static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 1f245240ea08..0db73d9dd668 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -55,7 +55,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, } static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return minix_mknod(dir, dentry, mode, 0); } diff --git a/fs/namei.c b/fs/namei.c index fc01090a96c1..fd71156bfd74 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2082,7 +2082,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { int error = may_create(dir, dentry); - if (error) return error; @@ -2093,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(dir, dentry, mode, nd); + error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL)); if (!error) fsnotify_create(dir, dentry); return error; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index a0cff22bfc9b..4117e7b377bb 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -30,7 +30,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t, static int ncp_readdir(struct file *, void *, filldir_t); -static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +static int ncp_create(struct inode *, struct dentry *, umode_t, bool); static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, umode_t); @@ -980,7 +980,7 @@ out: } static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode, - 
struct nameidata *nd) + bool excl) { return ncp_create_new(dir, dentry, mode, 0, 0); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f21205c5896..a6b1c7fb8232 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -47,7 +47,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); -static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +static int nfs_create(struct inode *, struct dentry *, umode_t, bool); static int nfs_mkdir(struct inode *, struct dentry *, umode_t); static int nfs_rmdir(struct inode *, struct dentry *); static int nfs_unlink(struct inode *, struct dentry *); @@ -1589,11 +1589,11 @@ out_error: * reply path made it appear to have failed. */ static int nfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct iattr attr; + int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - int open_flags = O_CREAT|O_EXCL; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1601,9 +1601,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && !(nd->flags & LOOKUP_EXCL)) - open_flags = O_CREAT; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 5e5f779db76f..1d0c0b84c5a3 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -85,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) * with d_instantiate(). 
*/ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; struct nilfs_transaction_info ti; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index e31d6ae013ab..83b6f98e0665 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -526,7 +526,7 @@ bail: static int dlmfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int status = 0; struct inode *inode; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index fd71f6e5841f..f1fd0741162b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -618,7 +618,7 @@ static int ocfs2_mkdir(struct inode *dir, static int ocfs2_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int ret; diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 3d254872e641..fb5b3ff79dc6 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -285,7 +285,7 @@ static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return omfs_add_node(dir, dentry, mode | S_IFREG); } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a1fdabe21dec..eab8c09d3801 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) return retval; } -static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 1d9cf248c471..3916be1a330b 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) } static 
int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int retval; struct inode *inode; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e6ad8d7dea64..d319963aeb11 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -62,7 +62,7 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!mutex_is_locked(&dir->i_mutex)); - return dir->i_op->create(dir, dentry, mode, NULL); + return dir->i_op->create(dir, dentry, mode, true); } #endif diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index a8c4359cd0e1..1c0d5f264767 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, return err; } -static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd) +static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { return sysv_mknod(dir, dentry, mode, 0); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 845b2df08317..b1cca89aeb68 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -246,7 +246,7 @@ out: } static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 929cc205985a..544b2799a911 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, } static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct udf_fileident_bh fibh; struct inode *inode; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index bc77fa170b9d..90d74b8f8eba 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -71,7 +71,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi * with 
d_instantiate(). */ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; int err; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index b41cfba14faf..9c4340f5c3e0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -179,7 +179,7 @@ xfs_vn_create( struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool flags) { return xfs_vn_mknod(dir, dentry, mode, 0); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a71709b7fa7..df869d248e7c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1674,7 +1674,7 @@ struct inode_operations { int (*readlink) (struct dentry *, char __user *,int); void (*put_link) (struct dentry *, struct nameidata *, void *); - int (*create) (struct inode *,struct dentry *,umode_t,struct nameidata *); + int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8ce57691e7b6..da2c188688b1 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -413,7 +413,7 @@ static void mqueue_evict_inode(struct inode *inode) } static int mqueue_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct inode *inode; struct mq_attr *attr = dentry->d_fsdata; diff --git a/mm/shmem.c b/mm/shmem.c index bd106361be4b..c15b998e5a86 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1877,7 +1877,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return shmem_mknod(dir, dentry, mode | S_IFREG, 0); } -- cgit v1.2.3 From 312b63fba9e88a0dcf800834b8ede8716bcc1e17 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 
Jun 2012 18:09:36 -0400 Subject: don't pass nameidata * to vfs_create() all we want is a boolean flag, same as the method gets now Signed-off-by: Al Viro --- fs/cachefiles/namei.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/namei.c | 9 +++++---- fs/nfsd/vfs.c | 4 ++-- include/linux/fs.h | 2 +- ipc/mqueue.c | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7f0771d3894e..b0b5f7cdfffa 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -567,7 +567,7 @@ lookup_again: if (ret < 0) goto create_error; start = jiffies; - ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); + ret = vfs_create(dir->d_inode, next, S_IFREG, true); cachefiles_hist(cachefiles_create_histogram, start); if (ret < 0) goto create_error; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index f079dafea75a..da52cdbe8388 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -173,7 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode, inode = ERR_CAST(lower_dir_dentry); goto out; } - rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL); + rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); diff --git a/fs/namei.c b/fs/namei.c index fd71156bfd74..ffcd4e114b6e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2079,7 +2079,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool want_excl) { int error = may_create(dir, dentry); if (error) @@ -2092,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL)); + error = dir->i_op->create(dir, dentry, mode, want_excl); if (!error) 
fsnotify_create(dir, dentry); return error; @@ -2396,7 +2396,8 @@ static int lookup_open(struct nameidata *nd, struct path *path, error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) goto out_dput; - error = vfs_create(dir->d_inode, dentry, mode, nd); + error = vfs_create(dir->d_inode, dentry, mode, + nd->flags & LOOKUP_EXCL); if (error) goto out_dput; } @@ -2883,7 +2884,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, goto out_drop_write; switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(path.dentry->d_inode,dentry,mode,NULL); + error = vfs_create(path.dentry->d_inode,dentry,mode,true); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(path.dentry->d_inode,dentry,mode, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c8bd9c3be7f7..05d9eee6be3a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1329,7 +1329,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (!host_err) nfsd_check_ignore_resizing(iap); break; @@ -1492,7 +1492,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (host_err < 0) { fh_drop_write(fhp); goto out_nfserr; diff --git a/include/linux/fs.h b/include/linux/fs.h index df869d248e7c..2f857e9eeb3a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1571,7 +1571,7 @@ extern void unlock_super(struct super_block *); /* * VFS helper functions.. 
*/ -extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index da2c188688b1..2dee38d53c73 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -751,7 +751,7 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, ret = mnt_want_write(ipc_ns->mq_mnt); if (ret) goto out; - ret = vfs_create(dir->d_inode, dentry, mode, NULL); + ret = vfs_create(dir->d_inode, dentry, mode, true); dentry->d_fsdata = NULL; if (ret) goto out_drop_write; -- cgit v1.2.3 From 1acf0af9b981027f3e73e93f0d3f85abdc794f71 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Jun 2012 16:13:46 +0100 Subject: VFS: Fix the banner comment on lookup_open() Since commit 197e37d9, the banner comment on lookup_open() no longer matches what the function returns. It used to return a struct file pointer or NULL and now it returns an integer and is passed the struct file pointer it is to use amongst its arguments. Update the comment to reflect this. Also add a banner comment to atomic_open(). Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index ffcd4e114b6e..5abab9176903 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2192,6 +2192,19 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) return security_inode_create(dir->dentry->d_inode, dentry, mode); } +/* + * Attempt to atomically look up, create and open a file from a negative + * dentry. + * + * Returns 0 if successful. 
The file will have been created and attached to + * @file by the filesystem calling finish_open(). + * + * Returns 1 if the file was looked up only or didn't need creating. The + * caller will need to perform the open themselves. @path will have been + * updated to point to the new dentry. This may be negative. + * + * Returns an error code otherwise. + */ static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, @@ -2336,12 +2349,22 @@ looked_up: } /* - * Lookup, maybe create and open the last component + * Look up and maybe create and open the last component. * * Must be called with i_mutex held on parent. * - * Returns open file or NULL on success, error otherwise. NULL means no open - * was performed, only lookup. + * Returns 0 if the file was successfully atomically created (if necessary) and + * opened. In this case the file will be returned attached to @file. + * + * Returns 1 if the file was not completely opened at this time, though lookups + * and creations will have been performed and the dentry returned in @path will + * be positive upon return if O_CREAT was specified. If O_CREAT wasn't + * specified then a negative dentry may be returned. + * + * An error code is returned otherwise. + * + * FILE_CREATE will be set in @*opened if the dentry was created and will be + * cleared otherwise prior to returning. */ static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, -- cgit v1.2.3 From 79714f72d3b964611997de512cb29198c9f2dbbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Jun 2012 03:01:42 +0400 Subject: get rid of kern_path_parent() all callers want the same thing, actually - a kinda-sorta analog of kern_path_create(). I.e. they want parent vfsmount/dentry (with ->i_mutex held, to make sure the child dentry is still their child) + the child dentry. 
Signed-off-by: Al Viro --- drivers/base/devtmpfs.c | 98 +++++++++++++++++++++---------------------------- fs/namei.c | 22 ++++++++++- include/linux/namei.h | 2 +- kernel/audit_watch.c | 25 ++----------- 4 files changed, 65 insertions(+), 82 deletions(-) (limited to 'fs') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 765c3a28077a..d91a3a0b2325 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -227,33 +227,24 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev) static int dev_rmdir(const char *name) { - struct nameidata nd; + struct path parent; struct dentry *dentry; int err; - err = kern_path_parent(name, &nd); - if (err) - return err; - - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (!IS_ERR(dentry)) { - if (dentry->d_inode) { - if (dentry->d_inode->i_private == &thread) - err = vfs_rmdir(nd.path.dentry->d_inode, - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } - dput(dentry); + dentry = kern_path_locked(name, &parent); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (dentry->d_inode) { + if (dentry->d_inode->i_private == &thread) + err = vfs_rmdir(parent.dentry->d_inode, dentry); + else + err = -EPERM; } else { - err = PTR_ERR(dentry); + err = -ENOENT; } - - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + dput(dentry); + mutex_unlock(&parent.dentry->d_inode->i_mutex); + path_put(&parent); return err; } @@ -305,50 +296,43 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta static int handle_remove(const char *nodename, struct device *dev) { - struct nameidata nd; + struct path parent; struct dentry *dentry; - struct kstat stat; int deleted = 1; int err; - err = kern_path_parent(nodename, &nd); - if (err) - return err; + dentry = kern_path_locked(nodename, &parent); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - 
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (!IS_ERR(dentry)) { - if (dentry->d_inode) { - err = vfs_getattr(nd.path.mnt, dentry, &stat); - if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = 0; - newattrs.ia_gid = 0; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &newattrs); - mutex_unlock(&dentry->d_inode->i_mutex); - err = vfs_unlink(nd.path.dentry->d_inode, - dentry); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + if (dentry->d_inode) { + struct kstat stat; + err = vfs_getattr(parent.mnt, dentry, &stat); + if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + newattrs.ia_uid = 0; + newattrs.ia_gid = 0; + newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + mutex_lock(&dentry->d_inode->i_mutex); + notify_change(dentry, &newattrs); + mutex_unlock(&dentry->d_inode->i_mutex); + err = vfs_unlink(parent.dentry->d_inode, dentry); + if (!err || err == -ENOENT) + deleted = 1; } - dput(dentry); } else { - err = PTR_ERR(dentry); + err = -ENOENT; } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + dput(dentry); + mutex_unlock(&parent.dentry->d_inode->i_mutex); - path_put(&nd.path); + path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); return err; diff --git a/fs/namei.c b/fs/namei.c index 5abab9176903..6b29a51bef5d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1814,9 +1814,27 @@ static int do_path_lookup(int dfd, const char *name, return retval; } -int kern_path_parent(const char *name, 
struct nameidata *nd) +/* does lookup, returns the object with parent locked */ +struct dentry *kern_path_locked(const char *name, struct path *path) { - return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); + struct nameidata nd; + struct dentry *d; + int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd); + if (err) + return ERR_PTR(err); + if (nd.last_type != LAST_NORM) { + path_put(&nd.path); + return ERR_PTR(-EINVAL); + } + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + if (IS_ERR(d)) { + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + return d; + } + *path = nd.path; + return d; } int kern_path(const char *name, unsigned int flags, struct path *path) diff --git a/include/linux/namei.h b/include/linux/namei.h index 23d859879210..f5931489e150 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -67,7 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, int); extern struct dentry *user_path_create(int, const char __user *, struct path *, int); -extern int kern_path_parent(const char *, struct nameidata *); +extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e683869365d9..3823281401b5 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent) /* Get path information necessary for adding watches. 
*/ static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct nameidata nd; - struct dentry *d; - int err; - - err = kern_path_parent(watch->path, &nd); - if (err) - return err; - - if (nd.last_type != LAST_NORM) { - path_put(&nd.path); - return -EINVAL; - } - - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (IS_ERR(d)) { - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + struct dentry *d = kern_path_locked(watch->path, parent); + if (IS_ERR(d)) return PTR_ERR(d); - } + mutex_unlock(&parent->dentry->d_inode->i_mutex); if (d->d_inode) { /* update watch filter fields */ watch->dev = d->d_inode->i_sb->s_dev; watch->ino = d->d_inode->i_ino; } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - - *parent = nd.path; dput(d); return 0; } -- cgit v1.2.3 From 469796d10590341c53cff0a2959254eaf5d465de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jun 2012 20:51:39 -0400 Subject: sysfs: switch to ->s_d_op and ->d_release() a) ->d_iput() is wrong here - what we do to inode is completely usual, it's dentry->d_fsdata that we want to drop. Just use ->d_release(). 
b) switch to ->s_d_op - no need to play with d_set_d_op() Signed-off-by: Al Viro --- fs/sysfs/dir.c | 16 ++++++---------- fs/sysfs/mount.c | 1 + fs/sysfs/sysfs.h | 1 + 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index efd373e3e0aa..77c44ce493f8 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -300,7 +300,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd) static int sysfs_dentry_delete(const struct dentry *dentry) { struct sysfs_dirent *sd = dentry->d_fsdata; - return !!(sd->s_flags & SYSFS_FLAG_REMOVED); + return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); } static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) @@ -355,18 +355,15 @@ out_bad: return 0; } -static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode) +static void sysfs_dentry_release(struct dentry *dentry) { - struct sysfs_dirent * sd = dentry->d_fsdata; - - sysfs_put(sd); - iput(inode); + sysfs_put(dentry->d_fsdata); } -static const struct dentry_operations sysfs_dentry_ops = { +const struct dentry_operations sysfs_dentry_ops = { .d_revalidate = sysfs_dentry_revalidate, .d_delete = sysfs_dentry_delete, - .d_iput = sysfs_dentry_iput, + .d_release = sysfs_dentry_release, }; struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) @@ -786,6 +783,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, ret = ERR_PTR(-ENOENT); goto out_unlock; } + dentry->d_fsdata = sysfs_get(sd); /* attach dentry and inode */ inode = sysfs_get_inode(dir->i_sb, sd); @@ -797,8 +795,6 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - d_set_d_op(dentry, &sysfs_dentry_ops); - dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { d_move(ret, dentry); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 52c3bdb66a84..c15a7a3572e9 100644 --- 
a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -68,6 +68,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) } root->d_fsdata = &sysfs_root; sb->s_root = root; + sb->s_d_op = &sysfs_dentry_ops; return 0; } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 661a9639570b..d73c0932bbd6 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -157,6 +157,7 @@ extern struct kmem_cache *sysfs_dir_cachep; */ extern struct mutex sysfs_mutex; extern spinlock_t sysfs_assoc_lock; +extern const struct dentry_operations sysfs_dentry_ops; extern const struct file_operations sysfs_dir_operations; extern const struct inode_operations sysfs_dir_inode_operations; -- cgit v1.2.3 From e77fb7cef87856d9d35f2f4d617d0b97148ee7c2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jun 2012 20:56:54 -0400 Subject: sysfs: just use d_materialise_unique() same as for nfs et al. Signed-off-by: Al Viro --- fs/sysfs/dir.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 77c44ce493f8..a5cf784f9cc2 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -793,14 +793,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, } /* instantiate and hash dentry */ - ret = d_find_alias(inode); - if (!ret) { - d_add(dentry, inode); - } else { - d_move(ret, dentry); - iput(inode); - } - + ret = d_materialise_unique(dentry, inode); out_unlock: mutex_unlock(&sysfs_mutex); return ret; -- cgit v1.2.3 From ee3efa91e240f513898050ef305a49a653c8ed90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2012 15:59:33 -0400 Subject: __d_unalias() should refuse to move mountpoints Signed-off-by: Al Viro --- fs/dcache.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 015586f1ffc6..8086636bf796 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2387,14 +2387,13 @@ static struct dentry *__d_unalias(struct inode
*inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - struct dentry *ret; + struct dentry *ret = ERR_PTR(-EBUSY); /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) goto out_unalias; /* See lock_rename() */ - ret = ERR_PTR(-EBUSY); if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; @@ -2402,8 +2401,10 @@ static struct dentry *__d_unalias(struct inode *inode, goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - __d_move(alias, dentry); - ret = alias; + if (likely(!d_mountpoint(alias))) { + __d_move(alias, dentry); + ret = alias; + } out_err: spin_unlock(&inode->i_lock); if (m2) -- cgit v1.2.3 From c3b1a350846a11dd1054cb7832e098aa37025deb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 20:28:22 -0400 Subject: debugfs: make sure that debugfs_create_file() gets used only for regulars It, debugfs_create_dir() and debugfs_create_symlink() use the common helper now.
Signed-off-by: Al Viro --- fs/debugfs/inode.c | 56 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b80bc846a15a..d423b966bc79 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -335,6 +335,30 @@ static int debugfs_create_by_name(const char *name, umode_t mode, return error; } +struct dentry *__create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + struct dentry *dentry = NULL; + int error; + + pr_debug("debugfs: creating file '%s'\n",name); + + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, + &debugfs_mount_count); + if (error) + goto exit; + + error = debugfs_create_by_name(name, mode, parent, &dentry, + data, fops); + if (error) { + dentry = NULL; + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } +exit: + return dentry; +} + /** * debugfs_create_file - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. 
@@ -365,25 +389,15 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { - struct dentry *dentry = NULL; - int error; - - pr_debug("debugfs: creating file '%s'\n",name); - - error = simple_pin_fs(&debug_fs_type, &debugfs_mount, - &debugfs_mount_count); - if (error) - goto exit; - - error = debugfs_create_by_name(name, mode, parent, &dentry, - data, fops); - if (error) { - dentry = NULL; - simple_release_fs(&debugfs_mount, &debugfs_mount_count); - goto exit; + switch (mode & S_IFMT) { + case S_IFREG: + case 0: + break; + default: + BUG(); } -exit: - return dentry; + + return __create_file(name, mode, parent, data, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file); @@ -407,8 +421,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); */ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) { - return debugfs_create_file(name, - S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, parent, NULL, NULL); } EXPORT_SYMBOL_GPL(debugfs_create_dir); @@ -446,8 +459,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, if (!link) return NULL; - result = debugfs_create_file(name, S_IFLNK | S_IRWXUGO, parent, link, - NULL); + result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL); if (!result) kfree(link); return result; -- cgit v1.2.3 From cfa57c11b0d5a80f7bffa1ab35bc46892127817f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 20:33:28 -0400 Subject: debugfs: fold debugfs_create_by_name() into the only caller Signed-off-by: Al Viro --- fs/debugfs/inode.c | 53 ++++++++++++++++++++--------------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d423b966bc79..79f53f3ce7c6 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -293,13 +293,19 @@ static struct file_system_type 
debug_fs_type = { .kill_sb = kill_litter_super, }; -static int debugfs_create_by_name(const char *name, umode_t mode, - struct dentry *parent, - struct dentry **dentry, - void *data, - const struct file_operations *fops) +struct dentry *__create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) { - int error = 0; + struct dentry *dentry = NULL; + int error; + + pr_debug("debugfs: creating file '%s'\n",name); + + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, + &debugfs_mount_count); + if (error) + goto exit; /* If the parent is not specified, we create it in the root. * We need the root dentry to do this, which is in the super @@ -309,48 +315,29 @@ static int debugfs_create_by_name(const char *name, umode_t mode, if (!parent) parent = debugfs_mount->mnt_root; - *dentry = NULL; + dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); - *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(*dentry)) { + dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case S_IFDIR: - error = debugfs_mkdir(parent->d_inode, *dentry, mode, + error = debugfs_mkdir(parent->d_inode, dentry, mode, data, fops); break; case S_IFLNK: - error = debugfs_link(parent->d_inode, *dentry, mode, + error = debugfs_link(parent->d_inode, dentry, mode, data, fops); break; default: - error = debugfs_create(parent->d_inode, *dentry, mode, + error = debugfs_create(parent->d_inode, dentry, mode, data, fops); break; } - dput(*dentry); + dput(dentry); } else - error = PTR_ERR(*dentry); + error = PTR_ERR(dentry); mutex_unlock(&parent->d_inode->i_mutex); - return error; -} - -struct dentry *__create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops) -{ - struct dentry *dentry = NULL; - int error; - - pr_debug("debugfs: creating file '%s'\n",name); - - error = simple_pin_fs(&debug_fs_type, &debugfs_mount, - 
&debugfs_mount_count); - if (error) - goto exit; - - error = debugfs_create_by_name(name, mode, parent, &dentry, - data, fops); if (error) { dentry = NULL; simple_release_fs(&debugfs_mount, &debugfs_mount_count); -- cgit v1.2.3 From ac481d6ca4081bdd348cbd84963d1ece843a3407 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 20:40:20 -0400 Subject: debugfs: get rid of useless arguments to debugfs_{mkdir,symlink} Signed-off-by: Al Viro --- fs/debugfs/inode.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 79f53f3ce7c6..d17c20fd74e6 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -54,13 +54,12 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev break; case S_IFLNK: inode->i_op = &debugfs_link_operations; - inode->i_fop = fops; inode->i_private = data; break; case S_IFDIR: inode->i_op = &simple_dir_inode_operations; - inode->i_fop = fops ? fops : &simple_dir_operations; - inode->i_private = data; + inode->i_fop = &simple_dir_operations; + inode->i_private = NULL; /* directory inodes start off with i_nlink == 2 * (for "." 
entry) */ @@ -91,13 +90,12 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry, return error; } -static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, - void *data, const struct file_operations *fops) +static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int res; mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; - res = debugfs_mknod(dir, dentry, mode, 0, data, fops); + res = debugfs_mknod(dir, dentry, mode, 0, NULL, NULL); if (!res) { inc_nlink(dir); fsnotify_mkdir(dir, dentry); @@ -106,10 +104,10 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, } static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode, - void *data, const struct file_operations *fops) + void *data) { mode = (mode & S_IALLUGO) | S_IFLNK; - return debugfs_mknod(dir, dentry, mode, 0, data, fops); + return debugfs_mknod(dir, dentry, mode, 0, data, NULL); } static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, @@ -321,12 +319,12 @@ struct dentry *__create_file(const char *name, umode_t mode, if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case S_IFDIR: - error = debugfs_mkdir(parent->d_inode, dentry, mode, - data, fops); + error = debugfs_mkdir(parent->d_inode, dentry, mode); + break; case S_IFLNK: error = debugfs_link(parent->d_inode, dentry, mode, - data, fops); + data); break; default: error = debugfs_create(parent->d_inode, dentry, mode, -- cgit v1.2.3 From 408ef013cc9e2f94a14f7ccbbe52ddfb18437a99 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jun 2012 10:47:03 -0400 Subject: fs: move path_put on failure out of ->follow_link Currently the non-nd_set_link based versions of ->follow_link are expected to do a path_put(&nd->path) on failure. This calling convention is unexpected, undocumented and doesn't match what the nd_set_link-based instances do. 
Move the path_put out of the only non-nd_set_link based ->follow_link instance into the caller. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/namei.c | 3 +-- fs/proc/base.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 6b29a51bef5d..a9b94c62c303 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -624,7 +624,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); if (IS_ERR(*p)) - goto out_put_link; + goto out_put_nd_path; error = 0; s = nd_get_link(nd); @@ -646,7 +646,6 @@ follow_link(struct path *link, struct nameidata *nd, void **p) out_put_nd_path: path_put(&nd->path); -out_put_link: path_put(link); return error; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 8eaa5ea1c613..3bd5ac1ff018 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1427,16 +1427,20 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; + struct path path; int error = -EACCES; - /* We don't need a base pointer in the /proc filesystem */ - path_put(&nd->path); - /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path); + if (error) + goto out; + + path_put(&nd->path); + nd->path = path; + return NULL; out: return ERR_PTR(error); } -- cgit v1.2.3 From b5fb63c18315c5510c1d0636179c057e0c761c77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jun 2012 10:47:04 -0400 Subject: fs: add nd_jump_link Add a helper that abstracts out the jump to an already parsed struct path from ->follow_link operation from procfs. 
Not only does this clean up the code by moving the two sides of this game into a single helper, but it also prepares for making struct nameidata private to namei.c Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/namei.c | 27 +++++++++++++++++---------- fs/proc/base.c | 3 +-- include/linux/namei.h | 2 ++ 3 files changed, 20 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index a9b94c62c303..0e1b9c3eb36d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -586,6 +586,21 @@ static inline void path_to_nameidata(const struct path *path, nd->path.dentry = path->dentry; } +/* + * Helper to directly jump to a known parsed path from ->follow_link, + * caller must have taken a reference to path beforehand. + */ +void nd_jump_link(struct nameidata *nd, struct path *path) +{ + path_put(&nd->path); + + nd->path = *path; + nd->inode = nd->path.dentry->d_inode; + nd->flags |= LOOKUP_JUMPED; + + BUG_ON(nd->inode->i_op->follow_link); +} + static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { struct inode *inode = link->dentry->d_inode; @@ -630,17 +645,9 @@ follow_link(struct path *link, struct nameidata *nd, void **p) s = nd_get_link(nd); if (s) { error = __vfs_follow_link(nd, s); - } else if (nd->last_type == LAST_BIND) { - nd->flags |= LOOKUP_JUMPED; - nd->inode = nd->path.dentry->d_inode; - if (nd->inode->i_op->follow_link) { - /* stepped on a _really_ weird one */ - path_put(&nd->path); - error = -ELOOP; - } + if (unlikely(error)) + put_link(nd, link, *p); } - if (unlikely(error)) - put_link(nd, link, *p); return error; diff --git a/fs/proc/base.c b/fs/proc/base.c index 3bd5ac1ff018..2772208338f8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1438,8 +1438,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) if (error) goto out; - path_put(&nd->path); - nd->path = path; + nd_jump_link(nd, &path); return NULL; out: return ERR_PTR(error); diff --git 
a/include/linux/namei.h b/include/linux/namei.h index f5931489e150..d2ef8b34b967 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -80,6 +80,8 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +extern void nd_jump_link(struct nameidata *nd, struct path *path); + static inline void nd_set_link(struct nameidata *nd, char *path) { nd->saved_names[nd->depth] = path; -- cgit v1.2.3 From c4107b3097465e25f7d6a9b0ac0518b07b24e774 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 20 Jun 2012 09:55:58 +1000 Subject: notify_change(): check that i_mutex is held Cc: Djalal Harouni Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/attr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/attr.c b/fs/attr.c index 0da90951d277..29e38a1f7f77 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -171,6 +171,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr) struct timespec now; unsigned int ia_valid = attr->ia_valid; + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); + if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; @@ -250,5 +252,4 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return error; } - EXPORT_SYMBOL(notify_change); -- cgit v1.2.3 From 85d7d618c17a09cfd824c1ad4483c19e6f9637ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jun 2012 22:41:54 +0400 Subject: mark_files_ro(): don't bother with mntget/mntput mnt_drop_write_file() is safe under any lock Signed-off-by: Al Viro --- fs/file_table.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index a305d9e2d1b2..9ace2781931e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -483,10 +483,8 @@ void mark_files_ro(struct super_block *sb) { struct 
file *f; -retry: lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { - struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; if (!file_count(f)) @@ -499,12 +497,7 @@ retry: if (file_check_writeable(f) != 0) continue; file_release_write(f); - mnt = mntget(f->f_path.mnt); - /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(&files_lglock); - mnt_drop_write(mnt); - mntput(mnt); - goto retry; + mnt_drop_write_file(f); } while_file_list_for_each_entry; lg_global_unlock(&files_lglock); } -- cgit v1.2.3 From c3c4f69424db0760239762d36d0b1b6ae524008b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jun 2012 22:49:45 +0400 Subject: do_dentry_open(): close the race with mark_files_ro() in failure exit we want to take it out of mark_files_ro() reach *before* we start checking if we ought to drop write access. Signed-off-by: Al Viro --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 124ccb1d38a0..764cc9c201a5 100644 --- a/fs/open.c +++ b/fs/open.c @@ -727,6 +727,7 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); + file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { put_write_access(inode); if (!special_file(inode->i_mode)) { @@ -740,7 +741,6 @@ cleanup_all: mnt_drop_write(f->f_path.mnt); } } - file_sb_list_del(f); cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; -- cgit v1.2.3 From 55e4def0a6e79e7eb53017c4935adfed76510cd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:09 +0100 Subject: VFS: Make chown() and lchown() call fchownat() Make the chown() and lchown() syscalls jump to the fchownat() syscall with the appropriate extra arguments. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/open.c | 41 +++++++---------------------------------- 1 file changed, 7 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 764cc9c201a5..75bea868ef8a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -537,25 +537,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) return error; } -SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) -{ - struct path path; - int error; - - error = user_path(filename, &path); - if (error) - goto out; - error = mnt_want_write(path.mnt); - if (error) - goto out_release; - error = chown_common(&path, user, group); - mnt_drop_write(path.mnt); -out_release: - path_put(&path); -out: - return error; -} - SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag) { @@ -583,23 +564,15 @@ out: return error; } -SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) +SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { - struct path path; - int error; + return sys_fchownat(AT_FDCWD, filename, user, group, 0); +} - error = user_lpath(filename, &path); - if (error) - goto out; - error = mnt_want_write(path.mnt); - if (error) - goto out_release; - error = chown_common(&path, user, group); - mnt_drop_write(path.mnt); -out_release: - path_put(&path); -out: - return error; +SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) +{ + return sys_fchownat(AT_FDCWD, filename, user, group, + AT_SYMLINK_NOFOLLOW); } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) -- cgit v1.2.3 From be34d1a3bc4b6f357a49acb55ae870c81337e4f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:18 +0100 Subject: VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors copy_tree() can theoretically fail in a case other than ENOMEM, but always returns NULL which is 
interpreted by callers as -ENOMEM. Change it to return an explicit error. Also change clone_mnt() for consistency and because union mounts will add new error cases. Thanks to Andreas Gruenbacher for a bug fix. [AV: folded braino fix by Dan Carpenter] Original-author: Valerie Aurora Signed-off-by: David Howells Cc: Valerie Aurora Cc: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/namespace.c | 120 ++++++++++++++++++++++++++++------------------------ fs/pnode.c | 5 ++- kernel/audit_tree.c | 10 ++--- 3 files changed, 73 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 8f412abcb67f..be1b07a774f1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -708,56 +708,60 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { struct super_block *sb = old->mnt.mnt_sb; - struct mount *mnt = alloc_vfsmnt(old->mnt_devname); + struct mount *mnt; + int err; - if (mnt) { - if (flag & (CL_SLAVE | CL_PRIVATE)) - mnt->mnt_group_id = 0; /* not a peer of original */ - else - mnt->mnt_group_id = old->mnt_group_id; - - if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { - int err = mnt_alloc_group_id(mnt); - if (err) - goto out_free; - } + mnt = alloc_vfsmnt(old->mnt_devname); + if (!mnt) + return ERR_PTR(-ENOMEM); - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; - atomic_inc(&sb->s_active); - mnt->mnt.mnt_sb = sb; - mnt->mnt.mnt_root = dget(root); - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); - list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(&vfsmount_lock); + if (flag & (CL_SLAVE | CL_PRIVATE)) + mnt->mnt_group_id = 0; /* not a peer of original */ + else + mnt->mnt_group_id = old->mnt_group_id; - if (flag & CL_SLAVE) { - list_add(&mnt->mnt_slave, &old->mnt_slave_list); - mnt->mnt_master = old; - CLEAR_MNT_SHARED(mnt); - } else if (!(flag & CL_PRIVATE)) { - if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) - 
list_add(&mnt->mnt_share, &old->mnt_share); - if (IS_MNT_SLAVE(old)) - list_add(&mnt->mnt_slave, &old->mnt_slave); - mnt->mnt_master = old->mnt_master; - } - if (flag & CL_MAKE_SHARED) - set_mnt_shared(mnt); - - /* stick the duplicate mount on the same expiry list - * as the original if that was on one */ - if (flag & CL_EXPIRE) { - if (!list_empty(&old->mnt_expire)) - list_add(&mnt->mnt_expire, &old->mnt_expire); - } + if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { + err = mnt_alloc_group_id(mnt); + if (err) + goto out_free; + } + + mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + atomic_inc(&sb->s_active); + mnt->mnt.mnt_sb = sb; + mnt->mnt.mnt_root = dget(root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + br_write_lock(&vfsmount_lock); + list_add_tail(&mnt->mnt_instance, &sb->s_mounts); + br_write_unlock(&vfsmount_lock); + + if (flag & CL_SLAVE) { + list_add(&mnt->mnt_slave, &old->mnt_slave_list); + mnt->mnt_master = old; + CLEAR_MNT_SHARED(mnt); + } else if (!(flag & CL_PRIVATE)) { + if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) + list_add(&mnt->mnt_share, &old->mnt_share); + if (IS_MNT_SLAVE(old)) + list_add(&mnt->mnt_slave, &old->mnt_slave); + mnt->mnt_master = old->mnt_master; + } + if (flag & CL_MAKE_SHARED) + set_mnt_shared(mnt); + + /* stick the duplicate mount on the same expiry list + * as the original if that was on one */ + if (flag & CL_EXPIRE) { + if (!list_empty(&old->mnt_expire)) + list_add(&mnt->mnt_expire, &old->mnt_expire); } + return mnt; out_free: free_vfsmnt(mnt); - return NULL; + return ERR_PTR(err); } static inline void mntfree(struct mount *mnt) @@ -1242,11 +1246,12 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, struct path path; if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) - return NULL; + return ERR_PTR(-EINVAL); res = q = clone_mnt(mnt, dentry, flag); - if (!q) - goto Enomem; + if (IS_ERR(q)) + return q; + q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; @@ 
-1268,8 +1273,8 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, path.mnt = &q->mnt; path.dentry = p->mnt_mountpoint; q = clone_mnt(p, p->mnt.mnt_root, flag); - if (!q) - goto Enomem; + if (IS_ERR(q)) + goto out; br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); @@ -1277,7 +1282,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, } } return res; -Enomem: +out: if (res) { LIST_HEAD(umount_list); br_write_lock(&vfsmount_lock); @@ -1285,9 +1290,11 @@ Enomem: br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } - return NULL; + return q; } +/* Caller should check returned pointer for errors */ + struct vfsmount *collect_mounts(struct path *path) { struct mount *tree; @@ -1295,7 +1302,9 @@ struct vfsmount *collect_mounts(struct path *path) tree = copy_tree(real_mount(path->mnt), path->dentry, CL_COPY_ALL | CL_PRIVATE); up_write(&namespace_sem); - return tree ? &tree->mnt : NULL; + if (IS_ERR(tree)) + return NULL; + return &tree->mnt; } void drop_collected_mounts(struct vfsmount *mnt) @@ -1590,14 +1599,15 @@ static int do_loopback(struct path *path, char *old_name, if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) goto out2; - err = -ENOMEM; if (recurse) mnt = copy_tree(old, old_path.dentry, 0); else mnt = clone_mnt(old, old_path.dentry, 0); - if (!mnt) - goto out2; + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out; + } err = graft_tree(mnt, path); if (err) { @@ -2211,10 +2221,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, down_write(&namespace_sem); /* First pass: copy the tree topology */ new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); - if (!new) { + if (IS_ERR(new)) { up_write(&namespace_sem); kfree(new_ns); - return ERR_PTR(-ENOMEM); + return ERR_CAST(new); } new_ns->root = new; br_write_lock(&vfsmount_lock); diff --git a/fs/pnode.c b/fs/pnode.c index bed378db0758..3e000a51ac0d 100644 --- a/fs/pnode.c +++ 
b/fs/pnode.c @@ -237,8 +237,9 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); - if (!(child = copy_tree(source, source->mnt.mnt_root, type))) { - ret = -ENOMEM; + child = copy_tree(source, source->mnt.mnt_root, type); + if (IS_ERR(child)) { + ret = PTR_ERR(child); list_splice(tree_list, tmp_list.prev); goto out; } diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 5bf0790497e7..3a5ca582ba1e 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -595,7 +595,7 @@ void audit_trim_trees(void) root_mnt = collect_mounts(&path); path_put(&path); - if (!root_mnt) + if (IS_ERR(root_mnt)) goto skip_it; spin_lock(&hash_lock); @@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule) goto Err; mnt = collect_mounts(&path); path_put(&path); - if (!mnt) { - err = -ENOMEM; + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); goto Err; } @@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new) return err; tagged = collect_mounts(&path2); path_put(&path2); - if (!tagged) - return -ENOMEM; + if (IS_ERR(tagged)) + return PTR_ERR(tagged); err = kern_path(old, 0, &path1); if (err) { -- cgit v1.2.3 From f015f1267b23d3530d3f874243fb83cb5f443005 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:28 +0100 Subject: VFS: Comment mount following code Add comments describing what the directions "up" and "down" mean and ref count handling to the VFS mount following family of functions. 
Signed-off-by: Valerie Aurora (Original author) Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 10 ++++++++++ fs/namespace.c | 16 ++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0e1b9c3eb36d..c6dcb4c8f86c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,6 +672,16 @@ static int follow_up_rcu(struct path *path) return 1; } +/* + * follow_up - Find the mountpoint of path's vfsmount + * + * Given a path, find the mountpoint of its source file system. + * Replace @path with the path of the mountpoint in the parent mount. + * Up is towards /. + * + * Return 1 if we went up a level and 0 if we were already at the + * root. + */ int follow_up(struct path *path) { struct mount *mnt = real_mount(path->mnt); diff --git a/fs/namespace.c b/fs/namespace.c index be1b07a774f1..c53d3381b0d0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -515,8 +515,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, } /* - * lookup_mnt increments the ref count before returning - * the vfsmount struct. + * lookup_mnt - Return the first child mount mounted at path + * + * "First" means first mounted chronologically. If you create the + * following mounts: + * + * mount /dev/sda1 /mnt + * mount /dev/sda2 /mnt + * mount /dev/sda3 /mnt + * + * Then lookup_mnt() on the base /mnt dentry in the root mount will + * return successively the root dentry and vfsmount of /dev/sda1, then + * /dev/sda2, then /dev/sda3, then NULL. + * + * lookup_mnt takes a reference to the found vfsmount. */ struct vfsmount *lookup_mnt(struct path *path) { -- cgit v1.2.3 From 9249e17fe094d853d1ef7475dd559a2cc7e23d42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:37 +0100 Subject: VFS: Pass mount flags to sget() Pass mount flags to sget() so that it can use them in initialising a new superblock before the set function is called. 
They could also be passed to the compare function. Signed-off-by: David Howells Signed-off-by: Al Viro --- drivers/mtd/mtdsuper.c | 4 +--- fs/9p/vfs_super.c | 4 ++-- fs/afs/super.c | 3 +-- fs/btrfs/super.c | 4 ++-- fs/ceph/super.c | 2 +- fs/cifs/cifsfs.c | 9 ++++----- fs/devpts/inode.c | 6 +++--- fs/ecryptfs/main.c | 3 +-- fs/gfs2/ops_fstype.c | 5 ++--- fs/libfs.c | 4 ++-- fs/logfs/super.c | 3 +-- fs/nfs/super.c | 2 +- fs/nilfs2/super.c | 4 ++-- fs/proc/root.c | 3 +-- fs/reiserfs/procfs.c | 2 +- fs/super.c | 22 +++++++++++----------- fs/sysfs/mount.c | 3 +-- fs/ubifs/super.c | 3 +-- include/linux/fs.h | 2 +- kernel/cgroup.c | 2 +- 20 files changed, 40 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index a90bfe79916d..334da5f583c0 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -63,7 +63,7 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, struct super_block *sb; int ret; - sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, mtd); + sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, flags, mtd); if (IS_ERR(sb)) goto out_error; @@ -74,8 +74,6 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); - sb->s_flags = flags; - ret = fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); if (ret < 0) { deactivate_locked_super(sb); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8c92a9ba8330..137d50396898 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -89,7 +89,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE; - sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; + sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; if (!v9ses->cache) sb->s_flags |= MS_SYNCHRONOUS; @@ -137,7 +137,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto close_session; } - sb = sget(fs_type, NULL, v9fs_set_super, v9ses); + sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); goto clunk_fid; diff --git a/fs/afs/super.c b/fs/afs/super.c index f02b31e7e648..df8c6047c2a1 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -395,7 +395,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, as->volume = vol; /* allocate a deviceless superblock */ - sb = sget(fs_type, afs_test_super, afs_set_super, as); + sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); afs_put_volume(vol); @@ -406,7 +406,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); - sb->s_flags = flags; ret = afs_fill_super(sb, &params); if (ret < 0) { deactivate_locked_super(sb); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e23991574fdf..b19d75567728 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1068,7 +1068,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info); + s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC, + fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); goto 
error_close_devices; @@ -1082,7 +1083,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1e67dd7305a4..7076109f014d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -871,7 +871,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, if (ceph_test_opt(fsc->client, NOSHARE)) compare_super = NULL; - sb = sget(fs_type, compare_super, ceph_set_super, fsc); + sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); if (IS_ERR(sb)) { res = ERR_CAST(sb); goto out; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c0c2751a7573..a7610cfedf0a 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -637,7 +637,10 @@ cifs_do_mount(struct file_system_type *fs_type, mnt_data.cifs_sb = cifs_sb; mnt_data.flags = flags; - sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); + /* BB should we make this contingent on mount parm? */ + flags |= MS_NODIRATIME | MS_NOATIME; + + sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data); if (IS_ERR(sb)) { root = ERR_CAST(sb); cifs_umount(cifs_sb); @@ -648,10 +651,6 @@ cifs_do_mount(struct file_system_type *fs_type, cFYI(1, "Use existing superblock"); cifs_umount(cifs_sb); } else { - sb->s_flags = flags; - /* BB should we make this contingent on mount parm? 
*/ - sb->s_flags |= MS_NODIRATIME | MS_NOATIME; - rc = cifs_read_super(sb); if (rc) { root = ERR_PTR(rc); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 979c1e309c73..14afbabe6546 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -439,15 +439,15 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, return ERR_PTR(error); if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, NULL); + s = sget(fs_type, NULL, set_anon_super, flags, NULL); else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); + s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, + NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - s->s_flags = flags; error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) goto out_undo_sget; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 68954937a071..7edeb3d893c1 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -499,13 +499,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - s = sget(fs_type, NULL, set_anon_super, NULL); + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { rc = PTR_ERR(s); goto out; } - s->s_flags = flags; rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); if (rc) goto out1; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b8c250fc4922..6c906078f657 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1286,7 +1286,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); + s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); error = PTR_ERR(s); if (IS_ERR(s)) @@ -1316,7 +1316,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, 
bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); @@ -1360,7 +1359,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, dev_name, error); return ERR_PTR(error); } - s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, + s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { diff --git a/fs/libfs.c b/fs/libfs.c index ebd03f6910d5..a74cb1725ac6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -222,15 +222,15 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, const struct super_operations *ops, const struct dentry_operations *dops, unsigned long magic) { - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s; struct dentry *dentry; struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); + s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL); if (IS_ERR(s)) return ERR_CAST(s); - s->s_flags = MS_NOUSER; s->s_maxbytes = MAX_LFS_FILESIZE; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 97bca623d893..345c24b8a6f8 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -519,7 +519,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, log_super("LogFS: Start mount %x\n", mount_count++); err = -EINVAL; - sb = sget(type, logfs_sb_test, logfs_sb_set, super); + sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super); if (IS_ERR(sb)) { super->s_devops->put_device(super); kfree(super); @@ -542,7 +542,6 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, sb->s_maxbytes = (1ull << 43) - 1; sb->s_max_links = LOGFS_LINK_MAX; sb->s_op = &logfs_super_operations; - sb->s_flags = flags | MS_NOATIME; err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY); if (err) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 06228192f64e..8b2a2977b720 100644 --- 
a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1099a76cee59..d57c42f974ea 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1288,7 +1288,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags, err = -EBUSY; goto failed; } - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, + sd.bdev); mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { err = PTR_ERR(s); @@ -1301,7 +1302,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s_new = true; /* New superblock instance created */ - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); diff --git a/fs/proc/root.c b/fs/proc/root.c index 568b20290c75..9a2d9fd7cadd 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -111,7 +111,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, options = data; } - sb = sget(fs_type, proc_test_super, proc_set_super, ns); + sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); if (IS_ERR(sb)) return ERR_CAST(sb); @@ -121,7 +121,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } if (!sb->s_root) { - sb->s_flags = flags; err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 2c1ade692cc8..e60e87035bb3 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -403,7 +403,7 @@ static void *r_start(struct 
seq_file *m, loff_t * pos) if (l) return NULL; - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s))) + if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s))) return NULL; up_write(&s->s_umount); diff --git a/fs/super.c b/fs/super.c index cf001775617f..c743fb3be4b8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -105,11 +105,12 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to + * @flags: the mount flags * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(struct file_system_type *type) +static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; @@ -136,6 +137,7 @@ static struct super_block *alloc_super(struct file_system_type *type) #else INIT_LIST_HEAD(&s->s_files); #endif + s->s_flags = flags; s->s_bdi = &default_backing_dev_info; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); @@ -415,11 +417,13 @@ EXPORT_SYMBOL(generic_shutdown_super); * @type: filesystem type superblock should belong to * @test: comparison callback * @set: setup callback + * @flags: mount flags * @data: argument to each of them */ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), + int flags, void *data) { struct super_block *s = NULL; @@ -450,7 +454,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type); + s = alloc_super(type, flags); if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -925,13 +929,12 @@ struct dentry *mount_ns(struct file_system_type *fs_type, int flags, { struct super_block *sb; - sb = sget(fs_type, ns_test_super, ns_set_super, data); + sb = sget(fs_type, 
ns_test_super, ns_set_super, flags, data); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { int err; - sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { deactivate_locked_super(sb); @@ -992,7 +995,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC, + bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; @@ -1017,7 +1021,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags | MS_NOSEC; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); @@ -1062,13 +1065,11 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)) { int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); - s->s_flags = flags; - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); @@ -1091,11 +1092,10 @@ struct dentry *mount_single(struct file_system_type *fs_type, struct super_block *s; int error; - s = sget(fs_type, compare_single, set_anon_super, NULL); + s = sget(fs_type, compare_single, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 
1 : 0); if (error) { deactivate_locked_super(s); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index c15a7a3572e9..71eb7e253927 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -118,13 +118,12 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) info->ns[type] = kobj_ns_grab_current(type); - sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); + sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info); if (IS_ERR(sb) || sb->s_fs_info != info) free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { - sb->s_flags = flags; error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(sb); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5862dd9d2784..1c766c39c038 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2136,7 +2136,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, sb_test, sb_set, c); + sb = sget(fs_type, sb_test, sb_set, flags, c); if (IS_ERR(sb)) { err = PTR_ERR(sb); kfree(c); @@ -2153,7 +2153,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, goto out_deact; } } else { - sb->s_flags = flags; err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); if (err) goto out_deact; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2f857e9eeb3a..48548bdd7722 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1914,7 +1914,7 @@ void free_anon_bdev(dev_t); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), - void *data); + int flags, void *data); extern struct dentry *mount_pseudo(struct file_system_type *, char *, const struct super_operations *ops, const struct dentry_operations *dops, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0cd1314acdaf..af2b5641fc8b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, opts.new_root = new_root; /* Locate an existing or new sb for this hierarchy */ - sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); + sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts); if (IS_ERR(sb)) { ret = PTR_ERR(sb); cgroup_drop_root(opts.new_root); -- cgit v1.2.3 From 0bdaea9017b9d2b9996e153a71ee03555969b80e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:46 +0100 Subject: VFS: Split inode_permission() Split inode_permission() into inode- and superblock-dependent parts. This is aimed at unionmounts where the superblock from the upper layer has to be checked rather than the superblock from the lower layer as the upper layer may be writable, thus allowing an unwritable file from the lower layer to be copied up and modified. 
Original-author: Valerie Aurora Signed-off-by: David Howells (Further development) Signed-off-by: Al Viro --- fs/internal.h | 5 +++++ fs/namei.c | 66 ++++++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index 8a9f5fa840f1..a6fd56c68b11 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -41,6 +41,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) */ extern void __init chrdev_init(void); +/* + * namei.c + */ +extern int __inode_permission(struct inode *, int); + /* * namespace.c */ diff --git a/fs/namei.c b/fs/namei.c index c6dcb4c8f86c..1b6474687698 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -315,31 +315,22 @@ static inline int do_inode_permission(struct inode *inode, int mask) } /** - * inode_permission - check for access rights to a given inode - * @inode: inode to check permission on - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) + * __inode_permission - Check for access rights to a given inode + * @inode: Inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * - * Used to check for read/write/execute permissions on an inode. - * We use "fsuid" for this, letting us set arbitrary permissions - * for filesystem access without changing the "normal" uids which - * are used for other things. + * Check for read/write/execute permissions on an inode. * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. + * + * This does not check for a read-only file system. You probably want + * inode_permission(). */ -int inode_permission(struct inode *inode, int mask) +int __inode_permission(struct inode *inode, int mask) { int retval; if (unlikely(mask & MAY_WRITE)) { - umode_t mode = inode->i_mode; - - /* - * Nobody gets write access to a read-only fs. 
- */ - if (IS_RDONLY(inode) && - (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) - return -EROFS; - /* * Nobody gets write access to an immutable file. */ @@ -358,6 +349,47 @@ int inode_permission(struct inode *inode, int mask) return security_inode_permission(inode, mask); } +/** + * sb_permission - Check superblock-level permissions + * @sb: Superblock of inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Separate out file-system wide checks from inode-specific permission checks. + */ +static int sb_permission(struct super_block *sb, struct inode *inode, int mask) +{ + if (unlikely(mask & MAY_WRITE)) { + umode_t mode = inode->i_mode; + + /* Nobody gets write access to a read-only fs. */ + if ((sb->s_flags & MS_RDONLY) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + } + return 0; +} + +/** + * inode_permission - Check for access rights to a given inode + * @inode: Inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Check for read/write/execute permissions on an inode. We use fs[ug]id for + * this, letting us set arbitrary permissions for filesystem access without + * changing the "normal" UIDs which are used for other things. + * + * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. + */ +int inode_permission(struct inode *inode, int mask) +{ + int retval; + + retval = sb_permission(inode->i_sb, inode, mask); + if (retval) + return retval; + return __inode_permission(inode, mask); +} + /** * path_get - get a reference to a path * @path: path to get the reference to -- cgit v1.2.3 From 82c7c7a5a9270b13380a588dc57b7541a5e4f541 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:31 -0400 Subject: NFSv4.1 return the LAYOUT for each file with failed DS connection I/O First mark the deviceid invalid to prevent any future use. 
Then fence all files involved in I/O to a DS with a connection error by sending a LAYOUTRETURN. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 85b70639921b..26b96de831ea 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -205,9 +205,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - if (!filelayout_test_devid_invalid(devid)) - _pnfs_return_layout(inode); filelayout_mark_devid_invalid(devid); + _pnfs_return_layout(inode); rpc_wake_up(&tbl->slot_tbl_waitq); nfs4_ds_disconnect(clp); /* fall through */ -- cgit v1.2.3 From baf6c2a44af02cf6f7cec1ff177189c78fc30f9a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:32 -0400 Subject: NFSv4.1 don't send LAYOUTCOMMIT if data resent through MDS Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 26b96de831ea..53f94d915bd1 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -206,6 +206,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); filelayout_mark_devid_invalid(devid); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); _pnfs_return_layout(inode); rpc_wake_up(&tbl->slot_tbl_waitq); nfs4_ds_disconnect(clp); -- cgit v1.2.3 From 366d50521c57939e61e25dc27f009367447563e6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:33 -0400 Subject: NFSv4.1 mark layout when already returned When the file layout driver is fencing a DS, _pnfs_return_layout can be called multiple times per inode due to in-flight i/o referencing lsegs on its 
plh_segs list. Remember that LAYOUTRETURN has been called, and do not call it again. Allow LAYOUTRETURNs after a subsequent LAYOUTGET. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 10 ++++++++-- fs/nfs/pnfs.h | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2617831afd39..3ad768f2cef4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -662,11 +662,11 @@ _pnfs_return_layout(struct inode *ino) nfs4_stateid stateid; int status = 0; - dprintk("--> %s\n", __func__); + dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo) { + if (!lo || pnfs_test_layout_returned(lo)) { spin_unlock(&ino->i_lock); dprintk("%s: no layout to return\n", __func__); return status; @@ -676,6 +676,7 @@ _pnfs_return_layout(struct inode *ino) get_layout_hdr(lo); mark_matching_lsegs_invalid(lo, &tmp_list, NULL); lo->plh_block_lgets++; + pnfs_mark_layout_returned(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); @@ -686,6 +687,7 @@ _pnfs_return_layout(struct inode *ino) status = -ENOMEM; set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); + pnfs_clear_layout_returned(lo); put_layout_hdr(lo); goto out; } @@ -1075,6 +1077,10 @@ pnfs_update_layout(struct inode *ino, get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) first = true; + + /* Enable LAYOUTRETURNs */ + pnfs_clear_layout_returned(lo); + spin_unlock(&ino->i_lock); if (first) { /* The lo must be on the clp list if there is any diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 592beb02c955..2c6c80503ba4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -64,6 +64,7 @@ enum { NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ NFS_LAYOUT_INVALID, /* layout is being destroyed */ + NFS_LAYOUT_RETURNED, /* layout has already been returned */ }; enum 
layoutdriver_policy_flags { @@ -255,6 +256,24 @@ struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node * bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_deviceid_purge_client(const struct nfs_client *); +static inline void +pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo) +{ + set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); +} + +static inline void +pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo) +{ + clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); +} + +static inline bool +pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); +} + static inline int lo_fail_bit(u32 iomode) { return iomode == IOMODE_RW ? -- cgit v1.2.3 From 293b3b065c5ec5d15c3087ca42a52c991d7d8235 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:34 -0400 Subject: NFSv4.1 do not send LAYOUTRETURN on empty plh_segs list mark_matching_lsegs_invalid() resets the mds_threshold counters and can dereference the layout hdr on an initial empty plh_segs list. It returns 0 both in the case of an initial empty list and in a non-empty list that was cleared by calls to mark_lseg_invalid. Don't send a LAYOUTRETURN if the list was initially empty. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3ad768f2cef4..7fbd25afe418 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -651,7 +651,14 @@ out_err_free: return NULL; } -/* Initiates a LAYOUTRETURN(FILE) */ +/* + * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr + * when the layout segment list is empty. + * + * Note that a pnfs_layout_hdr can exist with an empty layout segment + * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the + * deviceid is marked invalid. 
+ */ int _pnfs_return_layout(struct inode *ino) { @@ -660,7 +667,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); struct nfs4_layoutreturn *lrp; nfs4_stateid stateid; - int status = 0; + int status = 0, empty; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -668,13 +675,21 @@ _pnfs_return_layout(struct inode *ino) lo = nfsi->layout; if (!lo || pnfs_test_layout_returned(lo)) { spin_unlock(&ino->i_lock); - dprintk("%s: no layout to return\n", __func__); - return status; + dprintk("NFS: %s no layout to return\n", __func__); + goto out; } stateid = nfsi->layout->plh_stateid; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); + empty = list_empty(&lo->plh_segs); mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + /* Don't send a LAYOUTRETURN if list was initially empty */ + if (empty) { + spin_unlock(&ino->i_lock); + put_layout_hdr(lo); + dprintk("NFS: %s no layout segments to return\n", __func__); + goto out; + } lo->plh_block_lgets++; pnfs_mark_layout_returned(lo); spin_unlock(&ino->i_lock); -- cgit v1.2.3 From 377e507d1572eca6372c862483f4ce4680ad310a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:29:45 -0400 Subject: NFS: Fix up TEST_STATEID and FREE_STATEID return code handling The TEST_STATEID and FREE_STATEID operations can return -NFS4ERR_BAD_STATEID, -NFS4ERR_OLD_STATEID, or -NFS4ERR_DEADSESSION. nfs41_{test,free}_stateid() should not pass these errors to nfs4_handle_exception() during state recovery, since that will recursively kick off state recovery again, resulting in a deadlock. In particular, when the TEST_STATEID operation returns NFS4_OK, res.status can contain one of these errors. _nfs41_test_stateid() replaces NFS4_OK with the value in res.status, which is then returned to callers. But res.status is not passed through nfs4_stat_to_errno(), and thus is a positive NFS4ERR value. Currently callers are only interested in !NFS4_OK, and nfs4_handle_exception() ignores positive values. 
Thus the res.status values are currently ignored by nfs4_handle_exception() and won't cause the deadlock above. Thanks to this missing negative, it is only when these operations fail (which is very rare) that a deadlock can occur. Bryan agrees the original intent was to return res.status as a negative NFS4ERR value to callers of nfs41_test_stateid(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 006e98da730a..af3abf957f2c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6578,10 +6578,9 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - - if (status == NFS_OK) - return res.status; - return status; + if (status != NFS_OK) + return status; + return -res.status; } static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) @@ -6589,9 +6588,10 @@ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs41_test_stateid(server, stateid), - &exception); + err = _nfs41_test_stateid(server, stateid); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -6609,7 +6609,8 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) }; nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + return nfs4_call_sync_sequence(server->client, server, &msg, + &args.seq_args, &res.seq_res, 1); } static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) @@ 
-6617,9 +6618,10 @@ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_free_stateid(server, stateid), - &exception); + err = _nfs4_free_stateid(server, stateid); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } -- cgit v1.2.3 From 89af2739589365bf0dd2023c6a076b22ccd530f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:29:56 -0400 Subject: NFS: Don't free a state ID the server does not recognize The result of a TEST_STATEID operation can indicate a few different things: o If NFS_OK is returned, then the client can continue using the state ID under test, and skip recovery. o RFC 5661 says that if the state ID was revoked, then the client must perform an explicit FREE_STATEID before trying to re-open. o If the server doesn't recognize the state ID at all, then no FREE_STATEID is needed, and the client can immediately continue with open recovery. Let's err on the side of caution: if the server clearly tells us the state ID is unknown, we skip the FREE_STATEID. For any other error, we issue a FREE_STATEID. Sometimes that FREE_STATEID will be unnecessary, but leaving unused state IDs on the server needlessly ties up resources. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af3abf957f2c..afd61d72837d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1764,7 +1764,8 @@ static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *s if (state->flags & flags) { status = nfs41_test_stateid(server, stateid); if (status != NFS_OK) { - nfs41_free_stateid(server, stateid); + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); state->flags &= ~flags; } } @@ -4697,7 +4698,9 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { status = nfs41_test_stateid(server, &lsp->ls_stateid); if (status != NFS_OK) { - nfs41_free_stateid(server, &lsp->ls_stateid); + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, + &lsp->ls_stateid); lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; ret = status; } -- cgit v1.2.3 From eb64cf964d453f8b559a8c0c2625952dbbcb5838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:05 -0400 Subject: NFS: State reclaim clears OPEN and LOCK state The "state->flags & flags" test in nfs41_check_expired_stateid() allows the state manager to squelch a TEST_STATEID operation when it is known for sure that a state ID is no longer valid. If the lease was purged, for example, the client already knows that state ID is now defunct. But open recovery is still needed for that inode. To force a call to nfs4_open_expired(), change the default return value for nfs41_check_expired_stateid() to force open recovery, and the default return value for nfs41_check_locks() to force lock recovery, if the requested flags are clear. Fix suggested by Bryan Schumaker. Also, the presence of a delegation state ID must not prevent normal open recovery. 
The delegation state ID must be cleared if it was revoked, but once cleared I don't think its presence or absence has any bearing on whether open recovery is still needed. So the logic is adjusted to ignore the TEST_STATEID result for the delegation state ID. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index afd61d72837d..d1c1016cd505 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1758,8 +1758,8 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta #if defined(CONFIG_NFS_V4_1) static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) { - int status = NFS_OK; struct nfs_server *server = NFS_SERVER(state->inode); + int status = -NFS4ERR_BAD_STATEID; if (state->flags & flags) { status = nfs41_test_stateid(server, stateid); @@ -1774,16 +1774,17 @@ static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *s static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - int deleg_status, open_status; int deleg_flags = 1 << NFS_DELEGATED_STATE; int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); + int status; - deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); - open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); + nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); + status = nfs41_check_expired_stateid(state, &state->open_stateid, + open_flags); - if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) - return NFS_OK; - return nfs4_open_expired(sp, state); + if (status != NFS_OK) + status = nfs4_open_expired(sp, state); + return status; } #endif @@ -4690,7 +4691,7 @@ out: #if defined(CONFIG_NFS_V4_1) static int
nfs41_check_expired_locks(struct nfs4_state *state) { - int status, ret = NFS_OK; + int status, ret = -NFS4ERR_BAD_STATEID; struct nfs4_lock_state *lsp; struct nfs_server *server = NFS_SERVER(state->inode); @@ -4716,9 +4717,9 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques if (test_bit(LK_STATE_IN_USE, &state->flags)) status = nfs41_check_expired_locks(state); - if (status == NFS_OK) - return status; - return nfs4_lock_expired(state, request); + if (status != NFS_OK) + status = nfs4_lock_expired(state, request); + return status; } #endif -- cgit v1.2.3 From 3e60ffdd36fa518cc1822941dbb011e7a9adf513 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:14 -0400 Subject: NFS: Clean up nfs41_check_expired_stateid() Clean up: Instead of open-coded flag manipulation, use test_bit() and clear_bit() just like all other accessors of the state->flag field. This also eliminates several unnecessary implicit integer type conversions. To make it absolutely clear what is going on, a number of comments are introduced. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 77 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d1c1016cd505..1364569f1d1d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1756,32 +1756,67 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta } #if defined(CONFIG_NFS_V4_1) -static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) +static void nfs41_clear_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - int status = -NFS4ERR_BAD_STATEID; - - if (state->flags & flags) { - status = nfs41_test_stateid(server, stateid); - if (status != NFS_OK) { - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid); - state->flags &= ~flags; - } + nfs4_stateid *stateid = &state->stateid; + int status; + + /* If a state reset has been done, test_stateid is unneeded */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + return; + + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + /* Free the stateid unless the server explicitly + * informs us the stateid is unrecognized. */ + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); + + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } +} + +/** + * nfs41_check_open_stateid - possibly free an open stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. 
+ */ +static int nfs41_check_open_stateid(struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + nfs4_stateid *stateid = &state->stateid; + int status; + + /* If a state reset has been done, test_stateid is unneeded */ + if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && + (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && + (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) + return -NFS4ERR_BAD_STATEID; + + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + /* Free the stateid unless the server explicitly + * informs us the stateid is unrecognized. */ + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); + + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); } return status; } static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - int deleg_flags = 1 << NFS_DELEGATED_STATE; - int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); int status; - nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); - status = nfs41_check_expired_stateid(state, &state->open_stateid, - open_flags); - + nfs41_clear_delegation_stateid(state); + status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); return status; @@ -4689,6 +4724,14 @@ out: } #if defined(CONFIG_NFS_V4_1) +/** + * nfs41_check_expired_locks - possibly free a lock stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. 
+ */ static int nfs41_check_expired_locks(struct nfs4_state *state) { int status, ret = -NFS4ERR_BAD_STATEID; @@ -4699,6 +4742,8 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { status = nfs41_test_stateid(server, &lsp->ls_stateid); if (status != NFS_OK) { + /* Free the stateid unless the server + * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) nfs41_free_stateid(server, &lsp->ls_stateid); -- cgit v1.2.3 From 38527b153a7b43e5c8103f0c2d901d11cfa26d30 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:23 -0400 Subject: NFS: Clean up TEST_STATEID and FREE_STATEID error reporting As a finishing touch, add appropriate documenting comments and some debugging printk's. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1364569f1d1d..0cb876640781 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6625,13 +6625,27 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_resp = &res, }; + dprintk("NFS call test_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - if (status != NFS_OK) + if (status != NFS_OK) { + dprintk("NFS reply test_stateid: failed, %d\n", status); return status; + } + dprintk("NFS reply test_stateid: succeeded, %d\n", -res.status); return -res.status; } +/** + * nfs41_test_stateid - perform a TEST_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to test + * + * Returns NFS_OK if the server recognizes that "stateid" is valid. 
+ * Otherwise a negative NFS4ERR value is returned if the operation + * failed or the state ID is not currently valid. + */ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; @@ -6656,12 +6670,25 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_argp = &args, .rpc_resp = &res, }; + int status; + dprintk("NFS call free_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + dprintk("NFS reply free_stateid: %d\n", status); + return status; } +/** + * nfs41_free_stateid - perform a FREE_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to release + * + * Returns NFS_OK if the server freed "stateid". Otherwise a + * negative NFS4ERR value is returned. + */ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; -- cgit v1.2.3 From 56d08fef2369d5ca9ad2e1fc697f5379fd8af751 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:32 -0400 Subject: NFS: nfs_getaclargs.acl_len is a size_t Squelch compiler warnings: fs/nfs/nfs4proc.c: In function ‘__nfs4_get_acl_uncached’: fs/nfs/nfs4proc.c:3811:14: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/nfs4proc.c:3818:15: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Introduced by commit bf118a34 "NFSv4: include bitmap in nfsv4 get acl data", Dec 7, 2011. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0cb876640781..31369e9b5b04 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3775,7 +3775,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i, acl_len = 0; + int ret = -ENOMEM, npages, i; + size_t acl_len = 0; npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, -- cgit v1.2.3 From 6a1a1e34dc55f17e7bd260809207442dbb7a0296 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:31:08 -0400 Subject: SUNRPC: Add rpcauth_list_flavors() The gss_mech_list_pseudoflavors() function provides a list of currently registered GSS pseudoflavors. This list does not include any non-GSS flavors that have been registered with the RPC client. nfs4_find_root_sec() currently adds these extra flavors by hand. Instead, nfs4_find_root_sec() should be looking at the set of flavors that have been explicitly registered via rpcauth_register(). And, other areas of code will soon need the same kind of list that contains all flavors the kernel currently knows about (see below). Rather than cloning the open-coded logic in nfs4_find_root_sec() to those new places, introduce a generic RPC function that generates a full list of registered auth flavors and pseudoflavors. A new rpc_authops method is added that lists a flavor's pseudoflavors, if it has any. I encountered an interesting module loader loop when I tried to get the RPC client to invoke gss_mech_list_pseudoflavors() by name. This patch is a pre-requisite for server trunking discovery, and a pre-requisite for fixing up the in-kernel mount client to do better automatic security flavor selection. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 ++++--- include/linux/sunrpc/auth.h | 2 ++ include/linux/sunrpc/gss_api.h | 3 +- net/sunrpc/auth.c | 54 +++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 18 +++++++++--- 6 files changed, 80 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 31369e9b5b04..80bb5055d0b3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -2412,11 +2411,15 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, int i, len, status = 0; rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS]; - len = gss_mech_list_pseudoflavors(&flav_array[0]); - flav_array[len] = RPC_AUTH_NULL; - len += 1; + len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array)); + BUG_ON(len < 0); for (i = 0; i < len; i++) { + /* AUTH_UNIX is the default flavor if none was specified, + * thus has already been tried. 
*/ + if (flav_array[i] == RPC_AUTH_UNIX) + continue; + status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 492a36d72829..f25ba922baaf 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -101,6 +101,7 @@ struct rpc_authops { struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); int (*pipes_create)(struct rpc_auth *); void (*pipes_destroy)(struct rpc_auth *); + int (*list_pseudoflavors)(rpc_authflavor_t *, int); }; struct rpc_credops { @@ -135,6 +136,7 @@ int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); +int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 332da61cf8b7..a19e2547ae6a 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -14,6 +14,7 @@ #ifdef __KERNEL__ #include +#include #include /* The mechanism-independent gss-api context: */ @@ -127,7 +128,7 @@ struct gss_api_mech *gss_mech_get_by_name(const char *); struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); /* Fill in an array with a list of supported pseudoflavors */ -int gss_mech_list_pseudoflavors(u32 *); +int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); /* Just increments the mechanism's reference count and returns its input: */ struct gss_api_mech * gss_mech_get(struct gss_api_mech *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 
727e506cacda..b5c067bccc45 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef RPC_DEBUG @@ -122,6 +123,59 @@ rpcauth_unregister(const struct rpc_authops *ops) } EXPORT_SYMBOL_GPL(rpcauth_unregister); +/** + * rpcauth_list_flavors - discover registered flavors and pseudoflavors + * @array: array to fill in + * @size: size of "array" + * + * Returns the number of array items filled in, or a negative errno. + * + * The returned array is not sorted by any policy. Callers should not + * rely on the order of the items in the returned array. + */ +int +rpcauth_list_flavors(rpc_authflavor_t *array, int size) +{ + rpc_authflavor_t flavor; + int result = 0; + + spin_lock(&rpc_authflavor_lock); + for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) { + const struct rpc_authops *ops = auth_flavors[flavor]; + rpc_authflavor_t pseudos[4]; + int i, len; + + if (result >= size) { + result = -ENOMEM; + break; + } + + if (ops == NULL) + continue; + if (ops->list_pseudoflavors == NULL) { + array[result++] = ops->au_flavor; + continue; + } + len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos)); + if (len < 0) { + result = len; + break; + } + for (i = 0; i < len; i++) { + if (result >= size) { + result = -ENOMEM; + break; + } + array[result++] = pseudos[i]; + } + } + spin_unlock(&rpc_authflavor_lock); + + dprintk("RPC: %s returns %d\n", __func__, result); + return result; +} +EXPORT_SYMBOL_GPL(rpcauth_list_flavors); + struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d3ad81f8da5b..34c522021004 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1619,6 +1619,7 @@ static const struct rpc_authops authgss_ops = { .crcreate = gss_create_cred, .pipes_create = gss_pipes_dentries_create, .pipes_destroy = gss_pipes_dentries_destroy, + .list_pseudoflavors = 
gss_mech_list_pseudoflavors, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 782bfe1b6465..6ac5dfcd2928 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -239,14 +239,26 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); -int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) +/** + * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors + * @array: array to fill in + * @size: size of "array" + * + * Returns the number of array items filled in, or a negative errno. + * + * The returned array is not sorted by any policy. Callers should not + * rely on the order of the items in the returned array. + */ +int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) { struct gss_api_mech *pos = NULL; int j, i = 0; spin_lock(&registered_mechs_lock); list_for_each_entry(pos, &registered_mechs, gm_list) { - for (j=0; j < pos->gm_pf_num; j++) { + for (j = 0; j < pos->gm_pf_num; j++) { + if (i >= size) + return -ENOMEM; array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; } } @@ -254,8 +266,6 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) return i; } -EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors); - u32 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) { -- cgit v1.2.3 From 46a87b8a7b939900d779042da7097bf330ab787f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:41 -0400 Subject: NFS: When state recovery fails, waiting tasks should exit NFSv4 state recovery is not always successful. Failure is signalled by setting the nfs_client.cl_cons_state to a negative (errno) value, then waking waiters. Currently this can happen only during mount processing. I'm about to add an explicit case where state recovery failure during normal operation should force all NFS requests waiting on that state recovery to exit.
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 80bb5055d0b3..74dcd85f0a1d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -258,7 +258,12 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp) res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); - return res; + if (res) + return res; + + if (clp->cl_cons_state < 0) + return clp->cl_cons_state; + return 0; } static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) -- cgit v1.2.3 From de734831224e74fcaf8917386e33644c4243db95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:50 -0400 Subject: NFS: Treat NFS4ERR_CLID_INUSE as a fatal error For NFSv4 minor version 0, currently the cl_id_uniquifier allows the Linux client to generate a unique nfs_client_id4 string whenever a server replies with NFS4ERR_CLID_INUSE. This implementation seems to be based on a flawed reading of RFC 3530. NFS4ERR_CLID_INUSE actually means that the client has presented this nfs_client_id4 string with a different principal at some time in the past, and that lease is still in use on the server. For a Linux client this might be rather difficult to achieve: the authentication flavor is named right in the nfs_client_id4.id string. If we change flavors, we change strings automatically. So, practically speaking, NFS4ERR_CLID_INUSE means there is some other client using our string. There is not much that can be done to recover automatically. Let's make it a permanent error. Remove the recovery logic in nfs4_proc_setclientid(), and remove the cl_id_uniquifier field from the nfs_client data structure. And, remove the authentication flavor from the nfs_client_id4 string. 
Keeping the authentication flavor in the nfs_client_id4.id string means that we could have a separate lease for each authentication flavor used by mounts on the client. But we want just one lease for all the mounts on this client. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 47 ++++++++++++++++------------------------------- fs/nfs/nfs4state.c | 7 ++++++- include/linux/nfs_fs_sb.h | 3 +-- 3 files changed, 23 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 74dcd85f0a1d..1148081e1a53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4029,42 +4029,28 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; - int loop = 0; - int status; + /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - - for(;;) { - rcu_read_lock(); - setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%s %s %s %u", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO), - clp->cl_rpcclient->cl_auth->au_ops->au_name, - clp->cl_id_uniquifier); - setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + rcu_read_lock(); + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO)); + /* cb_client4 */ + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, sizeof(setclientid.sc_netid), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_NETID)); - setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + rcu_read_unlock(); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - rcu_read_unlock(); - status = rpc_call_sync(clp->cl_rpcclient, &msg, 
RPC_TASK_TIMEOUT); - if (status != -NFS4ERR_CLID_INUSE) - break; - if (loop != 0) { - ++clp->cl_id_uniquifier; - break; - } - ++loop; - ssleep(clp->cl_lease_time / HZ + 1); - } - return status; + return rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); } int nfs4_proc_setclientid_confirm(struct nfs_client *clp, @@ -5262,10 +5248,9 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) nfs4_init_boot_verifier(clp, &verifier); args.id_len = scnprintf(args.id, sizeof(args.id), - "%s/%s/%u", + "%s/%s", clp->cl_ipaddr, - clp->cl_rpcclient->cl_nodename, - clp->cl_rpcclient->cl_auth->au_flavor); + clp->cl_rpcclient->cl_nodename); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1cfc4603fd9a..81eabcdad0e5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1606,10 +1606,15 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return -ESERVERFAULT; /* Lease confirmation error: retry after purging the lease */ ssleep(1); - case -NFS4ERR_CLID_INUSE: case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); break; + case -NFS4ERR_CLID_INUSE: + pr_err("NFS: Server %s reports our clientid is in use\n", + clp->cl_hostname); + nfs_mark_client_ready(clp, -EPERM); + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + return -EPERM; case -EACCES: if (clp->cl_machine_cred == NULL) return -EACCES; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index f58325a1d8fb..65327652c61a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -69,10 +69,9 @@ struct nfs_client { struct idmap * cl_idmap; /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. + * This is used to generate the mv0 callback address. 
*/ char cl_ipaddr[48]; - unsigned char cl_id_uniquifier; u32 cl_cb_ident; /* v4.0 callback identifier */ const struct nfs4_minor_version_ops *cl_mvops; -- cgit v1.2.3 From 6bbb4ae8ffc4eef825c8742eff1fefae69a82e41 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:59 -0400 Subject: NFS: Clean up nfs4_proc_setclientid() and friends Add documenting comments and appropriate debugging messages. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 45 +++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs4state.c | 4 ++++ 2 files changed, 41 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1148081e1a53..05801be4a180 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4013,6 +4013,16 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +/** + * nfs4_proc_setclientid - Negotiate client ID + * @clp: state data structure + * @program: RPC program for NFSv4 callback service + * @port: IP port number for NFS4 callback service + * @cred: RPC credential to use for this call + * @res: where to place the result + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. 
+ */ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred, struct nfs4_setclientid_res *res) @@ -4029,6 +4039,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + int status; /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); @@ -4050,9 +4061,22 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - return rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + dprintk("NFS call setclientid auth=%s, '%.*s'\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + setclientid.sc_name_len, setclientid.sc_name); + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + dprintk("NFS reply setclientid: %d\n", status); + return status; } +/** + * nfs4_proc_setclientid_confirm - Confirm client ID + * @clp: state data structure + * @res: result of a previous SETCLIENTID + * @cred: RPC credential to use for this call + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + */ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) @@ -4067,6 +4091,9 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, unsigned long now; int status; + dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + clp->cl_clientid); now = jiffies; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status == 0) { @@ -4075,6 +4102,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, clp->cl_last_renewal = now; spin_unlock(&clp->cl_lock); } + dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } @@ -5218,6 +5246,8 @@ out: /* * nfs4_proc_exchange_id() * + * Returns zero, a negative errno, or a negative NFS4ERR status code. 
+ * * Since the clientid has expired, all compounds using sessions * associated with the stale clientid will be returning * NFS4ERR_BADSESSION in the sequence operation, and will therefore @@ -5242,15 +5272,14 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .rpc_cred = cred, }; - dprintk("--> %s\n", __func__); - BUG_ON(clp == NULL); - nfs4_init_boot_verifier(clp, &verifier); - args.id_len = scnprintf(args.id, sizeof(args.id), "%s/%s", clp->cl_ipaddr, clp->cl_rpcclient->cl_nodename); + dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + args.id_len, args.id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); @@ -5313,12 +5342,12 @@ out_server_scope: kfree(res.server_scope); out: if (clp->cl_implid != NULL) - dprintk("%s: Server Implementation ID: " + dprintk("NFS reply exchange_id: Server Implementation ID: " "domain: %s, name: %s, date: %llu,%u\n", - __func__, clp->cl_implid->domain, clp->cl_implid->name, + clp->cl_implid->domain, clp->cl_implid->name, clp->cl_implid->date.seconds, clp->cl_implid->date.nseconds); - dprintk("<-- %s status= %d\n", __func__, status); + dprintk("NFS reply exchange_id: %d\n", status); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 81eabcdad0e5..55148def5540 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1665,6 +1665,10 @@ static int nfs4_establish_lease(struct nfs_client *clp) return 0; } +/* + * Returns zero or a negative errno. NFS4ERR values are converted + * to local errno values. + */ static int nfs4_reclaim_lease(struct nfs_client *clp) { int status; -- cgit v1.2.3 From c04fecb4d9f7753e0cbff7edd03ec68f8721cdce Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 10 May 2012 10:18:07 -0500 Subject: dlm: use rsbtbl as resource directory Remove the dir hash table (dirtbl), and use the rsb hash table (rsbtbl) as the resource directory. 
It has always been an unnecessary duplication of information. This improves efficiency by using a single rsbtbl lookup in many cases where both rsbtbl and dirtbl lookups were needed previously. This eliminates the need to handle cases of rsbtbl and dirtbl being out of sync. In many cases there will be memory savings because the dir hash table no longer exists. Signed-off-by: David Teigland --- fs/dlm/config.c | 7 - fs/dlm/config.h | 1 - fs/dlm/debug_fs.c | 103 ++++- fs/dlm/dir.c | 287 ++++---------- fs/dlm/dir.h | 7 +- fs/dlm/dlm_internal.h | 46 ++- fs/dlm/lock.c | 1022 +++++++++++++++++++++++++++++++++++++++---------- fs/dlm/lock.h | 5 +- fs/dlm/lockspace.c | 23 +- fs/dlm/rcom.c | 145 +++++-- fs/dlm/rcom.h | 1 + fs/dlm/recover.c | 140 ++++--- fs/dlm/recover.h | 2 +- fs/dlm/recoverd.c | 14 +- 14 files changed, 1215 insertions(+), 588 deletions(-) (limited to 'fs') diff --git a/fs/dlm/config.c b/fs/dlm/config.c index e7e327d43fa5..9ccf7346834a 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -96,7 +96,6 @@ struct dlm_cluster { unsigned int cl_tcp_port; unsigned int cl_buffer_size; unsigned int cl_rsbtbl_size; - unsigned int cl_dirtbl_size; unsigned int cl_recover_timer; unsigned int cl_toss_secs; unsigned int cl_scan_secs; @@ -113,7 +112,6 @@ enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, CLUSTER_ATTR_RSBTBL_SIZE, - CLUSTER_ATTR_DIRTBL_SIZE, CLUSTER_ATTR_RECOVER_TIMER, CLUSTER_ATTR_TOSS_SECS, CLUSTER_ATTR_SCAN_SECS, @@ -189,7 +187,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) CLUSTER_ATTR(tcp_port, 1); CLUSTER_ATTR(buffer_size, 1); CLUSTER_ATTR(rsbtbl_size, 1); -CLUSTER_ATTR(dirtbl_size, 1); CLUSTER_ATTR(recover_timer, 1); CLUSTER_ATTR(toss_secs, 1); CLUSTER_ATTR(scan_secs, 1); @@ -204,7 +201,6 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, - 
[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, @@ -478,7 +474,6 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_tcp_port = dlm_config.ci_tcp_port; cl->cl_buffer_size = dlm_config.ci_buffer_size; cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; - cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; cl->cl_recover_timer = dlm_config.ci_recover_timer; cl->cl_toss_secs = dlm_config.ci_toss_secs; cl->cl_scan_secs = dlm_config.ci_scan_secs; @@ -1050,7 +1045,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_TCP_PORT 21064 #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_RSBTBL_SIZE 1024 -#define DEFAULT_DIRTBL_SIZE 1024 #define DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 #define DEFAULT_SCAN_SECS 5 @@ -1066,7 +1060,6 @@ struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, .ci_buffer_size = DEFAULT_BUFFER_SIZE, .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, - .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, .ci_recover_timer = DEFAULT_RECOVER_TIMER, .ci_toss_secs = DEFAULT_TOSS_SECS, .ci_scan_secs = DEFAULT_SCAN_SECS, diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 9f5e3663bb0c..dbd35a08f3a5 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -27,7 +27,6 @@ struct dlm_config_info { int ci_tcp_port; int ci_buffer_size; int ci_rsbtbl_size; - int ci_dirtbl_size; int ci_recover_timer; int ci_toss_secs; int ci_scan_secs; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 1c9b08095f98..b969deef9ebb 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -344,6 +344,45 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) return rv; } +static int print_format4(struct dlm_rsb *r, struct seq_file *s) +{ + int our_nodeid = dlm_our_nodeid(); + int print_name = 1; + int i, rv; + + lock_rsb(r); + + rv = 
seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ", + r, + r->res_nodeid, + r->res_master_nodeid, + r->res_dir_nodeid, + our_nodeid, + r->res_toss_time, + r->res_flags, + r->res_length); + if (rv) + goto out; + + for (i = 0; i < r->res_length; i++) { + if (!isascii(r->res_name[i]) || !isprint(r->res_name[i])) + print_name = 0; + } + + seq_printf(s, "%s", print_name ? "str " : "hex"); + + for (i = 0; i < r->res_length; i++) { + if (print_name) + seq_printf(s, "%c", r->res_name[i]); + else + seq_printf(s, " %02x", (unsigned char)r->res_name[i]); + } + rv = seq_printf(s, "\n"); + out: + unlock_rsb(r); + return rv; +} + struct rsbtbl_iter { struct dlm_rsb *rsb; unsigned bucket; @@ -382,6 +421,13 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) } rv = print_format3(ri->rsb, seq); break; + case 4: + if (ri->header) { + seq_printf(seq, "version 4 rsb 2\n"); + ri->header = 0; + } + rv = print_format4(ri->rsb, seq); + break; } return rv; @@ -390,15 +436,18 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) static const struct seq_operations format1_seq_ops; static const struct seq_operations format2_seq_ops; static const struct seq_operations format3_seq_ops; +static const struct seq_operations format4_seq_ops; static void *table_seq_start(struct seq_file *seq, loff_t *pos) { + struct rb_root *tree; struct rb_node *node; struct dlm_ls *ls = seq->private; struct rsbtbl_iter *ri; struct dlm_rsb *r; loff_t n = *pos; unsigned bucket, entry; + int toss = (seq->op == &format4_seq_ops); bucket = n >> 32; entry = n & ((1LL << 32) - 1); @@ -417,11 +466,14 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) ri->format = 2; if (seq->op == &format3_seq_ops) ri->format = 3; + if (seq->op == &format4_seq_ops) + ri->format = 4; + + tree = toss ? 
&ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { - for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node; - node = rb_next(node)) { + if (!RB_EMPTY_ROOT(tree)) { + for (node = rb_first(tree); node; node = rb_next(node)) { r = rb_entry(node, struct dlm_rsb, res_hashnode); if (!entry--) { dlm_hold_rsb(r); @@ -449,10 +501,11 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) kfree(ri); return NULL; } + tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { - node = rb_first(&ls->ls_rsbtbl[bucket].keep); + if (!RB_EMPTY_ROOT(tree)) { + node = rb_first(tree); r = rb_entry(node, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; @@ -469,10 +522,12 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { struct dlm_ls *ls = seq->private; struct rsbtbl_iter *ri = iter_ptr; + struct rb_root *tree; struct rb_node *next; struct dlm_rsb *r, *rp; loff_t n = *pos; unsigned bucket; + int toss = (seq->op == &format4_seq_ops); bucket = n >> 32; @@ -511,10 +566,11 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) kfree(ri); return NULL; } + tree = toss ? 
&ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { - next = rb_first(&ls->ls_rsbtbl[bucket].keep); + if (!RB_EMPTY_ROOT(tree)) { + next = rb_first(tree); r = rb_entry(next, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; @@ -558,9 +614,17 @@ static const struct seq_operations format3_seq_ops = { .show = table_seq_show, }; +static const struct seq_operations format4_seq_ops = { + .start = table_seq_start, + .next = table_seq_next, + .stop = table_seq_stop, + .show = table_seq_show, +}; + static const struct file_operations format1_fops; static const struct file_operations format2_fops; static const struct file_operations format3_fops; +static const struct file_operations format4_fops; static int table_open(struct inode *inode, struct file *file) { @@ -573,6 +637,8 @@ static int table_open(struct inode *inode, struct file *file) ret = seq_open(file, &format2_seq_ops); else if (file->f_op == &format3_fops) ret = seq_open(file, &format3_seq_ops); + else if (file->f_op == &format4_fops) + ret = seq_open(file, &format4_seq_ops); if (ret) return ret; @@ -606,6 +672,14 @@ static const struct file_operations format3_fops = { .release = seq_release }; +static const struct file_operations format4_fops = { + .owner = THIS_MODULE, + .open = table_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + /* * dump lkb's on the ls_waiters list */ @@ -652,6 +726,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_locks_dentry); if (ls->ls_debug_all_dentry) debugfs_remove(ls->ls_debug_all_dentry); + if (ls->ls_debug_toss_dentry) + debugfs_remove(ls->ls_debug_toss_dentry); } int dlm_create_debug_file(struct dlm_ls *ls) @@ -694,6 +770,19 @@ int dlm_create_debug_file(struct dlm_ls *ls) if (!ls->ls_debug_all_dentry) goto fail; + /* format 4 */ + + memset(name, 0, sizeof(name)); + snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_toss", 
ls->ls_name); + + ls->ls_debug_toss_dentry = debugfs_create_file(name, + S_IFREG | S_IRUGO, + dlm_root, + ls, + &format4_fops); + if (!ls->ls_debug_toss_dentry) + goto fail; + memset(name, 0, sizeof(name)); snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name); diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index dc5eb598b81f..278a75cda446 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -23,50 +23,6 @@ #include "lock.h" #include "dir.h" - -static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) -{ - spin_lock(&ls->ls_recover_list_lock); - list_add(&de->list, &ls->ls_recover_list); - spin_unlock(&ls->ls_recover_list_lock); -} - -static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) -{ - int found = 0; - struct dlm_direntry *de; - - spin_lock(&ls->ls_recover_list_lock); - list_for_each_entry(de, &ls->ls_recover_list, list) { - if (de->length == len) { - list_del(&de->list); - de->master_nodeid = 0; - memset(de->name, 0, len); - found = 1; - break; - } - } - spin_unlock(&ls->ls_recover_list_lock); - - if (!found) - de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS); - return de; -} - -void dlm_clear_free_entries(struct dlm_ls *ls) -{ - struct dlm_direntry *de; - - spin_lock(&ls->ls_recover_list_lock); - while (!list_empty(&ls->ls_recover_list)) { - de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, - list); - list_del(&de->list); - kfree(de); - } - spin_unlock(&ls->ls_recover_list_lock); -} - /* * We use the upper 16 bits of the hash value to select the directory node. * Low bits are used for distribution of rsb's among hash buckets on each node. 
@@ -78,144 +34,53 @@ void dlm_clear_free_entries(struct dlm_ls *ls) int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) { - struct list_head *tmp; - struct dlm_member *memb = NULL; - uint32_t node, n = 0; - int nodeid; - - if (ls->ls_num_nodes == 1) { - nodeid = dlm_our_nodeid(); - goto out; - } + uint32_t node; - if (ls->ls_node_array) { + if (ls->ls_num_nodes == 1) + return dlm_our_nodeid(); + else { node = (hash >> 16) % ls->ls_total_weight; - nodeid = ls->ls_node_array[node]; - goto out; - } - - /* make_member_array() failed to kmalloc ls_node_array... */ - - node = (hash >> 16) % ls->ls_num_nodes; - - list_for_each(tmp, &ls->ls_nodes) { - if (n++ != node) - continue; - memb = list_entry(tmp, struct dlm_member, list); - break; + return ls->ls_node_array[node]; } - - DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n", - ls->ls_num_nodes, n, node);); - nodeid = memb->nodeid; - out: - return nodeid; } int dlm_dir_nodeid(struct dlm_rsb *r) { - return dlm_hash2nodeid(r->res_ls, r->res_hash); -} - -static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) -{ - uint32_t val; - - val = jhash(name, len, 0); - val &= (ls->ls_dirtbl_size - 1); - - return val; -} - -static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) -{ - uint32_t bucket; - - bucket = dir_hash(ls, de->name, de->length); - list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); + return r->res_dir_nodeid; } -static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, - int namelen, uint32_t bucket) +void dlm_recover_dir_nodeid(struct dlm_ls *ls) { - struct dlm_direntry *de; - - list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { - if (de->length == namelen && !memcmp(name, de->name, namelen)) - goto out; - } - de = NULL; - out: - return de; -} - -void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen) -{ - struct dlm_direntry *de; - uint32_t bucket; - - bucket = dir_hash(ls, name, namelen); - - 
spin_lock(&ls->ls_dirtbl[bucket].lock); - - de = search_bucket(ls, name, namelen, bucket); - - if (!de) { - log_error(ls, "remove fr %u none", nodeid); - goto out; - } - - if (de->master_nodeid != nodeid) { - log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); - goto out; - } - - list_del(&de->list); - kfree(de); - out: - spin_unlock(&ls->ls_dirtbl[bucket].lock); -} + struct dlm_rsb *r; -void dlm_dir_clear(struct dlm_ls *ls) -{ - struct list_head *head; - struct dlm_direntry *de; - int i; - - DLM_ASSERT(list_empty(&ls->ls_recover_list), ); - - for (i = 0; i < ls->ls_dirtbl_size; i++) { - spin_lock(&ls->ls_dirtbl[i].lock); - head = &ls->ls_dirtbl[i].list; - while (!list_empty(head)) { - de = list_entry(head->next, struct dlm_direntry, list); - list_del(&de->list); - put_free_de(ls, de); - } - spin_unlock(&ls->ls_dirtbl[i].lock); + down_read(&ls->ls_root_sem); + list_for_each_entry(r, &ls->ls_root_list, res_root_list) { + r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); } + up_read(&ls->ls_root_sem); } int dlm_recover_directory(struct dlm_ls *ls) { struct dlm_member *memb; - struct dlm_direntry *de; char *b, *last_name = NULL; - int error = -ENOMEM, last_len, count = 0; + int error = -ENOMEM, last_len, nodeid, result; uint16_t namelen; + unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; log_debug(ls, "dlm_recover_directory"); if (dlm_no_directory(ls)) goto out_status; - dlm_dir_clear(ls); - last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); if (!last_name) goto out; list_for_each_entry(memb, &ls->ls_nodes, list) { + if (memb->nodeid == dlm_our_nodeid()) + continue; + memset(last_name, 0, DLM_RESNAME_MAXLEN); last_len = 0; @@ -230,7 +95,7 @@ int dlm_recover_directory(struct dlm_ls *ls) if (error) goto out_free; - schedule(); + cond_resched(); /* * pick namelen/name pairs out of received buffer @@ -267,87 +132,71 @@ int dlm_recover_directory(struct dlm_ls *ls) if (namelen > DLM_RESNAME_MAXLEN) goto out_free; - error = -ENOMEM; - de 
= get_free_de(ls, namelen); - if (!de) + error = dlm_master_lookup(ls, memb->nodeid, + b, namelen, + DLM_LU_RECOVER_DIR, + &nodeid, &result); + if (error) { + log_error(ls, "recover_dir lookup %d", + error); goto out_free; + } + + /* The name was found in rsbtbl, but the + * master nodeid is different from + * memb->nodeid which says it is the master. + * This should not happen. */ + + if (result == DLM_LU_MATCH && + nodeid != memb->nodeid) { + count_bad++; + log_error(ls, "recover_dir lookup %d " + "nodeid %d memb %d bad %u", + result, nodeid, memb->nodeid, + count_bad); + print_hex_dump_bytes("dlm_recover_dir ", + DUMP_PREFIX_NONE, + b, namelen); + } + + /* The name was found in rsbtbl, and the + * master nodeid matches memb->nodeid. */ + + if (result == DLM_LU_MATCH && + nodeid == memb->nodeid) { + count_match++; + } + + /* The name was not found in rsbtbl and was + * added with memb->nodeid as the master. */ + + if (result == DLM_LU_ADD) { + count_add++; + } - de->master_nodeid = memb->nodeid; - de->length = namelen; last_len = namelen; - memcpy(de->name, b, namelen); memcpy(last_name, b, namelen); b += namelen; left -= namelen; - - add_entry_to_hash(ls, de); count++; } } - done: + done: ; } out_status: error = 0; - log_debug(ls, "dlm_recover_directory %d entries", count); + dlm_set_recover_status(ls, DLM_RS_DIR); + + log_debug(ls, "dlm_recover_directory %u in %u new", + count, count_add); out_free: kfree(last_name); out: - dlm_clear_free_entries(ls); return error; } -static int get_entry(struct dlm_ls *ls, int nodeid, char *name, - int namelen, int *r_nodeid) -{ - struct dlm_direntry *de, *tmp; - uint32_t bucket; - - bucket = dir_hash(ls, name, namelen); - - spin_lock(&ls->ls_dirtbl[bucket].lock); - de = search_bucket(ls, name, namelen, bucket); - if (de) { - *r_nodeid = de->master_nodeid; - spin_unlock(&ls->ls_dirtbl[bucket].lock); - if (*r_nodeid == nodeid) - return -EEXIST; - return 0; - } - - spin_unlock(&ls->ls_dirtbl[bucket].lock); - - if (namelen > 
DLM_RESNAME_MAXLEN) - return -EINVAL; - - de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS); - if (!de) - return -ENOMEM; - - de->master_nodeid = nodeid; - de->length = namelen; - memcpy(de->name, name, namelen); - - spin_lock(&ls->ls_dirtbl[bucket].lock); - tmp = search_bucket(ls, name, namelen, bucket); - if (tmp) { - kfree(de); - de = tmp; - } else { - list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); - } - *r_nodeid = de->master_nodeid; - spin_unlock(&ls->ls_dirtbl[bucket].lock); - return 0; -} - -int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, - int *r_nodeid) -{ - return get_entry(ls, nodeid, name, namelen, r_nodeid); -} - static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) { struct dlm_rsb *r; @@ -358,10 +207,10 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) bucket = hash & (ls->ls_rsbtbl_size - 1); spin_lock(&ls->ls_rsbtbl[bucket].lock); - rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r); + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); if (rv) rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, - name, len, 0, &r); + name, len, &r); spin_unlock(&ls->ls_rsbtbl[bucket].lock); if (!rv) @@ -371,7 +220,7 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) list_for_each_entry(r, &ls->ls_root_list, res_root_list) { if (len == r->res_length && !memcmp(name, r->res_name, len)) { up_read(&ls->ls_root_sem); - log_error(ls, "find_rsb_root revert to root_list %s", + log_debug(ls, "find_rsb_root revert to root_list %s", r->res_name); return r; } @@ -429,6 +278,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, be_namelen = cpu_to_be16(0); memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); offset += sizeof(__be16); + ls->ls_recover_dir_sent_msg++; goto out; } @@ -437,6 +287,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, offset += 
sizeof(__be16); memcpy(outbuf + offset, r->res_name, r->res_length); offset += r->res_length; + ls->ls_recover_dir_sent_res++; } /* @@ -449,8 +300,8 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, be_namelen = cpu_to_be16(0xFFFF); memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); offset += sizeof(__be16); + ls->ls_recover_dir_sent_msg++; } - out: up_read(&ls->ls_root_sem); } diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h index 0b0eb1267b6e..417506344456 100644 --- a/fs/dlm/dir.h +++ b/fs/dlm/dir.h @@ -14,15 +14,10 @@ #ifndef __DIR_DOT_H__ #define __DIR_DOT_H__ - int dlm_dir_nodeid(struct dlm_rsb *rsb); int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash); -void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len); -void dlm_dir_clear(struct dlm_ls *ls); -void dlm_clear_free_entries(struct dlm_ls *ls); +void dlm_recover_dir_nodeid(struct dlm_ls *ls); int dlm_recover_directory(struct dlm_ls *ls); -int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, - int *r_nodeid); void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, char *outbuf, int outlen, int nodeid); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index bc342f7ac3af..3093207a7684 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -55,8 +55,6 @@ struct dlm_lkb; struct dlm_rsb; struct dlm_member; struct dlm_rsbtable; -struct dlm_dirtable; -struct dlm_direntry; struct dlm_recover; struct dlm_header; struct dlm_message; @@ -98,18 +96,6 @@ do { \ } -struct dlm_direntry { - struct list_head list; - uint32_t master_nodeid; - uint16_t length; - char name[1]; -}; - -struct dlm_dirtable { - struct list_head list; - spinlock_t lock; -}; - struct dlm_rsbtable { struct rb_root keep; struct rb_root toss; @@ -283,6 +269,15 @@ struct dlm_lkb { }; }; +/* + * res_master_nodeid is "normal": 0 is unset/invalid, non-zero is the real + * nodeid, even when nodeid is our_nodeid. 
+ * + * res_nodeid is "odd": -1 is unset/invalid, zero means our_nodeid, + * greater than zero when another nodeid. + * + * (TODO: remove res_nodeid and only use res_master_nodeid) + */ struct dlm_rsb { struct dlm_ls *res_ls; /* the lockspace */ @@ -291,6 +286,8 @@ struct dlm_rsb { unsigned long res_flags; int res_length; /* length of rsb name */ int res_nodeid; + int res_master_nodeid; + int res_dir_nodeid; uint32_t res_lvbseq; uint32_t res_hash; uint32_t res_bucket; /* rsbtbl */ @@ -313,10 +310,21 @@ struct dlm_rsb { char res_name[DLM_RESNAME_MAXLEN+1]; }; +/* dlm_master_lookup() flags */ + +#define DLM_LU_RECOVER_DIR 1 +#define DLM_LU_RECOVER_MASTER 2 + +/* dlm_master_lookup() results */ + +#define DLM_LU_MATCH 1 +#define DLM_LU_ADD 2 + /* find_rsb() flags */ -#define R_MASTER 1 /* only return rsb if it's a master */ -#define R_CREATE 2 /* create/add rsb if not found */ +#define R_REQUEST 0x00000001 +#define R_RECEIVE_REQUEST 0x00000002 +#define R_RECEIVE_RECOVER 0x00000004 /* rsb_flags */ @@ -509,9 +517,6 @@ struct dlm_ls { struct dlm_rsbtable *ls_rsbtbl; uint32_t ls_rsbtbl_size; - struct dlm_dirtable *ls_dirtbl; - uint32_t ls_dirtbl_size; - struct mutex ls_waiters_mutex; struct list_head ls_waiters; /* lkbs needing a reply */ @@ -545,6 +550,7 @@ struct dlm_ls { struct dentry *ls_debug_waiters_dentry; /* debugfs */ struct dentry *ls_debug_locks_dentry; /* debugfs */ struct dentry *ls_debug_all_dentry; /* debugfs */ + struct dentry *ls_debug_toss_dentry; /* debugfs */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; @@ -573,6 +579,8 @@ struct dlm_ls { struct mutex ls_requestqueue_mutex; struct dlm_rcom *ls_recover_buf; int ls_recover_nodeid; /* for debugging */ + unsigned int ls_recover_dir_sent_res; /* for log info */ + unsigned int ls_recover_dir_sent_msg; /* for log info */ unsigned int ls_recover_locks_in; /* for log info */ uint64_t ls_rcom_seq; spinlock_t ls_rcom_spin; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 
bdafb65a5234..d9ee1b96549a 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -90,6 +90,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); static void del_timeout(struct dlm_lkb *lkb); +static void toss_rsb(struct kref *kref); /* * Lock compatibilty matrix - thanks Steve @@ -170,9 +171,11 @@ void dlm_print_lkb(struct dlm_lkb *lkb) static void dlm_print_rsb(struct dlm_rsb *r) { - printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", - r->res_nodeid, r->res_flags, r->res_first_lkid, - r->res_recover_locks_count, r->res_name); + printk(KERN_ERR "rsb: nodeid %d master %d dir %d flags %lx first %x " + "rlc %d name %s\n", + r->res_nodeid, r->res_master_nodeid, r->res_dir_nodeid, + r->res_flags, r->res_first_lkid, r->res_recover_locks_count, + r->res_name); } void dlm_dump_rsb(struct dlm_rsb *r) @@ -327,6 +330,37 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) * Basic operations on rsb's and lkb's */ +/* This is only called to add a reference when the code already holds + a valid reference to the rsb, so there's no need for locking. */ + +static inline void hold_rsb(struct dlm_rsb *r) +{ + kref_get(&r->res_ref); +} + +void dlm_hold_rsb(struct dlm_rsb *r) +{ + hold_rsb(r); +} + +/* When all references to the rsb are gone it's transferred to + the tossed list for later disposal. 
*/ + +static void put_rsb(struct dlm_rsb *r) +{ + struct dlm_ls *ls = r->res_ls; + uint32_t bucket = r->res_bucket; + + spin_lock(&ls->ls_rsbtbl[bucket].lock); + kref_put(&r->res_ref, toss_rsb); + spin_unlock(&ls->ls_rsbtbl[bucket].lock); +} + +void dlm_put_rsb(struct dlm_rsb *r) +{ + put_rsb(r); +} + static int pre_rsb_struct(struct dlm_ls *ls) { struct dlm_rsb *r1, *r2; @@ -411,11 +445,10 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) } int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, - unsigned int flags, struct dlm_rsb **r_ret) + struct dlm_rsb **r_ret) { struct rb_node *node = tree->rb_node; struct dlm_rsb *r; - int error = 0; int rc; while (node) { @@ -432,10 +465,8 @@ int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, return -EBADR; found: - if (r->res_nodeid && (flags & R_MASTER)) - error = -ENOTBLK; *r_ret = r; - return error; + return 0; } static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) @@ -467,124 +498,587 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) return 0; } -static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, - unsigned int flags, struct dlm_rsb **r_ret) +/* + * Find rsb in rsbtbl and potentially create/add one + * + * Delaying the release of rsb's has a similar benefit to applications keeping + * NL locks on an rsb, but without the guarantee that the cached master value + * will still be valid when the rsb is reused. Apps aren't always smart enough + * to keep NL locks on an rsb that they may lock again shortly; this can lead + * to excessive master lookups and removals if we don't delay the release. + * + * Searching for an rsb means looking through both the normal list and toss + * list. When found on the toss list the rsb is moved to the normal list with + * ref count of 1; when found on normal list the ref count is incremented. + * + * rsb's on the keep list are being used locally and refcounted. 
+ * rsb's on the toss list are not being used locally, and are not refcounted. + * + * The toss list rsb's were either + * - previously used locally but not any more (were on keep list, then + * moved to toss list when last refcount dropped) + * - created and put on toss list as a directory record for a lookup + * (we are the dir node for the res, but are not using the res right now, + * but some other node is) + * + * The purpose of find_rsb() is to return a refcounted rsb for local use. + * So, if the given rsb is on the toss list, it is moved to the keep list + * before being returned. + * + * toss_rsb() happens when all local usage of the rsb is done, i.e. no + * more refcounts exist, so the rsb is moved from the keep list to the + * toss list. + * + * rsb's on both keep and toss lists are used for doing name to master + * lookups. rsb's that are in use locally (and being refcounted) are on + * the keep list, rsb's that are not in use locally (not refcounted) and + * only exist for name/master lookups are on the toss list. + * + * rsb's on the toss list whose dir_nodeid is not local can have stale + * name/master mappings. So, remote requests on such rsb's can potentially + * return with an error, which means the mapping is stale and needs to + * be updated with a new lookup. (The idea behind MASTER UNCERTAIN and + * first_lkid is to keep only a single outstanding request on an rsb + * while that rsb has a potentially stale master.)
+ */ + +static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, + uint32_t hash, uint32_t b, + int dir_nodeid, int from_nodeid, + unsigned int flags, struct dlm_rsb **r_ret) { - struct dlm_rsb *r; + struct dlm_rsb *r = NULL; + int our_nodeid = dlm_our_nodeid(); + int from_local = 0; + int from_other = 0; + int from_dir = 0; + int create = 0; int error; - error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); - if (!error) { - kref_get(&r->res_ref); - goto out; + if (flags & R_RECEIVE_REQUEST) { + if (from_nodeid == dir_nodeid) + from_dir = 1; + else + from_other = 1; + } else if (flags & R_REQUEST) { + from_local = 1; + } + + /* + * flags & R_RECEIVE_RECOVER is from dlm_recover_master_copy, so + * from_nodeid has sent us a lock in dlm_recover_locks, believing + * we're the new master. Our local recovery may not have set + * res_master_nodeid to our_nodeid yet, so allow either. Don't + * create the rsb; dlm_recover_process_copy() will handle EBADR + * by resending. + * + * If someone sends us a request, we are the dir node, and we do + * not find the rsb anywhere, then recreate it. This happens if + * someone sends us a request after we have removed/freed an rsb + * from our toss list. (They sent a request instead of lookup + * because they are using an rsb from their toss list.) + */ + + if (from_local || from_dir || + (from_other && (dir_nodeid == our_nodeid))) { + create = 1; } - if (error == -ENOTBLK) - goto out; - error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); + retry: + if (create) { + error = pre_rsb_struct(ls); + if (error < 0) + goto out; + } + + spin_lock(&ls->ls_rsbtbl[b].lock); + + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); if (error) - goto out; + goto do_toss; + + /* + * rsb is active, so we can't check master_nodeid without lock_rsb. 
+ */ - rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); - error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); + kref_get(&r->res_ref); + error = 0; + goto out_unlock; + + + do_toss: + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); if (error) - return error; + goto do_new; - if (dlm_no_directory(ls)) - goto out; + /* + * rsb found inactive (master_nodeid may be out of date unless + * we are the dir_nodeid or were the master) No other thread + * is using this rsb because it's on the toss list, so we can + * look at or update res_master_nodeid without lock_rsb. + */ - if (r->res_nodeid == -1) { + if ((r->res_master_nodeid != our_nodeid) && from_other) { + /* our rsb was not master, and another node (not the dir node) + has sent us a request */ + log_debug(ls, "find_rsb toss from_other %d master %d dir %d %s", + from_nodeid, r->res_master_nodeid, dir_nodeid, + r->res_name); + error = -ENOTBLK; + goto out_unlock; + } + + if ((r->res_master_nodeid != our_nodeid) && from_dir) { + /* don't think this should ever happen */ + log_error(ls, "find_rsb toss from_dir %d master %d", + from_nodeid, r->res_master_nodeid); + dlm_print_rsb(r); + /* fix it and go on */ + r->res_master_nodeid = our_nodeid; + r->res_nodeid = 0; rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); r->res_first_lkid = 0; - } else if (r->res_nodeid > 0) { + } + + if (from_local && (r->res_master_nodeid != our_nodeid)) { + /* Because we have held no locks on this rsb, + res_master_nodeid could have become stale. 
*/ rsb_set_flag(r, RSB_MASTER_UNCERTAIN); r->res_first_lkid = 0; + } + + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); + goto out_unlock; + + + do_new: + /* + * rsb not found + */ + + if (error == -EBADR && !create) + goto out_unlock; + + error = get_rsb_struct(ls, name, len, &r); + if (error == -EAGAIN) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + goto retry; + } + if (error) + goto out_unlock; + + r->res_hash = hash; + r->res_bucket = b; + r->res_dir_nodeid = dir_nodeid; + kref_init(&r->res_ref); + + if (from_dir) { + /* want to see how often this happens */ + log_debug(ls, "find_rsb new from_dir %d recreate %s", + from_nodeid, r->res_name); + r->res_master_nodeid = our_nodeid; + r->res_nodeid = 0; + goto out_add; + } + + if (from_other && (dir_nodeid != our_nodeid)) { + /* should never happen */ + log_error(ls, "find_rsb new from_other %d dir %d our %d %s", + from_nodeid, dir_nodeid, our_nodeid, r->res_name); + dlm_free_rsb(r); + error = -ENOTBLK; + goto out_unlock; + } + + if (from_other) { + log_debug(ls, "find_rsb new from_other %d dir %d %s", + from_nodeid, dir_nodeid, r->res_name); + } + + if (dir_nodeid == our_nodeid) { + /* When we are the dir nodeid, we can set the master + node immediately */ + r->res_master_nodeid = our_nodeid; + r->res_nodeid = 0; } else { - DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r);); - DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),); + /* set_master will send_lookup to dir_nodeid */ + r->res_master_nodeid = 0; + r->res_nodeid = -1; } + + out_add: + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); + out_unlock: + spin_unlock(&ls->ls_rsbtbl[b].lock); out: *r_ret = r; return error; } +/* During recovery, other nodes can send us new MSTCPY locks (from + dlm_recover_locks) before we've made ourself master (in + dlm_recover_masters). 
*/ + +static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len, + uint32_t hash, uint32_t b, + int dir_nodeid, int from_nodeid, + unsigned int flags, struct dlm_rsb **r_ret) +{ + struct dlm_rsb *r = NULL; + int our_nodeid = dlm_our_nodeid(); + int recover = (flags & R_RECEIVE_RECOVER); + int error; + + retry: + error = pre_rsb_struct(ls); + if (error < 0) + goto out; + + spin_lock(&ls->ls_rsbtbl[b].lock); + + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); + if (error) + goto do_toss; + + /* + * rsb is active, so we can't check master_nodeid without lock_rsb. + */ + + kref_get(&r->res_ref); + goto out_unlock; + + + do_toss: + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); + if (error) + goto do_new; + + /* + * rsb found inactive. No other thread is using this rsb because + * it's on the toss list, so we can look at or update + * res_master_nodeid without lock_rsb. + */ + + if (!recover && (r->res_master_nodeid != our_nodeid) && from_nodeid) { + /* our rsb is not master, and another node has sent us a + request; this should never happen */ + log_error(ls, "find_rsb toss from_nodeid %d master %d dir %d", + from_nodeid, r->res_master_nodeid, dir_nodeid); + dlm_print_rsb(r); + error = -ENOTBLK; + goto out_unlock; + } + + if (!recover && (r->res_master_nodeid != our_nodeid) && + (dir_nodeid == our_nodeid)) { + /* our rsb is not master, and we are dir; may as well fix it; + this should never happen */ + log_error(ls, "find_rsb toss our %d master %d dir %d", + our_nodeid, r->res_master_nodeid, dir_nodeid); + dlm_print_rsb(r); + r->res_master_nodeid = our_nodeid; + r->res_nodeid = 0; + } + + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); + goto out_unlock; + + + do_new: + /* + * rsb not found + */ + + error = get_rsb_struct(ls, name, len, &r); + if (error == -EAGAIN) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + goto retry; + } + if (error) + goto out_unlock; + + r->res_hash 
= hash; + r->res_bucket = b; + r->res_dir_nodeid = dir_nodeid; + r->res_master_nodeid = dir_nodeid; + r->res_nodeid = (dir_nodeid == our_nodeid) ? 0 : dir_nodeid; + kref_init(&r->res_ref); + + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); + out_unlock: + spin_unlock(&ls->ls_rsbtbl[b].lock); + out: + *r_ret = r; + return error; +} + +static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid, + unsigned int flags, struct dlm_rsb **r_ret) +{ + uint32_t hash, b; + int dir_nodeid; + + if (len > DLM_RESNAME_MAXLEN) + return -EINVAL; + + hash = jhash(name, len, 0); + b = hash & (ls->ls_rsbtbl_size - 1); + + dir_nodeid = dlm_hash2nodeid(ls, hash); + + if (dlm_no_directory(ls)) + return find_rsb_nodir(ls, name, len, hash, b, dir_nodeid, + from_nodeid, flags, r_ret); + else + return find_rsb_dir(ls, name, len, hash, b, dir_nodeid, + from_nodeid, flags, r_ret); +} + +/* we have received a request and found that res_master_nodeid != our_nodeid, + so we need to return an error or make ourself the master */ + +static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r, + int from_nodeid) +{ + if (dlm_no_directory(ls)) { + log_error(ls, "find_rsb keep from_nodeid %d master %d dir %d", + from_nodeid, r->res_master_nodeid, + r->res_dir_nodeid); + dlm_print_rsb(r); + return -ENOTBLK; + } + + if (from_nodeid != r->res_dir_nodeid) { + /* our rsb is not master, and another node (not the dir node) + has sent us a request. this is much more common when our + master_nodeid is zero, so limit debug to non-zero. 
*/ + + if (r->res_master_nodeid) { + log_debug(ls, "validate master from_other %d master %d " + "dir %d first %x %s", from_nodeid, + r->res_master_nodeid, r->res_dir_nodeid, + r->res_first_lkid, r->res_name); + } + return -ENOTBLK; + } else { + /* our rsb is not master, but the dir nodeid has sent us a + request; this could happen with master 0 / res_nodeid -1 */ + + if (r->res_master_nodeid) { + log_error(ls, "validate master from_dir %d master %d " + "first %x %s", + from_nodeid, r->res_master_nodeid, + r->res_first_lkid, r->res_name); + } + + r->res_master_nodeid = dlm_our_nodeid(); + r->res_nodeid = 0; + return 0; + } +} + /* - * Find rsb in rsbtbl and potentially create/add one + * We're the dir node for this res and another node wants to know the + * master nodeid. During normal operation (non recovery) this is only + * called from receive_lookup(); master lookups when the local node is + * the dir node are done by find_rsb(). * - * Delaying the release of rsb's has a similar benefit to applications keeping - * NL locks on an rsb, but without the guarantee that the cached master value - * will still be valid when the rsb is reused. Apps aren't always smart enough - * to keep NL locks on an rsb that they may lock again shortly; this can lead - * to excessive master lookups and removals if we don't delay the release. + * normal operation, we are the dir node for a resource + * . _request_lock + * . set_master + * . send_lookup + * . receive_lookup + * . dlm_master_lookup flags 0 * - * Searching for an rsb means looking through both the normal list and toss - * list. When found on the toss list the rsb is moved to the normal list with - * ref count of 1; when found on normal list the ref count is incremented. + * recover directory, we are rebuilding dir for all resources + * . dlm_recover_directory + * . dlm_rcom_names + * remote node sends back the rsb names it is master of and we are dir of + * . 
dlm_master_lookup RECOVER_DIR (fix_master 0, from_master 1) + * we either create new rsb setting remote node as master, or find existing + * rsb and set master to be the remote node. + * + * recover masters, we are finding the new master for resources + * . dlm_recover_masters + * . recover_master + * . dlm_send_rcom_lookup + * . receive_rcom_lookup + * . dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0) */ -static int find_rsb(struct dlm_ls *ls, char *name, int namelen, - unsigned int flags, struct dlm_rsb **r_ret) +int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, + unsigned int flags, int *r_nodeid, int *result) { struct dlm_rsb *r = NULL; - uint32_t hash, bucket; - int error; + uint32_t hash, b; + int from_master = (flags & DLM_LU_RECOVER_DIR); + int fix_master = (flags & DLM_LU_RECOVER_MASTER); + int our_nodeid = dlm_our_nodeid(); + int dir_nodeid, error, toss_list = 0; - if (namelen > DLM_RESNAME_MAXLEN) { - error = -EINVAL; - goto out; + if (len > DLM_RESNAME_MAXLEN) + return -EINVAL; + + if (from_nodeid == our_nodeid) { + log_error(ls, "dlm_master_lookup from our_nodeid %d flags %x", + our_nodeid, flags); + return -EINVAL; } - if (dlm_no_directory(ls)) - flags |= R_CREATE; + hash = jhash(name, len, 0); + b = hash & (ls->ls_rsbtbl_size - 1); - hash = jhash(name, namelen, 0); - bucket = hash & (ls->ls_rsbtbl_size - 1); + dir_nodeid = dlm_hash2nodeid(ls, hash); + if (dir_nodeid != our_nodeid) { + log_error(ls, "dlm_master_lookup from %d dir %d our %d h %x %d", + from_nodeid, dir_nodeid, our_nodeid, hash, + ls->ls_num_nodes); + *r_nodeid = -1; + return -EINVAL; + } retry: - if (flags & R_CREATE) { - error = pre_rsb_struct(ls); - if (error < 0) - goto out; + error = pre_rsb_struct(ls); + if (error < 0) + return error; + + spin_lock(&ls->ls_rsbtbl[b].lock); + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); + if (!error) { + /* because the rsb is active, we need to lock_rsb before + checking/changing 
re_master_nodeid */ + + hold_rsb(r); + spin_unlock(&ls->ls_rsbtbl[b].lock); + lock_rsb(r); + goto found; } - spin_lock(&ls->ls_rsbtbl[bucket].lock); + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); + if (error) + goto not_found; - error = _search_rsb(ls, name, namelen, bucket, flags, &r); - if (!error) - goto out_unlock; + /* because the rsb is inactive (on toss list), it's not refcounted + and lock_rsb is not used, but is protected by the rsbtbl lock */ - if (error == -EBADR && !(flags & R_CREATE)) - goto out_unlock; + toss_list = 1; + found: + if (r->res_dir_nodeid != our_nodeid) { + /* should not happen, but may as well fix it and carry on */ + log_error(ls, "dlm_master_lookup res_dir %d our %d %s", + r->res_dir_nodeid, our_nodeid, r->res_name); + r->res_dir_nodeid = our_nodeid; + } + + if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) { + /* Recovery uses this function to set a new master when + the previous master failed. Setting NEW_MASTER will + force dlm_recover_masters to call recover_master on this + rsb even though the res_nodeid is no longer removed. */ + + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + rsb_set_flag(r, RSB_NEW_MASTER); + + if (toss_list) { + /* I don't think we should ever find it on toss list. 
*/ + log_error(ls, "dlm_master_lookup fix_master on toss"); + dlm_dump_rsb(r); + } + } - /* the rsb was found but wasn't a master copy */ - if (error == -ENOTBLK) - goto out_unlock; + if (from_master && (r->res_master_nodeid != from_nodeid)) { + /* this will happen if from_nodeid became master during + a previous recovery cycle, and we aborted the previous + cycle before recovering this master value */ - error = get_rsb_struct(ls, name, namelen, &r); + log_limit(ls, "dlm_master_lookup from_master %d " + "master_nodeid %d res_nodeid %d first %x %s", + from_nodeid, r->res_master_nodeid, r->res_nodeid, + r->res_first_lkid, r->res_name); + + if (r->res_master_nodeid == our_nodeid) { + log_error(ls, "from_master %d our_master", from_nodeid); + dlm_dump_rsb(r); + dlm_send_rcom_lookup_dump(r, from_nodeid); + goto out_found; + } + + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + rsb_set_flag(r, RSB_NEW_MASTER); + } + + if (!r->res_master_nodeid) { + /* this will happen if recovery happens while we're looking + up the master for this rsb */ + + log_debug(ls, "dlm_master_lookup master 0 to %d first %x %s", + from_nodeid, r->res_first_lkid, r->res_name); + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + } + + if (!from_master && !fix_master && + (r->res_master_nodeid == from_nodeid)) { + /* this can happen when the master sends remove, the dir node + finds the rsb on the keep list and ignores the remove, + and the former master sends a lookup */ + + log_limit(ls, "dlm_master_lookup from master %d flags %x " + "first %x %s", from_nodeid, flags, + r->res_first_lkid, r->res_name); + } + + out_found: + *r_nodeid = r->res_master_nodeid; + if (result) + *result = DLM_LU_MATCH; + + if (toss_list) { + r->res_toss_time = jiffies; + /* the rsb was inactive (on toss list) */ + spin_unlock(&ls->ls_rsbtbl[b].lock); + } else { + /* the rsb was active */ + unlock_rsb(r); + put_rsb(r); + } + return 0; + + not_found: + error = get_rsb_struct(ls, 
name, len, &r); if (error == -EAGAIN) { - spin_unlock(&ls->ls_rsbtbl[bucket].lock); + spin_unlock(&ls->ls_rsbtbl[b].lock); goto retry; } if (error) goto out_unlock; r->res_hash = hash; - r->res_bucket = bucket; - r->res_nodeid = -1; + r->res_bucket = b; + r->res_dir_nodeid = our_nodeid; + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; kref_init(&r->res_ref); + r->res_toss_time = jiffies; - /* With no directory, the master can be set immediately */ - if (dlm_no_directory(ls)) { - int nodeid = dlm_dir_nodeid(r); - if (nodeid == dlm_our_nodeid()) - nodeid = 0; - r->res_nodeid = nodeid; + error = rsb_insert(r, &ls->ls_rsbtbl[b].toss); + if (error) { + /* should never happen */ + dlm_free_rsb(r); + spin_unlock(&ls->ls_rsbtbl[b].lock); + goto retry; } - error = rsb_insert(r, &ls->ls_rsbtbl[bucket].keep); + + if (result) + *result = DLM_LU_ADD; + *r_nodeid = from_nodeid; + error = 0; out_unlock: - spin_unlock(&ls->ls_rsbtbl[bucket].lock); - out: - *r_ret = r; + spin_unlock(&ls->ls_rsbtbl[b].lock); return error; } @@ -605,17 +1099,27 @@ static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash) } } -/* This is only called to add a reference when the code already holds - a valid reference to the rsb, so there's no need for locking. 
*/ - -static inline void hold_rsb(struct dlm_rsb *r) +void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len) { - kref_get(&r->res_ref); -} + struct dlm_rsb *r = NULL; + uint32_t hash, b; + int error; -void dlm_hold_rsb(struct dlm_rsb *r) -{ - hold_rsb(r); + hash = jhash(name, len, 0); + b = hash & (ls->ls_rsbtbl_size - 1); + + spin_lock(&ls->ls_rsbtbl[b].lock); + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); + if (!error) + goto out_dump; + + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); + if (error) + goto out; + out_dump: + dlm_dump_rsb(r); + out: + spin_unlock(&ls->ls_rsbtbl[b].lock); } static void toss_rsb(struct kref *kref) @@ -634,24 +1138,6 @@ static void toss_rsb(struct kref *kref) } } -/* When all references to the rsb are gone it's transferred to - the tossed list for later disposal. */ - -static void put_rsb(struct dlm_rsb *r) -{ - struct dlm_ls *ls = r->res_ls; - uint32_t bucket = r->res_bucket; - - spin_lock(&ls->ls_rsbtbl[bucket].lock); - kref_put(&r->res_ref, toss_rsb); - spin_unlock(&ls->ls_rsbtbl[bucket].lock); -} - -void dlm_put_rsb(struct dlm_rsb *r) -{ - put_rsb(r); -} - /* See comment for unhold_lkb */ static void unhold_rsb(struct dlm_rsb *r) @@ -1138,27 +1624,13 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) return error; } -static void dir_remove(struct dlm_rsb *r) -{ - int to_nodeid; - - if (dlm_no_directory(r->res_ls)) - return; - - to_nodeid = dlm_dir_nodeid(r); - if (to_nodeid != dlm_our_nodeid()) - send_remove(r); - else - dlm_dir_remove_entry(r->res_ls, to_nodeid, - r->res_name, r->res_length); -} - /* FIXME: make this more efficient */ static int shrink_bucket(struct dlm_ls *ls, int b) { struct rb_node *n; struct dlm_rsb *r; + int our_nodeid = dlm_our_nodeid(); int count = 0, found; for (;;) { @@ -1166,6 +1638,17 @@ static int shrink_bucket(struct dlm_ls *ls, int b) spin_lock(&ls->ls_rsbtbl[b].lock); for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = 
rb_next(n)) { r = rb_entry(n, struct dlm_rsb, res_hashnode); + + /* If we're the directory record for this rsb, and + we're not the master of it, then we need to wait + for the master node to send us a dir remove for + before removing the dir record. */ + + if (!dlm_no_directory(ls) && !is_master(r) && + (dlm_dir_nodeid(r) == our_nodeid)) { + continue; + } + if (!time_after_eq(jiffies, r->res_toss_time + dlm_config.ci_toss_secs * HZ)) continue; @@ -1182,8 +1665,15 @@ static int shrink_bucket(struct dlm_ls *ls, int b) rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); spin_unlock(&ls->ls_rsbtbl[b].lock); - if (is_master(r)) - dir_remove(r); + /* We're the master of this rsb but we're not + the directory record, so we need to tell the + dir node to remove the dir record. */ + + if (!dlm_no_directory(ls) && is_master(r) && + (dlm_dir_nodeid(r) != our_nodeid)) { + send_remove(r); + } + dlm_free_rsb(r); count++; } else { @@ -2078,8 +2568,7 @@ static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) { - struct dlm_ls *ls = r->res_ls; - int i, error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); + int our_nodeid = dlm_our_nodeid(); if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); @@ -2093,53 +2582,35 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) return 1; } - if (r->res_nodeid == 0) { + if (r->res_master_nodeid == our_nodeid) { lkb->lkb_nodeid = 0; return 0; } - if (r->res_nodeid > 0) { - lkb->lkb_nodeid = r->res_nodeid; + if (r->res_master_nodeid) { + lkb->lkb_nodeid = r->res_master_nodeid; return 0; } - DLM_ASSERT(r->res_nodeid == -1, dlm_dump_rsb(r);); - - dir_nodeid = dlm_dir_nodeid(r); - - if (dir_nodeid != our_nodeid) { - r->res_first_lkid = lkb->lkb_id; - send_lookup(r, lkb); - return 1; - } - - for (i = 0; i < 2; i++) { - /* It's possible for dlm_scand to remove an old rsb for - this same resource from the toss list, us to 
create - a new one, look up the master locally, and find it - already exists just before dlm_scand does the - dir_remove() on the previous rsb. */ - - error = dlm_dir_lookup(ls, our_nodeid, r->res_name, - r->res_length, &ret_nodeid); - if (!error) - break; - log_debug(ls, "dir_lookup error %d %s", error, r->res_name); - schedule(); - } - if (error && error != -EEXIST) - return error; - - if (ret_nodeid == our_nodeid) { - r->res_first_lkid = 0; + if (dlm_dir_nodeid(r) == our_nodeid) { + /* This is a somewhat unusual case; find_rsb will usually + have set res_master_nodeid when dir nodeid is local, but + there are cases where we become the dir node after we've + past find_rsb and go through _request_lock again. + confirm_master() or process_lookup_list() needs to be + called after this. */ + log_debug(r->res_ls, "set_master %x self master %d dir %d %s", + lkb->lkb_id, r->res_master_nodeid, r->res_dir_nodeid, + r->res_name); + r->res_master_nodeid = our_nodeid; r->res_nodeid = 0; lkb->lkb_nodeid = 0; - } else { - r->res_first_lkid = lkb->lkb_id; - r->res_nodeid = ret_nodeid; - lkb->lkb_nodeid = ret_nodeid; + return 0; } - return 0; + + r->res_first_lkid = lkb->lkb_id; + send_lookup(r, lkb); + return 1; } static void process_lookup_list(struct dlm_rsb *r) @@ -2584,7 +3055,7 @@ static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, } /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */ - + static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error; @@ -2708,11 +3179,11 @@ static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, error = validate_lock_args(ls, lkb, args); if (error) - goto out; + return error; - error = find_rsb(ls, name, len, R_CREATE, &r); + error = find_rsb(ls, name, len, 0, R_REQUEST, &r); if (error) - goto out; + return error; lock_rsb(r); @@ -2723,8 +3194,6 @@ static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, unlock_rsb(r); put_rsb(r); - - out: return error; } @@ -3406,8 
+3875,11 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; + int from_nodeid; int error, namelen; + from_nodeid = ms->m_header.h_nodeid; + error = create_lkb(ls, &lkb); if (error) goto fail; @@ -3420,9 +3892,16 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) goto fail; } + /* The dir node is the authority on whether we are the master + for this rsb or not, so if the master sends us a request, we should + recreate the rsb if we've destroyed it. This race happens when we + send a remove message to the dir node at the same time that the dir + node sends us a request for the rsb. */ + namelen = receive_extralen(ms); - error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r); + error = find_rsb(ls, ms->m_extra, namelen, from_nodeid, + R_RECEIVE_REQUEST, &r); if (error) { __put_lkb(ls, lkb); goto fail; @@ -3430,6 +3909,16 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) lock_rsb(r); + if (r->res_master_nodeid != dlm_our_nodeid()) { + error = validate_master_nodeid(ls, r, from_nodeid); + if (error) { + unlock_rsb(r); + put_rsb(r); + __put_lkb(ls, lkb); + goto fail; + } + } + attach_lkb(r, lkb); error = do_request(r, lkb); send_request_reply(r, lkb, error); @@ -3445,6 +3934,23 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) return 0; fail: + /* TODO: instead of returning ENOTBLK, add the lkb to res_lookup + and do this receive_request again from process_lookup_list once + we get the lookup reply. This would avoid a many repeated + ENOTBLK request failures when the lookup reply designating us + as master is delayed. */ + + /* We could repeatedly return -EBADR here if our send_remove() is + delayed in being sent/arriving/being processed on the dir node. + Another node would repeatedly lookup up the master, and the dir + node would continue returning our nodeid until our send_remove + took effect. 
*/ + + if (error != -ENOTBLK) { + log_limit(ls, "receive_request %x from %d %d", + ms->m_lkid, from_nodeid, error); + } + setup_stub_lkb(ls, ms); send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); return error; @@ -3651,49 +4157,110 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) { - int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid; + int len, error, ret_nodeid, from_nodeid, our_nodeid; from_nodeid = ms->m_header.h_nodeid; our_nodeid = dlm_our_nodeid(); len = receive_extralen(ms); - dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); - if (dir_nodeid != our_nodeid) { - log_error(ls, "lookup dir_nodeid %d from %d", - dir_nodeid, from_nodeid); - error = -EINVAL; - ret_nodeid = -1; - goto out; - } - - error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid); + error = dlm_master_lookup(ls, from_nodeid, ms->m_extra, len, 0, + &ret_nodeid, NULL); /* Optimization: we're master so treat lookup as a request */ if (!error && ret_nodeid == our_nodeid) { receive_request(ls, ms); return; } - out: send_lookup_reply(ls, ms, ret_nodeid, error); } static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) { - int len, dir_nodeid, from_nodeid; + char name[DLM_RESNAME_MAXLEN+1]; + struct dlm_rsb *r; + uint32_t hash, b; + int rv, len, dir_nodeid, from_nodeid; from_nodeid = ms->m_header.h_nodeid; len = receive_extralen(ms); + if (len > DLM_RESNAME_MAXLEN) { + log_error(ls, "receive_remove from %d bad len %d", + from_nodeid, len); + return; + } + dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); if (dir_nodeid != dlm_our_nodeid()) { - log_error(ls, "remove dir entry dir_nodeid %d from %d", - dir_nodeid, from_nodeid); + log_error(ls, "receive_remove from %d bad nodeid %d", + from_nodeid, dir_nodeid); return; } - dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); + /* Look for name on rsbtbl.toss, if it's there, kill it. 
+ If it's on rsbtbl.keep, it's being used, and we should ignore this + message. This is an expected race between the dir node sending a + request to the master node at the same time as the master node sends + a remove to the dir node. The resolution to that race is for the + dir node to ignore the remove message, and the master node to + recreate the master rsb when it gets a request from the dir node for + an rsb it doesn't have. */ + + memset(name, 0, sizeof(name)); + memcpy(name, ms->m_extra, len); + + hash = jhash(name, len, 0); + b = hash & (ls->ls_rsbtbl_size - 1); + + spin_lock(&ls->ls_rsbtbl[b].lock); + + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); + if (rv) { + /* verify the rsb is on keep list per comment above */ + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); + if (rv) { + /* should not happen */ + log_error(ls, "receive_remove from %d not found %s", + from_nodeid, name); + spin_unlock(&ls->ls_rsbtbl[b].lock); + return; + } + if (r->res_master_nodeid != from_nodeid) { + /* should not happen */ + log_error(ls, "receive_remove keep from %d master %d", + from_nodeid, r->res_master_nodeid); + dlm_print_rsb(r); + spin_unlock(&ls->ls_rsbtbl[b].lock); + return; + } + + log_debug(ls, "receive_remove from %d master %d first %x %s", + from_nodeid, r->res_master_nodeid, r->res_first_lkid, + name); + spin_unlock(&ls->ls_rsbtbl[b].lock); + return; + } + + if (r->res_master_nodeid != from_nodeid) { + log_error(ls, "receive_remove toss from %d master %d", + from_nodeid, r->res_master_nodeid); + dlm_print_rsb(r); + spin_unlock(&ls->ls_rsbtbl[b].lock); + return; + } + + if (kref_put(&r->res_ref, kill_rsb)) { + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); + spin_unlock(&ls->ls_rsbtbl[b].lock); + dlm_free_rsb(r); + } else { + log_error(ls, "receive_remove from %d rsb ref error", + from_nodeid); + dlm_print_rsb(r); + spin_unlock(&ls->ls_rsbtbl[b].lock); + } } static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) 
@@ -3706,6 +4273,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_lkb *lkb; struct dlm_rsb *r; int error, mstype, result; + int from_nodeid = ms->m_header.h_nodeid; error = find_lkb(ls, ms->m_remid, &lkb); if (error) @@ -3723,8 +4291,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); if (error) { log_error(ls, "receive_request_reply %x remote %d %x result %d", - lkb->lkb_id, ms->m_header.h_nodeid, ms->m_lkid, - ms->m_result); + lkb->lkb_id, from_nodeid, ms->m_lkid, ms->m_result); dlm_dump_rsb(r); goto out; } @@ -3732,8 +4299,9 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) /* Optimization: the dir node was also the master, so it took our lookup as a request and sent request reply instead of lookup reply */ if (mstype == DLM_MSG_LOOKUP) { - r->res_nodeid = ms->m_header.h_nodeid; - lkb->lkb_nodeid = r->res_nodeid; + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + lkb->lkb_nodeid = from_nodeid; } /* this is the value returned from do_request() on the master */ @@ -3767,18 +4335,30 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) case -EBADR: case -ENOTBLK: /* find_rsb failed to find rsb or rsb wasn't master */ - log_debug(ls, "receive_request_reply %x %x master diff %d %d", - lkb->lkb_id, lkb->lkb_flags, r->res_nodeid, result); - r->res_nodeid = -1; - lkb->lkb_nodeid = -1; + log_limit(ls, "receive_request_reply %x from %d %d " + "master %d dir %d first %x %s", lkb->lkb_id, + from_nodeid, result, r->res_master_nodeid, + r->res_dir_nodeid, r->res_first_lkid, r->res_name); + + if (r->res_dir_nodeid != dlm_our_nodeid() && + r->res_master_nodeid != dlm_our_nodeid()) { + /* cause _request_lock->set_master->send_lookup */ + r->res_master_nodeid = 0; + r->res_nodeid = -1; + lkb->lkb_nodeid = -1; + } if (is_overlap(lkb)) { /* we'll ignore error in cancel/unlock reply 
*/ queue_cast_overlap(r, lkb); confirm_master(r, result); unhold_lkb(lkb); /* undoes create_lkb() */ - } else + } else { _request_lock(r, lkb); + + if (r->res_master_nodeid == dlm_our_nodeid()) + confirm_master(r, 0); + } break; default: @@ -3994,6 +4574,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_lkb *lkb; struct dlm_rsb *r; int error, ret_nodeid; + int do_lookup_list = 0; error = find_lkb(ls, ms->m_lkid, &lkb); if (error) { @@ -4001,7 +4582,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) return; } - /* ms->m_result is the value returned by dlm_dir_lookup on dir node + /* ms->m_result is the value returned by dlm_master_lookup on dir node FIXME: will a non-zero error ever be returned? */ r = lkb->lkb_resource; @@ -4013,12 +4594,37 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) goto out; ret_nodeid = ms->m_nodeid; + + /* We sometimes receive a request from the dir node for this + rsb before we've received the dir node's loookup_reply for it. + The request from the dir node implies we're the master, so we set + ourself as master in receive_request_reply, and verify here that + we are indeed the master. 
*/ + + if (r->res_master_nodeid && (r->res_master_nodeid != ret_nodeid)) { + /* This should never happen */ + log_error(ls, "receive_lookup_reply %x from %d ret %d " + "master %d dir %d our %d first %x %s", + lkb->lkb_id, ms->m_header.h_nodeid, ret_nodeid, + r->res_master_nodeid, r->res_dir_nodeid, + dlm_our_nodeid(), r->res_first_lkid, r->res_name); + } + if (ret_nodeid == dlm_our_nodeid()) { + r->res_master_nodeid = ret_nodeid; r->res_nodeid = 0; - ret_nodeid = 0; + do_lookup_list = 1; r->res_first_lkid = 0; + } else if (ret_nodeid == -1) { + /* the remote node doesn't believe it's the dir node */ + log_error(ls, "receive_lookup_reply %x from %d bad ret_nodeid", + lkb->lkb_id, ms->m_header.h_nodeid); + r->res_master_nodeid = 0; + r->res_nodeid = -1; + lkb->lkb_nodeid = -1; } else { - /* set_master() will copy res_nodeid to lkb_nodeid */ + /* set_master() will set lkb_nodeid from r */ + r->res_master_nodeid = ret_nodeid; r->res_nodeid = ret_nodeid; } @@ -4033,7 +4639,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) _request_lock(r, lkb); out_list: - if (!ret_nodeid) + if (do_lookup_list) process_lookup_list(r); out: unlock_rsb(r); @@ -4047,7 +4653,7 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, int error = 0, noent = 0; if (!dlm_is_member(ls, ms->m_header.h_nodeid)) { - log_debug(ls, "ignore non-member message %d from %d %x %x %d", + log_limit(ls, "receive %d from non-member %d %x %x %d", ms->m_type, ms->m_header.h_nodeid, ms->m_lkid, ms->m_remid, ms->m_result); return; @@ -4174,6 +4780,15 @@ static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) { if (dlm_locking_stopped(ls)) { + /* If we were a member of this lockspace, left, and rejoined, + other nodes may still be sending us messages from the + lockspace generation before we left. 
*/ + if (!ls->ls_generation) { + log_limit(ls, "receive %d from %d ignore old gen", + ms->m_type, nodeid); + return; + } + dlm_add_requestqueue(ls, nodeid, ms); } else { dlm_wait_requestqueue(ls); @@ -4798,6 +5413,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) struct dlm_rsb *r; struct dlm_lkb *lkb; uint32_t remid = 0; + int from_nodeid = rc->rc_header.h_nodeid; int error; if (rl->rl_parent_lkid) { @@ -4815,21 +5431,21 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) we make ourselves master, dlm_recover_masters() won't touch the MSTCPY locks we've received early. */ - error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen), 0, &r); + error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen), + from_nodeid, R_RECEIVE_RECOVER, &r); if (error) goto out; + lock_rsb(r); + if (dlm_no_directory(ls) && (dlm_dir_nodeid(r) != dlm_our_nodeid())) { log_error(ls, "dlm_recover_master_copy remote %d %x not dir", - rc->rc_header.h_nodeid, remid); + from_nodeid, remid); error = -EBADR; - put_rsb(r); - goto out; + goto out_unlock; } - lock_rsb(r); - - lkb = search_remid(r, rc->rc_header.h_nodeid, remid); + lkb = search_remid(r, from_nodeid, remid); if (lkb) { error = -EEXIST; goto out_remid; @@ -4866,7 +5482,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) out: if (error && error != -EEXIST) log_debug(ls, "dlm_recover_master_copy remote %d %x error %d", - rc->rc_header.h_nodeid, remid, error); + from_nodeid, remid, error); rl->rl_result = cpu_to_le32(error); return error; } diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index c8b226c62807..5e0c72e36a9b 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -14,6 +14,7 @@ #define __LOCK_DOT_H__ void dlm_dump_rsb(struct dlm_rsb *r); +void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len); void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms, uint32_t saved_seq); @@ -28,9 +29,11 @@ void 
dlm_unlock_recovery(struct dlm_ls *ls); void dlm_scan_waiters(struct dlm_ls *ls); void dlm_scan_timeout(struct dlm_ls *ls); void dlm_adjust_timeouts(struct dlm_ls *ls); +int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len, + unsigned int flags, int *r_nodeid, int *result); int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, - unsigned int flags, struct dlm_rsb **r_ret); + struct dlm_rsb **r_ret); void dlm_recover_purge(struct dlm_ls *ls); void dlm_purge_mstcpy_locks(struct dlm_rsb *r); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index ca506abbdd3b..065bb75ed609 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -509,17 +509,6 @@ static int new_lockspace(const char *name, const char *cluster, idr_init(&ls->ls_lkbidr); spin_lock_init(&ls->ls_lkbidr_spin); - size = dlm_config.ci_dirtbl_size; - ls->ls_dirtbl_size = size; - - ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size); - if (!ls->ls_dirtbl) - goto out_lkbfree; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); - spin_lock_init(&ls->ls_dirtbl[i].lock); - } - INIT_LIST_HEAD(&ls->ls_waiters); mutex_init(&ls->ls_waiters_mutex); INIT_LIST_HEAD(&ls->ls_orphans); @@ -567,7 +556,7 @@ static int new_lockspace(const char *name, const char *cluster, ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS); if (!ls->ls_recover_buf) - goto out_dirfree; + goto out_lkbfree; ls->ls_slot = 0; ls->ls_num_slots = 0; @@ -648,8 +637,6 @@ static int new_lockspace(const char *name, const char *cluster, list_del(&ls->ls_list); spin_unlock(&lslist_lock); kfree(ls->ls_recover_buf); - out_dirfree: - vfree(ls->ls_dirtbl); out_lkbfree: idr_destroy(&ls->ls_lkbidr); vfree(ls->ls_rsbtbl); @@ -778,13 +765,6 @@ static int release_lockspace(struct dlm_ls *ls, int force) kfree(ls->ls_recover_buf); - /* - * Free direntry structs. 
- */ - - dlm_dir_clear(ls); - vfree(ls->ls_dirtbl); - /* * Free all lkb's in idr */ @@ -826,7 +806,6 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_purge_requestqueue(ls); kfree(ls->ls_recover_args); - dlm_clear_free_entries(ls); dlm_clear_members(ls); dlm_clear_members_gone(ls); kfree(ls->ls_node_array); diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 64d3e2b958c7..c8c298d81463 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -23,8 +23,6 @@ #include "memory.h" #include "lock.h" #include "util.h" -#include "member.h" - static int rcom_response(struct dlm_ls *ls) { @@ -275,19 +273,9 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) struct dlm_rcom *rc; struct dlm_mhandle *mh; int error = 0; - int max_size = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); ls->ls_recover_nodeid = nodeid; - if (nodeid == dlm_our_nodeid()) { - ls->ls_recover_buf->rc_header.h_length = - dlm_config.ci_buffer_size; - dlm_copy_master_names(ls, last_name, last_len, - ls->ls_recover_buf->rc_buf, - max_size, nodeid); - goto out; - } - error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); if (error) goto out; @@ -344,6 +332,25 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) return error; } +int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid) +{ + struct dlm_rcom *rc; + struct dlm_mhandle *mh; + struct dlm_ls *ls = r->res_ls; + int error; + + error = create_rcom(ls, to_nodeid, DLM_RCOM_LOOKUP, r->res_length, + &rc, &mh); + if (error) + goto out; + memcpy(rc->rc_buf, r->res_name, r->res_length); + rc->rc_id = 0xFFFFFFFF; + + send_rcom(ls, mh, rc); + out: + return error; +} + static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; @@ -355,7 +362,14 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) if (error) return; - error = dlm_dir_lookup(ls, nodeid, rc_in->rc_buf, len, &ret_nodeid); + if (rc_in->rc_id == 0xFFFFFFFF) { + 
log_error(ls, "receive_rcom_lookup dump from %d", nodeid); + dlm_dump_rsb_name(ls, rc_in->rc_buf, len); + return; + } + + error = dlm_master_lookup(ls, nodeid, rc_in->rc_buf, len, + DLM_LU_RECOVER_MASTER, &ret_nodeid, NULL); if (error) ret_nodeid = error; rc->rc_result = ret_nodeid; @@ -486,17 +500,76 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) return 0; } +/* + * Ignore messages for stage Y before we set + * recover_status bit for stage X: + * + * recover_status = 0 + * + * dlm_recover_members() + * - send nothing + * - recv nothing + * - ignore NAMES, NAMES_REPLY + * - ignore LOOKUP, LOOKUP_REPLY + * - ignore LOCK, LOCK_REPLY + * + * recover_status |= NODES + * + * dlm_recover_members_wait() + * + * dlm_recover_directory() + * - send NAMES + * - recv NAMES_REPLY + * - ignore LOOKUP, LOOKUP_REPLY + * - ignore LOCK, LOCK_REPLY + * + * recover_status |= DIR + * + * dlm_recover_directory_wait() + * + * dlm_recover_masters() + * - send LOOKUP + * - recv LOOKUP_REPLY + * + * dlm_recover_locks() + * - send LOCKS + * - recv LOCKS_REPLY + * + * recover_status |= LOCKS + * + * dlm_recover_locks_wait() + * + * recover_status |= DONE + */ + /* Called by dlm_recv; corresponds to dlm_receive_message() but special recovery-only comms are sent through here. 
*/ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); - int stop, reply = 0, lock = 0; + int stop, reply = 0, names = 0, lookup = 0, lock = 0; uint32_t status; uint64_t seq; switch (rc->rc_type) { + case DLM_RCOM_STATUS_REPLY: + reply = 1; + break; + case DLM_RCOM_NAMES: + names = 1; + break; + case DLM_RCOM_NAMES_REPLY: + names = 1; + reply = 1; + break; + case DLM_RCOM_LOOKUP: + lookup = 1; + break; + case DLM_RCOM_LOOKUP_REPLY: + lookup = 1; + reply = 1; + break; case DLM_RCOM_LOCK: lock = 1; break; @@ -504,10 +577,6 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) lock = 1; reply = 1; break; - case DLM_RCOM_STATUS_REPLY: - case DLM_RCOM_NAMES_REPLY: - case DLM_RCOM_LOOKUP_REPLY: - reply = 1; }; spin_lock(&ls->ls_recover_lock); @@ -516,19 +585,17 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) seq = ls->ls_recover_seq; spin_unlock(&ls->ls_recover_lock); - if ((stop && (rc->rc_type != DLM_RCOM_STATUS)) || - (reply && (rc->rc_seq_reply != seq)) || - (lock && !(status & DLM_RS_DIR))) { - log_limit(ls, "dlm_receive_rcom ignore msg %d " - "from %d %llu %llu recover seq %llu sts %x gen %u", - rc->rc_type, - nodeid, - (unsigned long long)rc->rc_seq, - (unsigned long long)rc->rc_seq_reply, - (unsigned long long)seq, - status, ls->ls_generation); - goto out; - } + if (stop && (rc->rc_type != DLM_RCOM_STATUS)) + goto ignore; + + if (reply && (rc->rc_seq_reply != seq)) + goto ignore; + + if (!(status & DLM_RS_NODES) && (names || lookup || lock)) + goto ignore; + + if (!(status & DLM_RS_DIR) && (lookup || lock)) + goto ignore; switch (rc->rc_type) { case DLM_RCOM_STATUS: @@ -570,10 +637,20 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) default: log_error(ls, "receive_rcom bad type %d", rc->rc_type); } -out: + return; + +ignore: + log_limit(ls, "dlm_receive_rcom ignore msg %d " + "from %d %llu %llu 
recover seq %llu sts %x gen %u", + rc->rc_type, + nodeid, + (unsigned long long)rc->rc_seq, + (unsigned long long)rc->rc_seq_reply, + (unsigned long long)seq, + status, ls->ls_generation); return; Eshort: - log_error(ls, "recovery message %x from %d is too short", - rc->rc_type, nodeid); + log_error(ls, "recovery message %d from %d is too short", + rc->rc_type, nodeid); } diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index 206723ab744d..f8e243463c15 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h @@ -17,6 +17,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); +int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid); int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 7554e4dac6bb..3c025fe49ad3 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -361,9 +361,8 @@ static void set_master_lkbs(struct dlm_rsb *r) * rsb's to consider. */ -static void set_new_master(struct dlm_rsb *r, int nodeid) +static void set_new_master(struct dlm_rsb *r) { - r->res_nodeid = nodeid; set_master_lkbs(r); rsb_set_flag(r, RSB_NEW_MASTER); rsb_set_flag(r, RSB_NEW_MASTER2); @@ -372,31 +371,48 @@ static void set_new_master(struct dlm_rsb *r, int nodeid) /* * We do async lookups on rsb's that need new masters. The rsb's * waiting for a lookup reply are kept on the recover_list. + * + * Another node recovering the master may have sent us a rcom lookup, + * and our dlm_master_lookup() set it as the new master, along with + * NEW_MASTER so that we'll recover it here (this implies dir_nodeid + * equals our_nodeid below). 
*/ -static int recover_master(struct dlm_rsb *r) +static int recover_master(struct dlm_rsb *r, unsigned int *count) { struct dlm_ls *ls = r->res_ls; - int error, ret_nodeid; - int our_nodeid = dlm_our_nodeid(); - int dir_nodeid = dlm_dir_nodeid(r); + int our_nodeid, dir_nodeid; + int is_removed = 0; + int error; + + if (is_master(r)) + return 0; + + is_removed = dlm_is_removed(ls, r->res_nodeid); + + if (!is_removed && !rsb_flag(r, RSB_NEW_MASTER)) + return 0; + + our_nodeid = dlm_our_nodeid(); + dir_nodeid = dlm_dir_nodeid(r); if (dir_nodeid == our_nodeid) { - error = dlm_dir_lookup(ls, our_nodeid, r->res_name, - r->res_length, &ret_nodeid); - if (error) - log_error(ls, "recover dir lookup error %d", error); + if (is_removed) { + r->res_master_nodeid = our_nodeid; + r->res_nodeid = 0; + } - if (ret_nodeid == our_nodeid) - ret_nodeid = 0; - lock_rsb(r); - set_new_master(r, ret_nodeid); - unlock_rsb(r); + /* set master of lkbs to ourself when is_removed, or to + another new master which we set along with NEW_MASTER + in dlm_master_lookup */ + set_new_master(r); + error = 0; } else { recover_list_add(r); error = dlm_send_rcom_lookup(r, dir_nodeid); } + (*count)++; return error; } @@ -415,7 +431,7 @@ static int recover_master(struct dlm_rsb *r) * resent. 
*/ -static int recover_master_static(struct dlm_rsb *r) +static int recover_master_static(struct dlm_rsb *r, unsigned int *count) { int dir_nodeid = dlm_dir_nodeid(r); int new_master = dir_nodeid; @@ -423,11 +439,12 @@ static int recover_master_static(struct dlm_rsb *r) if (dir_nodeid == dlm_our_nodeid()) new_master = 0; - lock_rsb(r); dlm_purge_mstcpy_locks(r); - set_new_master(r, new_master); - unlock_rsb(r); - return 1; + r->res_master_nodeid = dir_nodeid; + r->res_nodeid = new_master; + set_new_master(r); + (*count)++; + return 0; } /* @@ -443,7 +460,10 @@ static int recover_master_static(struct dlm_rsb *r) int dlm_recover_masters(struct dlm_ls *ls) { struct dlm_rsb *r; - int error = 0, count = 0; + unsigned int total = 0; + unsigned int count = 0; + int nodir = dlm_no_directory(ls); + int error; log_debug(ls, "dlm_recover_masters"); @@ -455,20 +475,23 @@ int dlm_recover_masters(struct dlm_ls *ls) goto out; } - if (dlm_no_directory(ls)) - count += recover_master_static(r); - else if (!is_master(r) && - (dlm_is_removed(ls, r->res_nodeid) || - rsb_flag(r, RSB_NEW_MASTER))) { - recover_master(r); - count++; - } + lock_rsb(r); + if (nodir) + error = recover_master_static(r, &count); + else + error = recover_master(r, &count); + unlock_rsb(r); + cond_resched(); + total++; - schedule(); + if (error) { + up_read(&ls->ls_root_sem); + goto out; + } } up_read(&ls->ls_root_sem); - log_debug(ls, "dlm_recover_masters %d resources", count); + log_debug(ls, "dlm_recover_masters %u of %u", count, total); error = dlm_wait_function(ls, &recover_list_empty); out: @@ -480,7 +503,7 @@ int dlm_recover_masters(struct dlm_ls *ls) int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) { struct dlm_rsb *r; - int nodeid; + int ret_nodeid, new_master; r = recover_list_find(ls, rc->rc_id); if (!r) { @@ -489,12 +512,17 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) goto out; } - nodeid = rc->rc_result; - if (nodeid == dlm_our_nodeid()) - nodeid = 0; + 
ret_nodeid = rc->rc_result; + + if (ret_nodeid == dlm_our_nodeid()) + new_master = 0; + else + new_master = ret_nodeid; lock_rsb(r); - set_new_master(r, nodeid); + r->res_master_nodeid = ret_nodeid; + r->res_nodeid = new_master; + set_new_master(r); unlock_rsb(r); recover_list_del(r); @@ -791,20 +819,8 @@ int dlm_create_root_list(struct dlm_ls *ls) dlm_hold_rsb(r); } - /* If we're using a directory, add tossed rsbs to the root - list; they'll have entries created in the new directory, - but no other recovery steps should do anything with them. */ - - if (dlm_no_directory(ls)) { - spin_unlock(&ls->ls_rsbtbl[i].lock); - continue; - } - - for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) { - r = rb_entry(n, struct dlm_rsb, res_hashnode); - list_add(&r->res_root_list, &ls->ls_root_list); - dlm_hold_rsb(r); - } + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[i].toss)) + log_error(ls, "dlm_create_root_list toss not empty"); spin_unlock(&ls->ls_rsbtbl[i].lock); } out: @@ -824,28 +840,26 @@ void dlm_release_root_list(struct dlm_ls *ls) up_write(&ls->ls_root_sem); } -/* If not using a directory, clear the entire toss list, there's no benefit to - caching the master value since it's fixed. If we are using a dir, keep the - rsb's we're the master of. Recovery will add them to the root list and from - there they'll be entered in the rebuilt directory. 
*/ - -void dlm_clear_toss_list(struct dlm_ls *ls) +void dlm_clear_toss(struct dlm_ls *ls) { struct rb_node *n, *next; - struct dlm_rsb *rsb; + struct dlm_rsb *r; + unsigned int count = 0; int i; for (i = 0; i < ls->ls_rsbtbl_size; i++) { spin_lock(&ls->ls_rsbtbl[i].lock); for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { - next = rb_next(n);; - rsb = rb_entry(n, struct dlm_rsb, res_hashnode); - if (dlm_no_directory(ls) || !is_master(rsb)) { - rb_erase(n, &ls->ls_rsbtbl[i].toss); - dlm_free_rsb(rsb); - } + next = rb_next(n); + r = rb_entry(n, struct dlm_rsb, res_hashnode); + rb_erase(n, &ls->ls_rsbtbl[i].toss); + dlm_free_rsb(r); + count++; } spin_unlock(&ls->ls_rsbtbl[i].lock); } + + if (count) + log_debug(ls, "dlm_clear_toss %u done", count); } diff --git a/fs/dlm/recover.h b/fs/dlm/recover.h index ebd0363f1e08..d8c8738c70eb 100644 --- a/fs/dlm/recover.h +++ b/fs/dlm/recover.h @@ -27,7 +27,7 @@ int dlm_recover_locks(struct dlm_ls *ls); void dlm_recovered_lock(struct dlm_rsb *r); int dlm_create_root_list(struct dlm_ls *ls); void dlm_release_root_list(struct dlm_ls *ls); -void dlm_clear_toss_list(struct dlm_ls *ls); +void dlm_clear_toss(struct dlm_ls *ls); void dlm_recover_rsbs(struct dlm_ls *ls); #endif /* __RECOVER_DOT_H__ */ diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index f1a9073c0835..88ce65ff021e 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -60,12 +60,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_callback_suspend(ls); - /* - * Free non-master tossed rsb's. Master rsb's are kept on toss - * list and put on root list to be included in resdir recovery. 
- */ - - dlm_clear_toss_list(ls); + dlm_clear_toss(ls); /* * This list of root rsb's will be the basis of most of the recovery @@ -84,6 +79,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) goto fail; } + dlm_recover_dir_nodeid(ls); + + ls->ls_recover_dir_sent_res = 0; + ls->ls_recover_dir_sent_msg = 0; ls->ls_recover_locks_in = 0; dlm_set_recover_status(ls, DLM_RS_NODES); @@ -115,6 +114,9 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) goto fail; } + log_debug(ls, "dlm_recover_directory %u out %u messages", + ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); + /* * We may have outstanding operations that are waiting for a reply from * a failed node. Mark these to be resent after recovery. Unlock and -- cgit v1.2.3 From 1d7c484eeb167fc374294e38ae402de4097c8611 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 15 May 2012 16:07:49 -0500 Subject: dlm: use idr instead of list for recovered rsbs When a large number of resources are being recovered, a linear search of the recover_list takes a long time. Use an idr in place of a list. 
Signed-off-by: David Teigland --- fs/dlm/dlm_internal.h | 3 ++ fs/dlm/lockspace.c | 3 ++ fs/dlm/rcom.c | 2 +- fs/dlm/recover.c | 116 ++++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 101 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 3093207a7684..a5f82d5b3946 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -288,6 +288,7 @@ struct dlm_rsb { int res_nodeid; int res_master_nodeid; int res_dir_nodeid; + int res_id; /* for ls_recover_idr */ uint32_t res_lvbseq; uint32_t res_hash; uint32_t res_bucket; /* rsbtbl */ @@ -587,6 +588,8 @@ struct dlm_ls { struct list_head ls_recover_list; spinlock_t ls_recover_list_lock; int ls_recover_list_count; + struct idr ls_recover_idr; + spinlock_t ls_recover_idr_lock; wait_queue_head_t ls_wait_general; struct mutex ls_clear_proc_locks; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 065bb75ed609..d4d3b3165c6c 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -565,6 +565,8 @@ static int new_lockspace(const char *name, const char *cluster, INIT_LIST_HEAD(&ls->ls_recover_list); spin_lock_init(&ls->ls_recover_list_lock); + idr_init(&ls->ls_recover_idr); + spin_lock_init(&ls->ls_recover_idr_lock); ls->ls_recover_list_count = 0; ls->ls_local_handle = ls; init_waitqueue_head(&ls->ls_wait_general); @@ -636,6 +638,7 @@ static int new_lockspace(const char *name, const char *cluster, spin_lock(&lslist_lock); list_del(&ls->ls_list); spin_unlock(&lslist_lock); + idr_destroy(&ls->ls_recover_idr); kfree(ls->ls_recover_buf); out_lkbfree: idr_destroy(&ls->ls_lkbidr); diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index c8c298d81463..87f1a56eab32 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -325,7 +325,7 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) if (error) goto out; memcpy(rc->rc_buf, r->res_name, r->res_length); - rc->rc_id = (unsigned long) r; + rc->rc_id = (unsigned long) r->res_id; send_rcom(ls, mh, rc); 
out: diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 3c025fe49ad3..ff6f27629a0c 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -277,22 +277,6 @@ static void recover_list_del(struct dlm_rsb *r) dlm_put_rsb(r); } -static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id) -{ - struct dlm_rsb *r = NULL; - - spin_lock(&ls->ls_recover_list_lock); - - list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) { - if (id == (unsigned long) r) - goto out; - } - r = NULL; - out: - spin_unlock(&ls->ls_recover_list_lock); - return r; -} - static void recover_list_clear(struct dlm_ls *ls) { struct dlm_rsb *r, *s; @@ -313,6 +297,94 @@ static void recover_list_clear(struct dlm_ls *ls) spin_unlock(&ls->ls_recover_list_lock); } +static int recover_idr_empty(struct dlm_ls *ls) +{ + int empty = 1; + + spin_lock(&ls->ls_recover_idr_lock); + if (ls->ls_recover_list_count) + empty = 0; + spin_unlock(&ls->ls_recover_idr_lock); + + return empty; +} + +static int recover_idr_add(struct dlm_rsb *r) +{ + struct dlm_ls *ls = r->res_ls; + int rv, id; + + rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS); + if (!rv) + return -ENOMEM; + + spin_lock(&ls->ls_recover_idr_lock); + if (r->res_id) { + spin_unlock(&ls->ls_recover_idr_lock); + return -1; + } + rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id); + if (rv) { + spin_unlock(&ls->ls_recover_idr_lock); + return rv; + } + r->res_id = id; + ls->ls_recover_list_count++; + dlm_hold_rsb(r); + spin_unlock(&ls->ls_recover_idr_lock); + return 0; +} + +static void recover_idr_del(struct dlm_rsb *r) +{ + struct dlm_ls *ls = r->res_ls; + + spin_lock(&ls->ls_recover_idr_lock); + idr_remove(&ls->ls_recover_idr, r->res_id); + r->res_id = 0; + ls->ls_recover_list_count--; + spin_unlock(&ls->ls_recover_idr_lock); + + dlm_put_rsb(r); +} + +static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id) +{ + struct dlm_rsb *r; + + spin_lock(&ls->ls_recover_idr_lock); + r = idr_find(&ls->ls_recover_idr, (int)id); + 
spin_unlock(&ls->ls_recover_idr_lock); + return r; +} + +static int recover_idr_clear_rsb(int id, void *p, void *data) +{ + struct dlm_ls *ls = data; + struct dlm_rsb *r = p; + + r->res_id = 0; + r->res_recover_locks_count = 0; + ls->ls_recover_list_count--; + + dlm_put_rsb(r); + return 0; +} + +static void recover_idr_clear(struct dlm_ls *ls) +{ + spin_lock(&ls->ls_recover_idr_lock); + idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls); + idr_remove_all(&ls->ls_recover_idr); + + if (ls->ls_recover_list_count != 0) { + log_error(ls, "warning: recover_list_count %d", + ls->ls_recover_list_count); + ls->ls_recover_list_count = 0; + } + spin_unlock(&ls->ls_recover_idr_lock); +} + /* Master recovery: find new master node for rsb's that were mastered on nodes that have been removed. @@ -408,7 +480,7 @@ static int recover_master(struct dlm_rsb *r, unsigned int *count) set_new_master(r); error = 0; } else { - recover_list_add(r); + recover_idr_add(r); error = dlm_send_rcom_lookup(r, dir_nodeid); } @@ -493,10 +565,10 @@ int dlm_recover_masters(struct dlm_ls *ls) log_debug(ls, "dlm_recover_masters %u of %u", count, total); - error = dlm_wait_function(ls, &recover_list_empty); + error = dlm_wait_function(ls, &recover_idr_empty); out: if (error) - recover_list_clear(ls); + recover_idr_clear(ls); return error; } @@ -505,7 +577,7 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) struct dlm_rsb *r; int ret_nodeid, new_master; - r = recover_list_find(ls, rc->rc_id); + r = recover_idr_find(ls, rc->rc_id); if (!r) { log_error(ls, "dlm_recover_master_reply no id %llx", (unsigned long long)rc->rc_id); @@ -524,9 +596,9 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) r->res_nodeid = new_master; set_new_master(r); unlock_rsb(r); - recover_list_del(r); + recover_idr_del(r); - if (recover_list_empty(ls)) + if (recover_idr_empty(ls)) wake_up(&ls->ls_wait_general); out: return 0; -- cgit v1.2.3 From 
05c32f47bfae74dabff05208957768078b53cc49 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 14 Jun 2012 12:17:32 -0500 Subject: dlm: fix race between remove and lookup It was possible for a remove message on an old rsb to be sent after a lookup message on a new rsb, where the rsbs were for the same resource name. This could lead to a missing directory entry for the new rsb. It is fixed by keeping a copy of the resource name being removed until after the remove has been sent. A lookup checks if this in-progress remove matches the name it is looking up. Signed-off-by: David Teigland --- fs/dlm/dlm_internal.h | 13 ++++ fs/dlm/lock.c | 181 +++++++++++++++++++++++++++++++++++++++----------- fs/dlm/lockspace.c | 21 +++++- 3 files changed, 176 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index a5f82d5b3946..9d3e485f88c8 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -498,6 +498,13 @@ struct rcom_lock { char rl_lvb[0]; }; +/* + * The max number of resources per rsbtbl bucket that shrink will attempt + * to remove in each iteration. 
+ */ + +#define DLM_REMOVE_NAMES_MAX 8 + struct dlm_ls { struct list_head ls_list; /* list of lockspaces */ dlm_lockspace_t *ls_local_handle; @@ -531,6 +538,12 @@ struct dlm_ls { int ls_new_rsb_count; struct list_head ls_new_rsb; /* new rsb structs */ + spinlock_t ls_remove_spin; + char ls_remove_name[DLM_RESNAME_MAXLEN+1]; + char *ls_remove_names[DLM_REMOVE_NAMES_MAX]; + int ls_remove_len; + int ls_remove_lens[DLM_REMOVE_NAMES_MAX]; + struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d9ee1b96549a..c7c6cf9e8685 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1624,65 +1624,170 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) return error; } -/* FIXME: make this more efficient */ +/* If there's an rsb for the same resource being removed, ensure + that the remove message is sent before the new lookup message. + It should be rare to need a delay here, but if not, then it may + be worthwhile to add a proper wait mechanism rather than a delay. */ -static int shrink_bucket(struct dlm_ls *ls, int b) +static void wait_pending_remove(struct dlm_rsb *r) { - struct rb_node *n; + struct dlm_ls *ls = r->res_ls; + restart: + spin_lock(&ls->ls_remove_spin); + if (ls->ls_remove_len && + !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) { + log_debug(ls, "delay lookup for remove dir %d %s", + r->res_dir_nodeid, r->res_name); + spin_unlock(&ls->ls_remove_spin); + msleep(1); + goto restart; + } + spin_unlock(&ls->ls_remove_spin); +} + +/* + * ls_remove_spin protects ls_remove_name and ls_remove_len which are + * read by other threads in wait_pending_remove. ls_remove_names + * and ls_remove_lens are only used by the scan thread, so they do + * not need protection. 
+ */ + +static void shrink_bucket(struct dlm_ls *ls, int b) +{ + struct rb_node *n, *next; struct dlm_rsb *r; + char *name; int our_nodeid = dlm_our_nodeid(); - int count = 0, found; + int remote_count = 0; + int i, len, rv; - for (;;) { - found = 0; - spin_lock(&ls->ls_rsbtbl[b].lock); - for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = rb_next(n)) { - r = rb_entry(n, struct dlm_rsb, res_hashnode); + memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); - /* If we're the directory record for this rsb, and - we're not the master of it, then we need to wait - for the master node to send us a dir remove for - before removing the dir record. */ + spin_lock(&ls->ls_rsbtbl[b].lock); + for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { + next = rb_next(n); + r = rb_entry(n, struct dlm_rsb, res_hashnode); - if (!dlm_no_directory(ls) && !is_master(r) && - (dlm_dir_nodeid(r) == our_nodeid)) { - continue; - } + /* If we're the directory record for this rsb, and + we're not the master of it, then we need to wait + for the master node to send us a dir remove for + before removing the dir record. */ - if (!time_after_eq(jiffies, r->res_toss_time + - dlm_config.ci_toss_secs * HZ)) - continue; - found = 1; - break; + if (!dlm_no_directory(ls) && + (r->res_master_nodeid != our_nodeid) && + (dlm_dir_nodeid(r) == our_nodeid)) { + continue; } - if (!found) { - spin_unlock(&ls->ls_rsbtbl[b].lock); - break; + if (!time_after_eq(jiffies, r->res_toss_time + + dlm_config.ci_toss_secs * HZ)) { + continue; } - if (kref_put(&r->res_ref, kill_rsb)) { - rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); - spin_unlock(&ls->ls_rsbtbl[b].lock); + if (!dlm_no_directory(ls) && + (r->res_master_nodeid == our_nodeid) && + (dlm_dir_nodeid(r) != our_nodeid)) { /* We're the master of this rsb but we're not the directory record, so we need to tell the dir node to remove the dir record. 
*/ - if (!dlm_no_directory(ls) && is_master(r) && - (dlm_dir_nodeid(r) != our_nodeid)) { - send_remove(r); - } + ls->ls_remove_lens[remote_count] = r->res_length; + memcpy(ls->ls_remove_names[remote_count], r->res_name, + DLM_RESNAME_MAXLEN); + remote_count++; - dlm_free_rsb(r); - count++; - } else { - spin_unlock(&ls->ls_rsbtbl[b].lock); + if (remote_count >= DLM_REMOVE_NAMES_MAX) + break; + continue; + } + + if (!kref_put(&r->res_ref, kill_rsb)) { log_error(ls, "tossed rsb in use %s", r->res_name); + continue; } + + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); + dlm_free_rsb(r); } + spin_unlock(&ls->ls_rsbtbl[b].lock); - return count; + /* + * While searching for rsb's to free, we found some that require + * remote removal. We leave them in place and find them again here + * so there is a very small gap between removing them from the toss + * list and sending the removal. Keeping this gap small is + * important to keep us (the master node) from being out of sync + * with the remote dir node for very long. + * + * From the time the rsb is removed from toss until just after + * send_remove, the rsb name is saved in ls_remove_name. A new + * lookup checks this to ensure that a new lookup message for the + * same resource name is not sent just before the remove message. 
+ */ + + for (i = 0; i < remote_count; i++) { + name = ls->ls_remove_names[i]; + len = ls->ls_remove_lens[i]; + + spin_lock(&ls->ls_rsbtbl[b].lock); + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); + if (rv) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_debug(ls, "remove_name not toss %s", name); + continue; + } + + if (r->res_master_nodeid != our_nodeid) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_debug(ls, "remove_name master %d dir %d our %d %s", + r->res_master_nodeid, r->res_dir_nodeid, + our_nodeid, name); + continue; + } + + if (r->res_dir_nodeid == our_nodeid) { + /* should never happen */ + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_error(ls, "remove_name dir %d master %d our %d %s", + r->res_dir_nodeid, r->res_master_nodeid, + our_nodeid, name); + continue; + } + + if (!time_after_eq(jiffies, r->res_toss_time + + dlm_config.ci_toss_secs * HZ)) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_debug(ls, "remove_name toss_time %lu now %lu %s", + r->res_toss_time, jiffies, name); + continue; + } + + if (!kref_put(&r->res_ref, kill_rsb)) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_error(ls, "remove_name in use %s", name); + continue; + } + + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); + + /* block lookup of same name until we've sent remove */ + spin_lock(&ls->ls_remove_spin); + ls->ls_remove_len = len; + memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN); + spin_unlock(&ls->ls_remove_spin); + spin_unlock(&ls->ls_rsbtbl[b].lock); + + send_remove(r); + + /* allow lookup of name again */ + spin_lock(&ls->ls_remove_spin); + ls->ls_remove_len = 0; + memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); + spin_unlock(&ls->ls_remove_spin); + + dlm_free_rsb(r); + } } void dlm_scan_rsbs(struct dlm_ls *ls) @@ -2608,6 +2713,8 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) return 0; } + wait_pending_remove(r); + r->res_first_lkid = lkb->lkb_id; send_lookup(r, lkb); return 1; diff --git a/fs/dlm/lockspace.c 
b/fs/dlm/lockspace.c index d4d3b3165c6c..952557d00ccd 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -506,6 +506,15 @@ static int new_lockspace(const char *name, const char *cluster, spin_lock_init(&ls->ls_rsbtbl[i].lock); } + spin_lock_init(&ls->ls_remove_spin); + + for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { + ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1, + GFP_KERNEL); + if (!ls->ls_remove_names[i]) + goto out_rsbtbl; + } + idr_init(&ls->ls_lkbidr); spin_lock_init(&ls->ls_lkbidr_spin); @@ -556,7 +565,7 @@ static int new_lockspace(const char *name, const char *cluster, ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS); if (!ls->ls_recover_buf) - goto out_lkbfree; + goto out_lkbidr; ls->ls_slot = 0; ls->ls_num_slots = 0; @@ -640,8 +649,13 @@ static int new_lockspace(const char *name, const char *cluster, spin_unlock(&lslist_lock); idr_destroy(&ls->ls_recover_idr); kfree(ls->ls_recover_buf); - out_lkbfree: + out_lkbidr: idr_destroy(&ls->ls_lkbidr); + for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { + if (ls->ls_remove_names[i]) + kfree(ls->ls_remove_names[i]); + } + out_rsbtbl: vfree(ls->ls_rsbtbl); out_lsfree: if (do_unreg) @@ -796,6 +810,9 @@ static int release_lockspace(struct dlm_ls *ls, int force) vfree(ls->ls_rsbtbl); + for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) + kfree(ls->ls_remove_names[i]); + while (!list_empty(&ls->ls_new_rsb)) { rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); -- cgit v1.2.3 From 6d768177c282637a7943e72b4b2b148e7553ecf1 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Jun 2012 11:23:21 -0500 Subject: dlm: use wait_event_timeout Use wait_event_timeout to avoid using a timer directly. 
Signed-off-by: David Teigland --- fs/dlm/recover.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index ff6f27629a0c..12702cc533b3 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -36,30 +36,23 @@ * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another * function thinks it could have completed the waited-on task, they should wake * up ls_wait_general to get an immediate response rather than waiting for the - * timer to detect the result. A timer wakes us up periodically while waiting - * to see if we should abort due to a node failure. This should only be called - * by the dlm_recoverd thread. + * timeout. This uses a timeout so it can check periodically if the wait + * should abort due to node failure (which doesn't cause a wake_up). + * This should only be called by the dlm_recoverd thread. */ -static void dlm_wait_timer_fn(unsigned long data) -{ - struct dlm_ls *ls = (struct dlm_ls *) data; - mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ)); - wake_up(&ls->ls_wait_general); -} - int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) { int error = 0; + int rv; - init_timer(&ls->ls_timer); - ls->ls_timer.function = dlm_wait_timer_fn; - ls->ls_timer.data = (long) ls; - ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ); - add_timer(&ls->ls_timer); - - wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); - del_timer_sync(&ls->ls_timer); + while (1) { + rv = wait_event_timeout(ls->ls_wait_general, + testfn(ls) || dlm_recovery_stopped(ls), + dlm_config.ci_recover_timer * HZ); + if (rv) + break; + } if (dlm_recovery_stopped(ls)) { log_debug(ls, "dlm_wait_function aborted"); -- cgit v1.2.3 From c503a62103c46d56447f56306b52be6f844689ba Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Jun 2012 15:55:19 -0500 Subject: dlm: fix conversion deadlock from 
recovery The process of rebuilding locks on a new master during recovery could re-order the locks on the convert queue, creating an "in place" conversion deadlock that would not be resolved. Fix this by not considering queue order when granting conversions after recovery. Signed-off-by: David Teigland --- fs/dlm/lock.c | 55 ++++++++++++++++++++++++++++++++++++++++--------------- fs/dlm/recover.c | 10 ++++++++-- 2 files changed, 48 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index c7c6cf9e8685..04e3f15aa0cc 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -2279,10 +2279,14 @@ static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2) * immediate request, it is 0 if called later, after the lock has been * queued. * + * recover is 1 if dlm_recover_grant() is trying to grant conversions + * after recovery. + * * References are from chapter 6 of "VAXcluster Principles" by Roy Davis */ -static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) +static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, + int recover) { int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV); @@ -2314,7 +2318,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) */ if (queue_conflict(&r->res_grantqueue, lkb)) - goto out; + return 0; /* * 6-3: By default, a conversion request is immediately granted if the @@ -2323,7 +2327,24 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) */ if (queue_conflict(&r->res_convertqueue, lkb)) - goto out; + return 0; + + /* + * The RECOVER_GRANT flag means dlm_recover_grant() is granting + * locks for a recovered rsb, on which lkb's have been rebuilt. + * The lkb's may have been rebuilt on the queues in a different + * order than they were in on the previous master. So, granting + * queued conversions in order after recovery doesn't make sense + * since the order hasn't been preserved anyway. 
The new order + * could also have created a new "in place" conversion deadlock. + * (e.g. old, failed master held granted EX, with PR->EX, NL->EX. + * After recovery, there would be no granted locks, and possibly + * NL->EX, PR->EX, an in-place conversion deadlock.) So, after + * recovery, grant conversions without considering order. + */ + + if (conv && recover) + return 1; /* * 6-5: But the default algorithm for deciding whether to grant or @@ -2360,7 +2381,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) if (list_empty(&r->res_convertqueue)) return 1; else - goto out; + return 0; } /* @@ -2406,12 +2427,12 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) if (!now && !conv && list_empty(&r->res_convertqueue) && first_in_list(lkb, &r->res_waitqueue)) return 1; - out: + return 0; } static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, - int *err) + int recover, int *err) { int rv; int8_t alt = 0, rqmode = lkb->lkb_rqmode; @@ -2420,7 +2441,7 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, if (err) *err = 0; - rv = _can_be_granted(r, lkb, now); + rv = _can_be_granted(r, lkb, now, recover); if (rv) goto out; @@ -2461,7 +2482,7 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, if (alt) { lkb->lkb_rqmode = alt; - rv = _can_be_granted(r, lkb, now); + rv = _can_be_granted(r, lkb, now, 0); if (rv) lkb->lkb_sbflags |= DLM_SBF_ALTMODE; else @@ -2485,6 +2506,7 @@ static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw, unsigned int *count) { struct dlm_lkb *lkb, *s; + int recover = rsb_flag(r, RSB_RECOVER_GRANT); int hi, demoted, quit, grant_restart, demote_restart; int deadlk; @@ -2498,7 +2520,7 @@ static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw, demoted = is_demoted(lkb); deadlk = 0; - if (can_be_granted(r, lkb, 0, &deadlk)) { + if (can_be_granted(r, lkb, 0, recover, &deadlk)) { grant_lock_pending(r, 
lkb); grant_restart = 1; if (count) @@ -2542,7 +2564,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw, struct dlm_lkb *lkb, *s; list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { - if (can_be_granted(r, lkb, 0, NULL)) { + if (can_be_granted(r, lkb, 0, 0, NULL)) { grant_lock_pending(r, lkb); if (count) (*count)++; @@ -3042,7 +3064,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error = 0; - if (can_be_granted(r, lkb, 1, NULL)) { + if (can_be_granted(r, lkb, 1, 0, NULL)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); goto out; @@ -3082,7 +3104,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) /* changing an existing lock may allow others to be granted */ - if (can_be_granted(r, lkb, 1, &deadlk)) { + if (can_be_granted(r, lkb, 1, 0, &deadlk)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); goto out; @@ -3108,7 +3130,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (is_demoted(lkb)) { grant_pending_convert(r, DLM_LOCK_IV, NULL, NULL); - if (_can_be_granted(r, lkb, 1)) { + if (_can_be_granted(r, lkb, 1, 0)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); goto out; @@ -5373,9 +5395,10 @@ static struct dlm_rsb *find_grant_rsb(struct dlm_ls *ls, int bucket) if (!rsb_flag(r, RSB_RECOVER_GRANT)) continue; - rsb_clear_flag(r, RSB_RECOVER_GRANT); - if (!is_master(r)) + if (!is_master(r)) { + rsb_clear_flag(r, RSB_RECOVER_GRANT); continue; + } hold_rsb(r); spin_unlock(&ls->ls_rsbtbl[bucket].lock); return r; @@ -5420,7 +5443,9 @@ void dlm_recover_grant(struct dlm_ls *ls) rsb_count++; count = 0; lock_rsb(r); + /* the RECOVER_GRANT flag is checked in the grant path */ grant_pending_locks(r, &count); + rsb_clear_flag(r, RSB_RECOVER_GRANT); lkb_count += count; confirm_master(r, 0); unlock_rsb(r); diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 12702cc533b3..4a7a76e42fc3 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -804,6 +804,7 @@ static void recover_lvb(struct dlm_rsb 
*r) static void recover_conversion(struct dlm_rsb *r) { + struct dlm_ls *ls = r->res_ls; struct dlm_lkb *lkb; int grmode = -1; @@ -818,10 +819,15 @@ static void recover_conversion(struct dlm_rsb *r) list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { if (lkb->lkb_grmode != DLM_LOCK_IV) continue; - if (grmode == -1) + if (grmode == -1) { + log_debug(ls, "recover_conversion %x set gr to rq %d", + lkb->lkb_id, lkb->lkb_rqmode); lkb->lkb_grmode = lkb->lkb_rqmode; - else + } else { + log_debug(ls, "recover_conversion %x set gr %d", + lkb->lkb_id, grmode); lkb->lkb_grmode = grmode; + } } } -- cgit v1.2.3 From 96006ea6d4eea73466e90ef353bf34e507724e77 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 25 Jun 2012 13:48:05 -0500 Subject: dlm: fix missing dir remove I don't know exactly how, but in some cases, a dir record is not removed, or a new one is created when it shouldn't be. The result is that the dir node lookup returns a master node where the rsb does not exist. In this case, The master node will repeatedly return -EBADR for requests, and the lock requests will be stuck. Until all possible ways for this to happen can be eliminated, a simple and effective way to recover from this situation is for the supposed master node to send a standard remove message to the dir node when it receives a request for a resource it has no rsb for. 
Signed-off-by: David Teigland --- fs/dlm/lock.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 04e3f15aa0cc..b56950758188 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -4000,12 +4000,70 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) return error; } +static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) +{ + char name[DLM_RESNAME_MAXLEN + 1]; + struct dlm_message *ms; + struct dlm_mhandle *mh; + struct dlm_rsb *r; + uint32_t hash, b; + int rv, dir_nodeid; + + memset(name, 0, sizeof(name)); + memcpy(name, ms_name, len); + + hash = jhash(name, len, 0); + b = hash & (ls->ls_rsbtbl_size - 1); + + dir_nodeid = dlm_hash2nodeid(ls, hash); + + log_error(ls, "send_repeat_remove dir %d %s", dir_nodeid, name); + + spin_lock(&ls->ls_rsbtbl[b].lock); + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); + if (!rv) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_error(ls, "repeat_remove on keep %s", name); + return; + } + + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); + if (!rv) { + spin_unlock(&ls->ls_rsbtbl[b].lock); + log_error(ls, "repeat_remove on toss %s", name); + return; + } + + /* use ls->remove_name2 to avoid conflict with shrink? 
*/ + + spin_lock(&ls->ls_remove_spin); + ls->ls_remove_len = len; + memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN); + spin_unlock(&ls->ls_remove_spin); + spin_unlock(&ls->ls_rsbtbl[b].lock); + + rv = _create_message(ls, sizeof(struct dlm_message) + len, + dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); + if (rv) + return; + + memcpy(ms->m_extra, name, len); + ms->m_hash = hash; + + send_message(mh, ms); + + spin_lock(&ls->ls_remove_spin); + ls->ls_remove_len = 0; + memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); + spin_unlock(&ls->ls_remove_spin); +} + static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; int from_nodeid; - int error, namelen; + int error, namelen = 0; from_nodeid = ms->m_header.h_nodeid; @@ -4073,13 +4131,21 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) delayed in being sent/arriving/being processed on the dir node. Another node would repeatedly lookup up the master, and the dir node would continue returning our nodeid until our send_remove - took effect. */ + took effect. + + We send another remove message in case our previous send_remove + was lost/ignored/missed somehow. */ if (error != -ENOTBLK) { log_limit(ls, "receive_request %x from %d %d", ms->m_lkid, from_nodeid, error); } + if (namelen && error == -EBADR) { + send_repeat_remove(ls, ms->m_extra, namelen); + msleep(1000); + } + setup_stub_lkb(ls, ms); send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); return error; -- cgit v1.2.3 From e5bcac61472ca627241b394d439decd00bba3aea Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 6 Jul 2012 13:09:07 +0400 Subject: sysfs: fail dentry revalidation after namespace change When we change the namespace tag of a sysfs entry, the associated dentry is still kept around. readdir() will work correctly and not display the old entries, but open() will still succeed, so will reads and writes. 
This will no longer happen if sysfs is remounted, hinting that this is a cache-related problem. I am using the following sequence to demonstrate that: shell1: ip link add type veth unshare -nm shell2: ip link set veth1 cat /sys/devices/virtual/net/veth1/ifindex Before that patch, this will succeed (fail to fail). After it, it will correctly return an error. Differently from a normal rename, which we handle fine, changing the object namespace will keep its path intact. So this check seems necessary as well. [ v2: get type from parent, as suggested by Eric Biederman ] Signed-off-by: Glauber Costa CC: Tejun Heo Reviewed-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e6bb9b2a4cbe..c0bf38a21caa 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -307,6 +307,7 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) { struct sysfs_dirent *sd; int is_dir; + int type; if (nd->flags & LOOKUP_RCU) return -ECHILD; @@ -326,6 +327,13 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) if (strcmp(dentry->d_name.name, sd->s_name) != 0) goto out_bad; + /* The sysfs dirent has been moved to a different namespace */ + type = KOBJ_NS_TYPE_NONE; + if (sd->s_parent) + type = sysfs_ns_type(sd->s_parent); + if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)) + goto out_bad; + mutex_unlock(&sysfs_mutex); out_valid: return 1; -- cgit v1.2.3 From 17f79be93d95bb0e46bd08681ec9c9e601869c15 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 9 Jul 2012 16:13:36 -0700 Subject: sysfs: fail dentry revalidation after namespace change fix don't assume that KOBJ_NS_TYPE_NONE==0. Also save a test-n-branch. Cc: Eric W. Biederman Cc: Glauber Costa Cc: Tejun Heo Signed-off-by: Andrew Morton Acked-by: Serge E.
Hallyn Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index c0bf38a21caa..1cdfb53199aa 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -329,10 +329,12 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) /* The sysfs dirent has been moved to a different namespace */ type = KOBJ_NS_TYPE_NONE; - if (sd->s_parent) + if (sd->s_parent) { type = sysfs_ns_type(sd->s_parent); - if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)) - goto out_bad; + if (type != KOBJ_NS_TYPE_NONE && + sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns) + goto out_bad; + } mutex_unlock(&sysfs_mutex); out_valid: -- cgit v1.2.3 From 4a53ffae6afc94bab803087245b3b45e712c21c8 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:03:18 -0700 Subject: pstore/ram_core: Get rid of prz->ecc_symsize and prz->ecc_poly The struct members were never used anywhere outside of persistent_ram_init_ecc(), so there's actually no need for them to be in the struct. If we ever want to make polynomial or symbol size configurable, it would make more sense to just pass initialized rs_decoder to the persistent_ram init functions. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 7 +++---- include/linux/pstore_ram.h | 2 -- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index a5a7b13d358c..3f4d6e64f6d7 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -177,14 +177,14 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz) struct persistent_ram_buffer *buffer = prz->buffer; int ecc_blocks; size_t ecc_total; + int ecc_symsize = 8; + int ecc_poly = 0x11d; if (!prz->ecc) return 0; prz->ecc_block_size = 128; prz->ecc_size = 16; - prz->ecc_symsize = 8; - prz->ecc_poly = 0x11d; ecc_blocks = DIV_ROUND_UP(prz->buffer_size, prz->ecc_block_size); ecc_total = (ecc_blocks + 1) * prz->ecc_size; @@ -202,8 +202,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz) * first consecutive root is 0 * primitive element to generate roots = 1 */ - prz->rs_decoder = init_rs(prz->ecc_symsize, prz->ecc_poly, 0, 1, - prz->ecc_size); + prz->rs_decoder = init_rs(ecc_symsize, ecc_poly, 0, 1, prz->ecc_size); if (prz->rs_decoder == NULL) { pr_info("persistent_ram: init_rs failed\n"); return -EINVAL; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index e681af92c04b..a0975c02194a 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -41,8 +41,6 @@ struct persistent_ram_zone { int bad_blocks; int ecc_block_size; int ecc_size; - int ecc_symsize; - int ecc_poly; char *old_log; size_t old_log_size; -- cgit v1.2.3 From 5ca5d4e61d0cac15f36160ab48425c6e43bf2e2f Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:03:19 -0700 Subject: pstore/ram: Make ECC size configurable This is now pretty straightforward: instead of using bool, just pass an integer. For backwards compatibility ramoops.ecc=1 means 16 bytes ECC (using 1 byte for ECC isn't much of use anyway). 
Suggested-by: Arve Hjønnevåg Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 22 ++++++++++++++-------- fs/pstore/ram_core.c | 15 ++++++++------- include/linux/pstore_ram.h | 4 ++-- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 58b93fbd117e..b39aebbaeb89 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -63,7 +63,9 @@ MODULE_PARM_DESC(dump_oops, static int ramoops_ecc; module_param_named(ecc, ramoops_ecc, int, 0600); MODULE_PARM_DESC(ramoops_ecc, - "set to 1 to enable ECC support"); + "if non-zero, the option enables ECC support and specifies " + "ECC buffer size in bytes (1 is a special value, means 16 " + "bytes ECC)"); struct ramoops_context { struct persistent_ram_zone **przs; @@ -73,7 +75,7 @@ struct ramoops_context { size_t record_size; size_t console_size; int dump_oops; - bool ecc; + int ecc_size; unsigned int max_dump_cnt; unsigned int dump_write_cnt; unsigned int dump_read_cnt; @@ -288,7 +290,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, for (i = 0; i < cxt->max_dump_cnt; i++) { size_t sz = cxt->record_size; - cxt->przs[i] = persistent_ram_new(*paddr, sz, cxt->ecc); + cxt->przs[i] = persistent_ram_new(*paddr, sz, cxt->ecc_size); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -314,7 +316,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, if (*paddr + sz > *paddr + cxt->size) return -ENOMEM; - *prz = persistent_ram_new(*paddr, sz, cxt->ecc); + *prz = persistent_ram_new(*paddr, sz, cxt->ecc_size); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); @@ -361,7 +363,7 @@ static int __devinit ramoops_probe(struct platform_device *pdev) cxt->record_size = pdata->record_size; cxt->console_size = pdata->console_size; cxt->dump_oops = pdata->dump_oops; - cxt->ecc = pdata->ecc; + cxt->ecc_size
= pdata->ecc_size; paddr = cxt->phys_addr; @@ -411,9 +413,9 @@ static int __devinit ramoops_probe(struct platform_device *pdev) record_size = pdata->record_size; dump_oops = pdata->dump_oops; - pr_info("attached 0x%lx@0x%llx, ecc: %s\n", + pr_info("attached 0x%lx@0x%llx, ecc: %d\n", cxt->size, (unsigned long long)cxt->phys_addr, - ramoops_ecc ? "on" : "off"); + cxt->ecc_size); return 0; @@ -478,7 +480,11 @@ static void ramoops_register_dummy(void) dummy_data->record_size = record_size; dummy_data->console_size = ramoops_console_size; dummy_data->dump_oops = dump_oops; - dummy_data->ecc = ramoops_ecc; + /* + * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC + * (using 1 byte for ECC isn't much of use anyway). + */ + dummy_data->ecc_size = ramoops_ecc == 1 ? 16 : ramoops_ecc; dummy = platform_device_register_data(NULL, "ramoops", -1, dummy_data, sizeof(struct ramoops_platform_data)); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3f4d6e64f6d7..7e5a2a9154ca 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -171,7 +171,8 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) } } -static int persistent_ram_init_ecc(struct persistent_ram_zone *prz) +static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, + int ecc_size) { int numerr; struct persistent_ram_buffer *buffer = prz->buffer; @@ -184,7 +185,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz) return 0; prz->ecc_block_size = 128; - prz->ecc_size = 16; + prz->ecc_size = ecc_size; ecc_blocks = DIV_ROUND_UP(prz->buffer_size, prz->ecc_block_size); ecc_total = (ecc_blocks + 1) * prz->ecc_size; @@ -390,13 +391,13 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, - bool ecc) + int ecc_size) { int ret; - prz->ecc = ecc; + prz->ecc = ecc_size; - ret = persistent_ram_init_ecc(prz); + ret = persistent_ram_init_ecc(prz, 
ecc_size); if (ret) return ret; @@ -444,7 +445,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, size_t size, - bool ecc) + int ecc_size) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -459,7 +460,7 @@ struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, if (ret) goto err; - ret = persistent_ram_post_init(prz, ecc); + ret = persistent_ram_post_init(prz, ecc_size); if (ret) goto err; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index a0975c02194a..94b79f173365 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -48,7 +48,7 @@ struct persistent_ram_zone { struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, size_t size, - bool ecc); + int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); @@ -74,7 +74,7 @@ struct ramoops_platform_data { unsigned long record_size; unsigned long console_size; int dump_oops; - bool ecc; + int ecc_size; }; #endif -- cgit v1.2.3 From c1743cbc8d20d208bb1d2b10598204f2d89b144c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:03:20 -0700 Subject: pstore/ram_core: Get rid of prz->ecc enable/disable flag Nowadays we can use prz->ecc_size as a flag, no need for the special member in the prz struct. 
Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 10 ++++------ include/linux/pstore_ram.h | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 7e5a2a9154ca..4dabbb8e4270 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -114,7 +114,7 @@ static void notrace persistent_ram_update_ecc(struct persistent_ram_zone *prz, int ecc_size = prz->ecc_size; int size = prz->ecc_block_size; - if (!prz->ecc) + if (!prz->ecc_size) return; block = buffer->data + (start & ~(ecc_block_size - 1)); @@ -133,7 +133,7 @@ static void persistent_ram_update_header_ecc(struct persistent_ram_zone *prz) { struct persistent_ram_buffer *buffer = prz->buffer; - if (!prz->ecc) + if (!prz->ecc_size) return; persistent_ram_encode_rs8(prz, (uint8_t *)buffer, sizeof(*buffer), @@ -146,7 +146,7 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) uint8_t *block; uint8_t *par; - if (!prz->ecc) + if (!prz->ecc_size) return; block = buffer->data; @@ -181,7 +181,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, int ecc_symsize = 8; int ecc_poly = 0x11d; - if (!prz->ecc) + if (!ecc_size) return 0; prz->ecc_block_size = 128; @@ -395,8 +395,6 @@ static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, { int ret; - prz->ecc = ecc_size; - ret = persistent_ram_init_ecc(prz, ecc_size); if (ret) return ret; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 94b79f173365..dcf805f56bc6 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -33,7 +33,6 @@ struct persistent_ram_zone { size_t buffer_size; /* ECC correction */ - bool ecc; char *par_buffer; char *par_header; struct rs_control *rs_decoder; -- cgit v1.2.3 From 897dba027445be93f40e5caf550556ca38c48c51 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:40 -0700 Subject: 
pstore: Introduce write_buf backend callback For function tracing we need to stop using pstore.buf directly, since in a tracing callback we can't use spinlocks, and thus we can't safely use the global buffer. With write_buf callback, backends no longer need to access pstore.buf directly, and thus we can pass any buffers (e.g. allocated on stack). Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- fs/pstore/platform.c | 10 ++++++++++ include/linux/pstore.h | 4 ++++ 2 files changed, 14 insertions(+) (limited to 'fs') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 6b3ff045fe6e..ef5ca8a0255c 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -188,6 +188,14 @@ static void pstore_register_console(void) static void pstore_register_console(void) {} #endif +static int pstore_write_compat(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, + size_t size, struct pstore_info *psi) +{ + return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); +} + /* * platform specific persistent storage driver registers with * us here. 
If pstore is already mounted, call the platform @@ -212,6 +220,8 @@ int pstore_register(struct pstore_info *psi) return -EINVAL; } + if (!psi->write) + psi->write = pstore_write_compat; psinfo = psi; mutex_init(&psinfo->read_mutex); spin_unlock(&pstore_lock); diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 1bd014b8e432..b107484192fc 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -48,6 +48,10 @@ struct pstore_info { int (*write)(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, size_t size, struct pstore_info *psi); + int (*write_buf)(enum pstore_type_id type, + enum kmsg_dump_reason reason, u64 *id, + unsigned int part, const char *buf, size_t size, + struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); void *data; -- cgit v1.2.3 From 060287b8c467bf49a594d8d669e1986c6d8d76b0 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:41 -0700 Subject: pstore: Add persistent function tracing With this support kernel can save function call chain log into a persistent ram buffer that can be decoded and dumped after reboot through pstore filesystem. It can be used to determine what function was last called before a reset or panic. We store the log in a binary format and then decode it at read time. p.s. Mostly the code comes from trace_persistent.c driver found in the Android git tree, written by Colin Cross (according to sign-off history). I reworked the driver a little bit, and ported it to pstore. 
Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- fs/pstore/Kconfig | 12 ++++++ fs/pstore/Makefile | 1 + fs/pstore/ftrace.c | 35 ++++++++++++++++ fs/pstore/inode.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/pstore/internal.h | 43 +++++++++++++++++++ fs/pstore/platform.c | 2 +- include/linux/pstore.h | 9 ++++ 7 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 fs/pstore/ftrace.c (limited to 'fs') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index d044de6ee308..d39bb5cce883 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -19,6 +19,18 @@ config PSTORE_CONSOLE When the option is enabled, pstore will log all kernel messages, even if no oops or panic happened. +config PSTORE_FTRACE + bool "Persistent function tracer" + depends on PSTORE + depends on FUNCTION_TRACER + help + With this option kernel traces function calls into a persistent + ram buffer that can be decoded and dumped after reboot through + pstore filesystem. It can be used to determine what function + was last called before a reset or panic. + + If unsure, say N. + config PSTORE_RAM tristate "Log panic/oops to a RAM buffer" depends on PSTORE diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 278a44e0d4e1..4c9095c2781e 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -5,6 +5,7 @@ obj-y += pstore.o pstore-objs += inode.o platform.o +obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o ramoops-objs += ram.o ram_core.o obj-$(CONFIG_PSTORE_RAM) += ramoops.o diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c new file mode 100644 index 000000000000..a130d484b7d3 --- /dev/null +++ b/fs/pstore/ftrace.c @@ -0,0 +1,35 @@ +/* + * Copyright 2012 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) +{ + struct pstore_ftrace_record rec = {}; + + if (unlikely(oops_in_progress)) + return; + + rec.ip = ip; + rec.parent_ip = parent_ip; + pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); + psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, + sizeof(rec), psinfo); +} diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 45bff5441b04..4ab572e6d277 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -52,18 +53,117 @@ struct pstore_private { char data[]; }; +struct pstore_ftrace_seq_data { + const void *ptr; + size_t off; + size_t size; +}; + +#define REC_SIZE sizeof(struct pstore_ftrace_record) + +static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct pstore_private *ps = s->private; + struct pstore_ftrace_seq_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + data->off = ps->size % REC_SIZE; + data->off += *pos * REC_SIZE; + if (data->off + REC_SIZE > ps->size) { + kfree(data); + return NULL; + } + + return data; + +} + +static void pstore_ftrace_seq_stop(struct seq_file *s, void *v) +{ + kfree(v); +} + +static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct pstore_private *ps = s->private; + struct pstore_ftrace_seq_data *data = v; + + data->off += REC_SIZE; + if (data->off + REC_SIZE > ps->size) + return NULL; + + (*pos)++; + return data; +} + +static int pstore_ftrace_seq_show(struct seq_file *s, void *v) +{ + struct pstore_private 
*ps = s->private; + struct pstore_ftrace_seq_data *data = v; + struct pstore_ftrace_record *rec = (void *)(ps->data + data->off); + + seq_printf(s, "%d %08lx %08lx %pf <- %pF\n", + pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip, + (void *)rec->ip, (void *)rec->parent_ip); + + return 0; +} + +static const struct seq_operations pstore_ftrace_seq_ops = { + .start = pstore_ftrace_seq_start, + .next = pstore_ftrace_seq_next, + .stop = pstore_ftrace_seq_stop, + .show = pstore_ftrace_seq_show, +}; + static ssize_t pstore_file_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct pstore_private *ps = file->private_data; + struct seq_file *sf = file->private_data; + struct pstore_private *ps = sf->private; + if (ps->type == PSTORE_TYPE_FTRACE) + return seq_read(file, userbuf, count, ppos); return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size); } +static int pstore_file_open(struct inode *inode, struct file *file) +{ + struct pstore_private *ps = inode->i_private; + struct seq_file *sf; + int err; + const struct seq_operations *sops = NULL; + + if (ps->type == PSTORE_TYPE_FTRACE) + sops = &pstore_ftrace_seq_ops; + + err = seq_open(file, sops); + if (err < 0) + return err; + + sf = file->private_data; + sf->private = ps; + + return 0; +} + +static loff_t pstore_file_llseek(struct file *file, loff_t off, int origin) +{ + struct seq_file *sf = file->private_data; + + if (sf->op) + return seq_lseek(file, off, origin); + return default_llseek(file, off, origin); +} + static const struct file_operations pstore_file_operations = { - .open = simple_open, - .read = pstore_file_read, - .llseek = default_llseek, + .open = pstore_file_open, + .read = pstore_file_read, + .llseek = pstore_file_llseek, + .release = seq_release, }; /* @@ -215,6 +315,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, case PSTORE_TYPE_CONSOLE: sprintf(name, "console-%s", psname); break; + case PSTORE_TYPE_FTRACE: + sprintf(name, 
"ftrace-%s", psname); + break; case PSTORE_TYPE_MCE: sprintf(name, "mce-%s-%lld", psname, id); break; diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 3bde461c3f34..958c48d8905c 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -1,6 +1,49 @@ +#ifndef __PSTORE_INTERNAL_H__ +#define __PSTORE_INTERNAL_H__ + +#include + +#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB) +#define PSTORE_CPU_IN_IP 0x1 +#elif NR_CPUS <= 4 && defined(CONFIG_ARM) +#define PSTORE_CPU_IN_IP 0x3 +#endif + +struct pstore_ftrace_record { + unsigned long ip; + unsigned long parent_ip; +#ifndef PSTORE_CPU_IN_IP + unsigned int cpu; +#endif +}; + +static inline void +pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu) +{ +#ifndef PSTORE_CPU_IN_IP + rec->cpu = cpu; +#else + rec->ip |= cpu; +#endif +} + +static inline unsigned int +pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) +{ +#ifndef PSTORE_CPU_IN_IP + return rec->cpu; +#else + return rec->ip & PSTORE_CPU_IN_IP; +#endif +} + +extern struct pstore_info *psinfo; + extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(int); extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, char *data, size_t size, struct timespec time, struct pstore_info *psi); extern int pstore_is_mounted(void); + +#endif diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index ef5ca8a0255c..29996e8793a7 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -61,7 +61,7 @@ static DECLARE_WORK(pstore_work, pstore_dowork); * calls to pstore_register() */ static DEFINE_SPINLOCK(pstore_lock); -static struct pstore_info *psinfo; +struct pstore_info *psinfo; static char *backend; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index b107484192fc..120443b0fda5 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -30,6 +30,7 @@ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, PSTORE_TYPE_MCE = 1, PSTORE_TYPE_CONSOLE = 2, + PSTORE_TYPE_FTRACE = 3, 
PSTORE_TYPE_UNKNOWN = 255 }; @@ -57,6 +58,14 @@ struct pstore_info { void *data; }; + +#ifdef CONFIG_PSTORE_FTRACE +extern void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip); +#else +static inline void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) +{ } +#endif + #ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); #else -- cgit v1.2.3 From c2b7113261c5bb49031a15b833e59ea2d8ec4074 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:43 -0700 Subject: pstore/ram: Convert to write_buf callback Don't use pstore.buf directly, instead convert the code to write_buf callback which passes a pointer to a buffer as an argument. Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index b39aebbaeb89..74f4111bd0da 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -170,11 +170,12 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) return len; } -static int ramoops_pstore_write(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, - unsigned int part, - size_t size, struct pstore_info *psi) + +static int ramoops_pstore_write_buf(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, + const char *buf, size_t size, + struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; @@ -183,7 +184,7 @@ static int ramoops_pstore_write(enum pstore_type_id type, if (type == PSTORE_TYPE_CONSOLE) { if (!cxt->cprz) return -ENOMEM; - persistent_ram_write(cxt->cprz, cxt->pstore.buf, size); + persistent_ram_write(cxt->cprz, buf, size); return 0; } @@ -212,7 +213,7 @@ static int ramoops_pstore_write(enum pstore_type_id type, hlen = ramoops_write_kmsg_hdr(prz); if (size + hlen > prz->buffer_size) size = 
prz->buffer_size - hlen; - persistent_ram_write(prz, cxt->pstore.buf, size); + persistent_ram_write(prz, buf, size); cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt; @@ -250,7 +251,7 @@ static struct ramoops_context oops_cxt = { .name = "ramoops", .open = ramoops_pstore_open, .read = ramoops_pstore_read, - .write = ramoops_pstore_write, + .write_buf = ramoops_pstore_write_buf, .erase = ramoops_pstore_erase, }, }; -- cgit v1.2.3 From a694d1b5916a486ce25fb5f2b39f2627f7afd5f3 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:44 -0700 Subject: pstore/ram: Add ftrace messages handling The ftrace log size is configurable via ramoops.ftrace_size module option, and the log itself is available via /ftrace-ramoops file. Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- Documentation/ramoops.txt | 25 +++++++++++++++++++++++++ fs/pstore/ram.c | 37 +++++++++++++++++++++++++++++++++---- include/linux/pstore_ram.h | 1 + 3 files changed, 59 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt index 59a74a8ee2e5..197ad59ab9bf 100644 --- a/Documentation/ramoops.txt +++ b/Documentation/ramoops.txt @@ -94,3 +94,28 @@ timestamp and a new line. The dump then continues with the actual data. The dump data can be read from the pstore filesystem. The format for these files is "dmesg-ramoops-N", where N is the record number in memory. To delete a stored record from RAM, simply unlink the respective pstore file. + +5. Persistent function tracing + +Persistent function tracing might be useful for debugging software or hardware +related hangs. The functions call chain log is stored in a "ftrace-ramoops" +file. Here is an example of usage: + + # mount -t debugfs debugfs /sys/kernel/debug/ + # cd /sys/kernel/debug/tracing + # echo function > current_tracer + # echo 1 > options/func_pstore + # reboot -f + [...] 
+ # mount -t pstore pstore /mnt/ + # tail /mnt/ftrace-ramoops + 0 ffffffff8101ea64 ffffffff8101bcda native_apic_mem_read <- disconnect_bsp_APIC+0x6a/0xc0 + 0 ffffffff8101ea44 ffffffff8101bcf6 native_apic_mem_write <- disconnect_bsp_APIC+0x86/0xc0 + 0 ffffffff81020084 ffffffff8101a4b5 hpet_disable <- native_machine_shutdown+0x75/0x90 + 0 ffffffff81005f94 ffffffff8101a4bb iommu_shutdown_noop <- native_machine_shutdown+0x7b/0x90 + 0 ffffffff8101a6a1 ffffffff8101a437 native_machine_emergency_restart <- native_machine_restart+0x37/0x40 + 0 ffffffff811f9876 ffffffff8101a73a acpi_reboot <- native_machine_emergency_restart+0xaa/0x1e0 + 0 ffffffff8101a514 ffffffff8101a772 mach_reboot_fixups <- native_machine_emergency_restart+0xe2/0x1e0 + 0 ffffffff811d9c54 ffffffff8101a7a0 __const_udelay <- native_machine_emergency_restart+0x110/0x1e0 + 0 ffffffff811d9c34 ffffffff811d9c80 __delay <- __const_udelay+0x30/0x40 + 0 ffffffff811d9d14 ffffffff811d9c3f delay_tsc <- __delay+0xf/0x20 diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 74f4111bd0da..1dd108e0cc60 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -45,6 +45,10 @@ static ulong ramoops_console_size = MIN_MEM_SIZE; module_param_named(console_size, ramoops_console_size, ulong, 0400); MODULE_PARM_DESC(console_size, "size of kernel console log"); +static ulong ramoops_ftrace_size = MIN_MEM_SIZE; +module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400); +MODULE_PARM_DESC(ftrace_size, "size of ftrace log"); + static ulong mem_address; module_param(mem_address, ulong, 0400); MODULE_PARM_DESC(mem_address, @@ -70,16 +74,19 @@ MODULE_PARM_DESC(ramoops_ecc, struct ramoops_context { struct persistent_ram_zone **przs; struct persistent_ram_zone *cprz; + struct persistent_ram_zone *fprz; phys_addr_t phys_addr; unsigned long size; size_t record_size; size_t console_size; + size_t ftrace_size; int dump_oops; int ecc_size; unsigned int max_dump_cnt; unsigned int dump_write_cnt; unsigned int dump_read_cnt; unsigned int 
console_read_cnt; + unsigned int ftrace_read_cnt; struct pstore_info pstore; }; @@ -137,6 +144,9 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz) prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, 1, id, type, PSTORE_TYPE_CONSOLE, 0); + if (!prz) + prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, + 1, id, type, PSTORE_TYPE_FTRACE, 0); if (!prz) return 0; @@ -186,6 +196,11 @@ static int ramoops_pstore_write_buf(enum pstore_type_id type, return -ENOMEM; persistent_ram_write(cxt->cprz, buf, size); return 0; + } else if (type == PSTORE_TYPE_FTRACE) { + if (!cxt->fprz) + return -ENOMEM; + persistent_ram_write(cxt->fprz, buf, size); + return 0; } if (type != PSTORE_TYPE_DMESG) @@ -235,6 +250,9 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, case PSTORE_TYPE_CONSOLE: prz = cxt->cprz; break; + case PSTORE_TYPE_FTRACE: + prz = cxt->fprz; + break; default: return -EINVAL; } @@ -348,7 +366,8 @@ static int __devinit ramoops_probe(struct platform_device *pdev) if (cxt->max_dump_cnt) goto fail_out; - if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size)) { + if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && + !pdata->ftrace_size)) { pr_err("The memory size and the record/console size must be " "non-zero\n"); goto fail_out; @@ -357,18 +376,20 @@ static int __devinit ramoops_probe(struct platform_device *pdev) pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); pdata->record_size = rounddown_pow_of_two(pdata->record_size); pdata->console_size = rounddown_pow_of_two(pdata->console_size); + pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->console_size = pdata->console_size; + cxt->ftrace_size = pdata->ftrace_size; cxt->dump_oops = pdata->dump_oops; cxt->ecc_size = pdata->ecc_size; paddr = cxt->phys_addr; - 
dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size; err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); if (err) goto fail_out; @@ -377,9 +398,14 @@ static int __devinit ramoops_probe(struct platform_device *pdev) if (err) goto fail_init_cprz; - if (!cxt->przs && !cxt->cprz) { + err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size); + if (err) + goto fail_init_fprz; + + if (!cxt->przs && !cxt->cprz && !cxt->fprz) { pr_err("memory size too small, minimum is %lu\n", - cxt->console_size + cxt->record_size); + cxt->console_size + cxt->record_size + + cxt->ftrace_size); goto fail_cnt; } @@ -426,6 +452,8 @@ fail_clear: cxt->pstore.bufsize = 0; cxt->max_dump_cnt = 0; fail_cnt: + kfree(cxt->fprz); +fail_init_fprz: kfree(cxt->cprz); fail_init_cprz: ramoops_free_przs(cxt); @@ -480,6 +508,7 @@ static void ramoops_register_dummy(void) dummy_data->mem_address = mem_address; dummy_data->record_size = record_size; dummy_data->console_size = ramoops_console_size; + dummy_data->ftrace_size = ramoops_ftrace_size; dummy_data->dump_oops = dump_oops; /* * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index dcf805f56bc6..af848e1593b9 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -72,6 +72,7 @@ struct ramoops_platform_data { unsigned long mem_address; unsigned long record_size; unsigned long console_size; + unsigned long ftrace_size; int dump_oops; int ecc_size; }; -- cgit v1.2.3 From 597d92891b8859b4b4949fd08e25e60fc80ddaaf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:10 -0400 Subject: NFS: Split out NFS v2 inode operations This patch moves the NFS v2 file and directory inode functions into files that are only compiled when CONFIG_NFS_V2 is enabled.
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 43 +++++++++---------------------------------- fs/nfs/file.c | 6 ------ fs/nfs/internal.h | 9 +++++++++ fs/nfs/proc.c | 21 +++++++++++++++++++++ include/linux/nfs_fs.h | 2 -- 5 files changed, 39 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b7136853ca9c..9ae329d62340 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -46,16 +46,6 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); -static int nfs_create(struct inode *, struct dentry *, umode_t, bool); -static int nfs_mkdir(struct inode *, struct dentry *, umode_t); -static int nfs_rmdir(struct inode *, struct dentry *); -static int nfs_unlink(struct inode *, struct dentry *); -static int nfs_symlink(struct inode *, struct dentry *, const char *); -static int nfs_link(struct dentry *, struct inode *, struct dentry *); -static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); -static int nfs_rename(struct inode *, struct dentry *, - struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct page*); @@ -69,21 +59,6 @@ const struct file_operations nfs_dir_operations = { .fsync = nfs_fsync_dir, }; -const struct inode_operations nfs_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, -}; - const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; @@ 
-1270,7 +1245,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_release = nfs_d_release, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) +struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct dentry *parent; @@ -1588,7 +1563,7 @@ out_error: * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -static int nfs_create(struct inode *dir, struct dentry *dentry, +int nfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct iattr attr; @@ -1613,7 +1588,7 @@ out_err: /* * See comments for nfs_proc_create regarding failed operations. */ -static int +int nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; @@ -1640,7 +1615,7 @@ out_err: /* * See comments for nfs_proc_create regarding failed operations. */ -static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct iattr attr; int error; @@ -1666,7 +1641,7 @@ static void nfs_dentry_handle_enoent(struct dentry *dentry) d_delete(dentry); } -static int nfs_rmdir(struct inode *dir, struct dentry *dentry) +int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -1725,7 +1700,7 @@ out: * * If sillyrename() returns 0, we do nothing, otherwise we unlink. */ -static int nfs_unlink(struct inode *dir, struct dentry *dentry) +int nfs_unlink(struct inode *dir, struct dentry *dentry) { int error; int need_rehash = 0; @@ -1769,7 +1744,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. 
*/ -static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct pagevec lru_pvec; struct page *page; @@ -1824,7 +1799,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym return 0; } -static int +int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; @@ -1869,7 +1844,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) * If these conditions are met, we can drop the dentries before doing * the rename. */ -static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, +int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct inode *old_inode = old_dentry->d_inode; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 57a22a1533e2..7da8745e22ac 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,12 +41,6 @@ static const struct vm_operations_struct nfs_file_vm_ops; -const struct inode_operations nfs_file_inode_operations = { - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, -}; - #ifdef CONFIG_NFS_V3 const struct inode_operations nfs3_file_inode_operations = { .permission = nfs_permission, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7edc172c371e..35a8ffec69f6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -247,6 +247,15 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc); +struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); +int nfs_create(struct inode *, struct dentry *, umode_t, bool); +int nfs_mkdir(struct inode *, struct dentry *, umode_t); +int nfs_rmdir(struct inode *, struct dentry *); +int nfs_unlink(struct inode *, struct dentry *); +int 
nfs_symlink(struct inode *, struct dentry *, const char *); +int nfs_link(struct dentry *, struct inode *, struct dentry *); +int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); +int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c5ed1c0a8ab7..4d3356af3309 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -745,6 +745,27 @@ static int nfs_return_delegation(struct inode *inode) return 0; } +static const struct inode_operations nfs_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +static const struct inode_operations nfs_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b23cfc120edb..6c38bc9c0081 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -427,7 +427,6 @@ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ /* * linux/fs/nfs/file.c */ -extern const struct inode_operations nfs_file_inode_operations; #ifdef CONFIG_NFS_V3 extern const struct inode_operations nfs3_file_inode_operations; #endif /* CONFIG_NFS_V3 */ @@ -485,7 +484,6 @@ extern ssize_t nfs_file_direct_write(struct kiocb *iocb, /* * linux/fs/nfs/dir.c */ -extern const struct inode_operations nfs_dir_inode_operations; #ifdef CONFIG_NFS_V3 extern const struct inode_operations nfs3_dir_inode_operations; #endif /* CONFIG_NFS_V3 */ -- cgit v1.2.3 From ab96291ea16b6b9c76bfac35ccbb26a15ecb01ce Mon Sep 17 00:00:00 2001 
From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:11 -0400 Subject: NFS: Split out NFS v3 inode operations This patch moves the NFS v3 file and directory inode functions into files that are only compiled when CONFIG_NFS_V3 is enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 21 --------------------- fs/nfs/file.c | 12 ------------ fs/nfs/nfs3proc.c | 29 +++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 6 ------ 4 files changed, 29 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9ae329d62340..e75f2aaafadf 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -63,27 +63,6 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -#ifdef CONFIG_NFS_V3 -const struct inode_operations nfs3_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, -}; -#endif /* CONFIG_NFS_V3 */ - #ifdef CONFIG_NFS_V4 static int nfs_atomic_open(struct inode *, struct dentry *, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7da8745e22ac..76239178e959 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,18 +41,6 @@ static const struct vm_operations_struct nfs_file_vm_ops; -#ifdef CONFIG_NFS_V3 -const struct inode_operations nfs3_file_inode_operations = { - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, -}; -#endif /* CONFIG_NFS_v3 */ - /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) diff
--git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f580358cad62..65d23eb92fe0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -888,6 +888,35 @@ static int nfs3_return_delegation(struct inode *inode) return 0; } +static const struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; + +static const struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c38bc9c0081..4b6043c20f77 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -427,9 +427,6 @@ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ /* * linux/fs/nfs/file.c */ -#ifdef CONFIG_NFS_V3 -extern const struct inode_operations nfs3_file_inode_operations; -#endif /* CONFIG_NFS_V3 */ extern const struct file_operations nfs_file_operations; #ifdef CONFIG_NFS_V4 extern const struct file_operations nfs4_file_operations; @@ -484,9 +481,6 @@ extern ssize_t nfs_file_direct_write(struct kiocb *iocb, /* * linux/fs/nfs/dir.c */ -#ifdef CONFIG_NFS_V3 -extern const struct inode_operations nfs3_dir_inode_operations; -#endif /* CONFIG_NFS_V3 */ extern const struct file_operations nfs_dir_operations; extern const struct dentry_operations nfs_dentry_operations; -- cgit v1.2.3 From 
73a79706d7f197a428a43fbf335bbe75cdbc221f Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:12 -0400 Subject: NFS: Split out NFS v4 inode operations The NFS v4 file inode operations are already in nfs4proc.c, so this patch just needs to move the directory operations to the same file. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 33 +++------------------------------ fs/nfs/nfs4_fs.h | 5 ++++- fs/nfs/nfs4proc.c | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e75f2aaafadf..d49f1b9cd3fd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -63,33 +63,6 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -#ifdef CONFIG_NFS_V4 - -static int nfs_atomic_open(struct inode *, struct dentry *, - struct file *, unsigned, umode_t, - int *); -const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .atomic_open = nfs_atomic_open, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .getxattr = generic_getxattr, - .setxattr = generic_setxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -}; - -#endif /* CONFIG_NFS_V4 */ - static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) { struct nfs_open_dir_context *ctx; @@ -1352,9 +1325,9 @@ out: return err; } -static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned open_flags, - umode_t mode, int *opened) +int nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned open_flags, + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct
dentry *res; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3696ca7f5f4d..e2c4c72d3866 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -200,7 +200,10 @@ struct nfs4_state_maintenance_ops { }; extern const struct dentry_operations nfs4_dentry_operations; -extern const struct inode_operations nfs4_dir_inode_operations; + +/* dir.c */ +int nfs_atomic_open(struct inode *, struct dentry *, struct file *, + unsigned, umode_t, int *); /* write.c */ int nfs4_write_inode(struct inode *, struct writeback_control *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05801be4a180..5e373c30e8d4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6832,6 +6832,26 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { #endif }; +const struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, +}; + static const struct inode_operations nfs4_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, -- cgit v1.2.3 From 129d1977ed39cbb4f091a518e4a12498c04f45ba Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:13 -0400 Subject: NFS: Create an init_nfs_v4() function I want to initialize all of NFS v4 in a single function that will eventually be used as the v4 module init function. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/inode.c | 67 ++++++++++++++++++++++++++++++------------------------ fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4super.c | 23 +++++++++++++++++++ 4 files changed, 65 insertions(+), 31 deletions(-) create mode 100644 fs/nfs/nfs4super.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7ddd45d9f170..162a699134ca 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,7 +13,7 @@ nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - delegation.o idmap.o \ + nfs4super.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 28c9ebbe78a6..35f7e4bc680e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -1628,87 +1627,96 @@ static int __init init_nfs_fs(void) { int err; - err = nfs_idmap_init(); - if (err < 0) - goto out10; - err = nfs_dns_resolver_init(); if (err < 0) - goto out9; + goto out11; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out8; + goto out10; err = nfs_fscache_register(); if (err < 0) - goto out7; + goto out9; err = nfsiod_start(); if (err) - goto out6; + goto out8; err = nfs_fs_proc_init(); if (err) - goto out5; + goto out7; err = nfs_init_nfspagecache(); if (err) - goto out4; + goto out6; err = nfs_init_inodecache(); if (err) - goto out3; + goto out5; err = nfs_init_readpagecache(); if (err) - goto out2; + goto out4; err = nfs_init_writepagecache(); if (err) - goto out1; + goto out3; err = nfs_init_directcache(); if (err) - goto out0; + goto out2; #ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); #endif + +#ifdef CONFIG_NFS_V4 + err = init_nfs_v4(); + if (err) + goto 
out1; +#endif + if ((err = register_nfs_fs()) != 0) - goto out; + goto out0; + return 0; -out: +out0: +#ifdef CONFIG_NFS_V4 + exit_nfs_v4(); +out1: +#endif #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); -out0: - nfs_destroy_writepagecache(); -out1: - nfs_destroy_readpagecache(); out2: - nfs_destroy_inodecache(); + nfs_destroy_writepagecache(); out3: - nfs_destroy_nfspagecache(); + nfs_destroy_readpagecache(); out4: - nfs_fs_proc_exit(); + nfs_destroy_inodecache(); out5: - nfsiod_stop(); + nfs_destroy_nfspagecache(); out6: - nfs_fscache_unregister(); + nfs_fs_proc_exit(); out7: - unregister_pernet_subsys(&nfs_net_ops); + nfsiod_stop(); out8: - nfs_dns_resolver_destroy(); + nfs_fscache_unregister(); out9: - nfs_idmap_quit(); + unregister_pernet_subsys(&nfs_net_ops); out10: + nfs_dns_resolver_destroy(); +out11: return err; } static void __exit exit_nfs_fs(void) { +#ifdef CONFIG_NFS_V4 + exit_nfs_v4(); +#endif nfs_destroy_directcache(); nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); @@ -1717,7 +1725,6 @@ static void __exit exit_nfs_fs(void) nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); nfs_dns_resolver_destroy(); - nfs_idmap_quit(); #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e2c4c72d3866..1a6ed3f9a32a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -364,6 +364,10 @@ extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta extern const nfs4_stateid zero_stateid; +/* nfs4super.c */ +int init_nfs_v4(void); +void exit_nfs_v4(void); + /* nfs4xdr.c */ extern struct rpc_procinfo nfs4_procedures[]; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c new file mode 100644 index 000000000000..366e41459695 --- /dev/null +++ b/fs/nfs/nfs4super.c @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2012 Bryan Schumaker + */ +#include +#include + +int __init init_nfs_v4(void) +{ + int err; + + err = 
nfs_idmap_init(); + if (err) + goto out; + + return 0; +out: + return err; +} + +void __exit exit_nfs_v4(void) +{ + nfs_idmap_quit(); +} -- cgit v1.2.3 From 466bfe7f4a5bee4cdd73d3f6bd290173a8c75a40 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:14 -0400 Subject: NFS: Initialize v4 sysctls from nfs_init_v4() And split them out of the generic client into their own file. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 7 +++++- fs/nfs/nfs4_fs.h | 15 ++++++++++++ fs/nfs/nfs4super.c | 9 +++++++ fs/nfs/nfs4sysctl.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/sysctl.c | 26 -------------------- 5 files changed, 98 insertions(+), 27 deletions(-) create mode 100644 fs/nfs/nfs4sysctl.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 162a699134ca..4a78e76440f8 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -17,7 +17,12 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o -nfs-$(CONFIG_SYSCTL) += sysctl.o + +ifeq ($(CONFIG_SYSCTL), y) +nfs-y += sysctl.o +nfs-$(CONFIG_NFS_V4) += nfs4sysctl.o +endif + nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1a6ed3f9a32a..b508fef1a32b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -368,6 +368,21 @@ extern const nfs4_stateid zero_stateid; int init_nfs_v4(void); void exit_nfs_v4(void); +/* nfs4sysctl.c */ +#ifdef CONFIG_SYSCTL +int nfs4_register_sysctl(void); +void nfs4_unregister_sysctl(void); +#else +static inline int nfs4_register_sysctl(void) +{ + return 0; +} + +static inline int nfs4_unregister_sysctl(void) +{ +} +#endif + /* nfs4xdr.c */ extern struct rpc_procinfo nfs4_procedures[]; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 366e41459695..70c394e75ca1 
100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -3,6 +3,8 @@ */ #include #include +#include +#include "nfs4_fs.h" int __init init_nfs_v4(void) { @@ -12,12 +14,19 @@ int __init init_nfs_v4(void) if (err) goto out; + err = nfs4_register_sysctl(); + if (err) + goto out1; + return 0; +out1: + nfs_idmap_quit(); out: return err; } void __exit exit_nfs_v4(void) { + nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c new file mode 100644 index 000000000000..5729bc8aa75d --- /dev/null +++ b/fs/nfs/nfs4sysctl.c @@ -0,0 +1,68 @@ +/* + * linux/fs/nfs/nfs4sysctl.c + * + * Sysctl interface to NFS v4 parameters + * + * Copyright (c) 2006 Trond Myklebust + */ +#include +#include +#include + +#include "callback.h" + +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static struct ctl_table_header *nfs4_callback_sysctl_table; + +static ctl_table nfs4_cb_sysctls[] = { + { + .procname = "nfs_callback_tcpport", + .data = &nfs_callback_set_tcpport, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (int *)&nfs_set_port_min, + .extra2 = (int *)&nfs_set_port_max, + }, + { + .procname = "idmap_cache_timeout", + .data = &nfs_idmap_cache_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static ctl_table nfs4_cb_sysctl_dir[] = { + { + .procname = "nfs", + .mode = 0555, + .child = nfs4_cb_sysctls, + }, + { } +}; + +static ctl_table nfs4_cb_sysctl_root[] = { + { + .procname = "fs", + .mode = 0555, + .child = nfs4_cb_sysctl_dir, + }, + { } +}; + +int nfs4_register_sysctl(void) +{ + nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root); + if (nfs4_callback_sysctl_table == NULL) + return -ENOMEM; + return 0; +} + +void nfs4_unregister_sysctl(void) +{ + unregister_sysctl_table(nfs4_callback_sysctl_table); + nfs4_callback_sysctl_table = NULL; +} diff --git a/fs/nfs/sysctl.c 
b/fs/nfs/sysctl.c index ad4d2e787b20..6b3f2535a3ec 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -9,37 +9,11 @@ #include #include #include -#include -#include #include -#include "callback.h" - -#ifdef CONFIG_NFS_V4 -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; -#endif static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { -#ifdef CONFIG_NFS_V4 - { - .procname = "nfs_callback_tcpport", - .data = &nfs_callback_set_tcpport, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = (int *)&nfs_set_port_min, - .extra2 = (int *)&nfs_set_port_max, - }, - { - .procname = "idmap_cache_timeout", - .data = &nfs_idmap_cache_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, -#endif { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, -- cgit v1.2.3 From ce4ef7c0a8a0594d7b9d088d73866a4389402a7e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:15 -0400 Subject: NFS: Split out NFS v4 file operations This patch moves the NFS v4 file functions into a new file that is only compiled when CONFIG_NFS_V4 is enabled. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/file.c | 151 ++++++------------------------------------------------ fs/nfs/internal.h | 17 ++++++ fs/nfs/nfs4file.c | 126 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 136 deletions(-) create mode 100644 fs/nfs/nfs4file.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 4a78e76440f8..e882a389b2ed 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,7 +13,7 @@ nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - nfs4super.o delegation.o idmap.o \ + nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 76239178e959..70d124a61b98 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -35,7 +35,6 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" -#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -46,7 +45,7 @@ static const struct vm_operations_struct nfs_file_vm_ops; # define IS_SWAPFILE(inode) (0) #endif -static int nfs_check_flags(int flags) +int nfs_check_flags(int flags) { if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) return -EINVAL; @@ -75,7 +74,7 @@ nfs_file_open(struct inode *inode, struct file *filp) return res; } -static int +int nfs_file_release(struct inode *inode, struct file *filp) { dprintk("NFS: release(%s/%s)\n", @@ -117,7 +116,7 @@ force_reval: return __nfs_revalidate_inode(server, inode); } -static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", filp->f_path.dentry->d_parent->d_name.name, @@ -142,7 +141,7 @@ static loff_t 
nfs_file_llseek(struct file *filp, loff_t offset, int origin) /* * Flush all dirty pages, and check for write errors. */ -static int +int nfs_file_flush(struct file *file, fl_owner_t id) { struct dentry *dentry = file->f_path.dentry; @@ -167,7 +166,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) return vfs_fsync(file, 0); } -static ssize_t +ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -191,7 +190,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, return result; } -static ssize_t +ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) @@ -213,7 +212,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, return res; } -static int +int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { struct dentry *dentry = file->f_path.dentry; @@ -246,7 +245,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * nfs_file_write() that a write error occurred, and hence cause it to * fall back to doing a synchronous write. 
*/ -static int +int nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; @@ -561,8 +560,8 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) return 0; } -static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_path.dentry; struct inode * inode = dentry->d_inode; @@ -613,9 +612,9 @@ out_swapfile: goto out; } -static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, - struct file *filp, loff_t *ppos, - size_t count, unsigned int flags) +ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -767,7 +766,7 @@ out: /* * Lock a (portion of) a file */ -static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) +int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; @@ -807,7 +806,7 @@ out_err: /* * Lock a (portion of) a file */ -static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) +int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int is_local = 0; @@ -837,7 +836,7 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) * There is no protocol support for leases, so we have no way to implement * them correctly in the face of opens by other clients. 
*/ -static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) +int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { dprintk("NFS: setlease(%s/%s, arg=%ld)\n", file->f_path.dentry->d_parent->d_name.name, @@ -863,121 +862,3 @@ const struct file_operations nfs_file_operations = { .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; - -#ifdef CONFIG_NFS_V4 -static int -nfs4_file_open(struct inode *inode, struct file *filp) -{ - struct nfs_open_context *ctx; - struct dentry *dentry = filp->f_path.dentry; - struct dentry *parent = NULL; - struct inode *dir; - unsigned openflags = filp->f_flags; - struct iattr attr; - int err; - - BUG_ON(inode != dentry->d_inode); - /* - * If no cached dentry exists or if it's negative, NFSv4 handled the - * opens in ->lookup() or ->create(). - * - * We only get this far for a cached positive dentry. We skipped - * revalidation, so handle it here by dropping the dentry and returning - * -EOPENSTALE. The VFS will retry the lookup/create/open. 
- */ - - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); - - if ((openflags & O_ACCMODE) == 3) - openflags--; - - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); - - parent = dget_parent(dentry); - dir = parent->d_inode; - - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); - err = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; - - attr.ia_valid = ATTR_OPEN; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - switch (err) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; - default: - goto out_drop; - } - } - iput(inode); - if (inode != dentry->d_inode) - goto out_drop; - - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - nfs_file_set_open_context(filp, ctx); - err = 0; - -out_put_ctx: - put_nfs_open_context(ctx); -out: - dput(parent); - return err; - -out_drop: - d_drop(dentry); - err = -EOPENSTALE; - goto out_put_ctx; -} - -static int -nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) -{ - int ret; - struct inode *inode = file->f_path.dentry->d_inode; - - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); - mutex_unlock(&inode->i_mutex); - - return ret; -} - -const struct file_operations nfs4_file_operations = { - .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, - .mmap = nfs_file_mmap, - .open = nfs4_file_open, - .flush = nfs_file_flush, - .release = nfs_file_release, - .fsync = nfs4_file_fsync, - .lock = nfs_lock, - .flock = 
nfs_flock, - .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, - .check_flags = nfs_check_flags, - .setlease = nfs_setlease, -}; -#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 35a8ffec69f6..ca7200a53caf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -257,6 +257,23 @@ int nfs_link(struct dentry *, struct inode *, struct dentry *); int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +/* file.c */ +int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); +loff_t nfs_file_llseek(struct file *, loff_t, int); +int nfs_file_flush(struct file *, fl_owner_t); +ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, + size_t, unsigned int); +int nfs_file_mmap(struct file *, struct vm_area_struct *); +ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); +int nfs_file_release(struct inode *, struct file *); +int nfs_lock(struct file *, int, struct file_lock *); +int nfs_flock(struct file *, int, struct file_lock *); +ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, + size_t, unsigned int); +int nfs_check_flags(int); +int nfs_setlease(struct file *, long, struct file_lock **); + /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c new file mode 100644 index 000000000000..acb65e7887f8 --- /dev/null +++ b/fs/nfs/nfs4file.c @@ -0,0 +1,126 @@ +/* + * linux/fs/nfs/file.c + * + * Copyright (C) 1992 Rick Sladkey + */ +#include +#include "internal.h" +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_FILE + +static int +nfs4_file_open(struct inode *inode, struct file *filp) +{ + struct nfs_open_context *ctx; + struct 
dentry *dentry = filp->f_path.dentry; + struct dentry *parent = NULL; + struct inode *dir; + unsigned openflags = filp->f_flags; + struct iattr attr; + int err; + + BUG_ON(inode != dentry->d_inode); + /* + * If no cached dentry exists or if it's negative, NFSv4 handled the + * opens in ->lookup() or ->create(). + * + * We only get this far for a cached positive dentry. We skipped + * revalidation, so handle it here by dropping the dentry and returning + * -EOPENSTALE. The VFS will retry the lookup/create/open. + */ + + dprintk("NFS: open file(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + if ((openflags & O_ACCMODE) == 3) + openflags--; + + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); + + parent = dget_parent(dentry); + dir = parent->d_inode; + + ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; + + attr.ia_valid = ATTR_OPEN; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + switch (err) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + nfs_file_set_open_context(filp, ctx); + err = 0; + +out_put_ctx: + put_nfs_open_context(ctx); +out: + dput(parent); + return err; + +out_drop: + d_drop(dentry); + err = -EOPENSTALE; + goto out_put_ctx; +} + +static int +nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && 
!datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + + return ret; +} + +const struct file_operations nfs4_file_operations = { + .llseek = nfs_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, + .mmap = nfs_file_mmap, + .open = nfs4_file_open, + .flush = nfs_file_flush, + .release = nfs_file_release, + .fsync = nfs4_file_fsync, + .lock = nfs_lock, + .flock = nfs_flock, + .splice_read = nfs_file_splice_read, + .splice_write = nfs_file_splice_write, + .check_flags = nfs_check_flags, + .setlease = nfs_setlease, +}; -- cgit v1.2.3 From a38a9eac75f0d09f1941a6e85e291c8e96bc8375 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:16 -0400 Subject: NFS: Move the v4 getroot code to nfs4getroot.c Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/getroot.c | 50 -------------------------------------------------- fs/nfs/nfs4getroot.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 51 deletions(-) create mode 100644 fs/nfs/nfs4getroot.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index e882a389b2ed..ec13afe2619a 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,7 +15,7 @@ nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o + nfs4namespace.o nfs4getroot.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o ifeq ($(CONFIG_SYSCTL), y) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index a67990f90bd7..4654ced096a6 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -23,21 +23,15 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include -#include "nfs4_fs.h" -#include 
"delegation.h" -#include "internal.h" - #define NFSDBG_FACILITY NFSDBG_CLIENT /* @@ -135,47 +129,3 @@ out: nfs_free_fattr(fsinfo.fattr); return ret; } - -#ifdef CONFIG_NFS_V4 - -int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) -{ - struct nfs_fsinfo fsinfo; - int ret = -ENOMEM; - - dprintk("--> nfs4_get_rootfh()\n"); - - fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) - goto out; - - /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); - if (ret < 0) { - dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); - goto out; - } - - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) - || !S_ISDIR(fsinfo.fattr->mode)) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot encountered non-directory\n"); - ret = -ENOTDIR; - goto out; - } - - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); -out: - nfs_free_fattr(fsinfo.fattr); - dprintk("<-- nfs4_get_rootfh() = %d\n", ret); - return ret; -} - -#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c new file mode 100644 index 000000000000..6a83780e0ce6 --- /dev/null +++ b/fs/nfs/nfs4getroot.c @@ -0,0 +1,49 @@ +/* +* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+* Written by David Howells (dhowells@redhat.com) +*/ + +#include +#include "nfs4_fs.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) +{ + struct nfs_fsinfo fsinfo; + int ret = -ENOMEM; + + dprintk("--> nfs4_get_rootfh()\n"); + + fsinfo.fattr = nfs_alloc_fattr(); + if (fsinfo.fattr == NULL) + goto out; + + /* Start by getting the root filehandle from the server */ + ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); + goto out; + } + + if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) + || !S_ISDIR(fsinfo.fattr->mode)) { + printk(KERN_ERR "nfs4_get_rootfh:" + " getroot encountered non-directory\n"); + ret = -ENOTDIR; + goto out; + } + + if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_rootfh:" + " getroot obtained referral\n"); + ret = -EREMOTE; + goto out; + } + + memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); +out: + nfs_free_fattr(fsinfo.fattr); + dprintk("<-- nfs4_get_rootfh() = %d\n", ret); + return ret; +} -- cgit v1.2.3 From 428360d77c801932e4b28f15160aebbdb5f5a03e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:17 -0400 Subject: NFS: Initialize the NFS v4 client from init_nfs_v4() And split these functions out of the generic client into a v4 specific file. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/client.c | 137 ++----------------------------------------------- fs/nfs/internal.h | 1 + fs/nfs/nfs4client.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 135 deletions(-) create mode 100644 fs/nfs/nfs4client.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ec13afe2619a..0b96c2038346 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,7 +15,7 @@ nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o + nfs4namespace.o nfs4getroot.o nfs4client.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o ifeq ($(CONFIG_SYSCTL), y) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 254719c4a575..5664c7bbe50d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -662,9 +662,9 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, /* * Create an RPC client handle */ -static int nfs_create_rpc_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - rpc_authflavor_t flavor) +int nfs_create_rpc_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + rpc_authflavor_t flavor) { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { @@ -1304,137 +1304,6 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, } #endif /* CONFIG_NFS_V4_1 */ -/* - * Initialize the NFS4 callback service - */ -static int nfs4_init_callback(struct nfs_client *clp) -{ - int error; - - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; - - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, 
xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); - return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); - } - return 0; -} - -/* - * Initialize the minor version specific parts of an NFS4 client record - */ -static int nfs4_init_client_minor_version(struct nfs_client *clp) -{ -#if defined(CONFIG_NFS_V4_1) - if (clp->cl_mvops->minor_version) { - struct nfs4_session *session = NULL; - /* - * Create the session and mark it expired. - * When a SEQUENCE operation encounters the expired session - * it will do session recovery to initialize it. - */ - session = nfs4_alloc_session(clp); - if (!session) - return -ENOMEM; - - clp->cl_session = session; - /* - * The create session reply races with the server back - * channel probe. Mark the client NFS_CS_SESSION_INITING - * so that the client back channel can find the - * nfs_client struct - */ - nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); - } -#endif /* CONFIG_NFS_V4_1 */ - - return nfs4_init_callback(clp); -} - -/** - * nfs4_init_client - Initialise an NFS4 client record - * - * @clp: nfs_client to initialise - * @timeparms: timeout parameters for underlying RPC transport - * @ip_addr: callback IP address in presentation format - * @authflavor: authentication flavor for underlying RPC transport - * - * Returns pointer to an NFS client, or an ERR_PTR value. 
- */ -struct nfs_client *nfs4_init_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour) -{ - char buf[INET6_ADDRSTRLEN + 1]; - int error; - - if (clp->cl_cons_state == NFS_CS_READY) { - /* the client is initialised already */ - dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); - return clp; - } - - /* Check NFS protocol revision and initialize RPC op vector */ - clp->rpc_ops = &nfs_v4_clientops; - - __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - error = nfs_create_rpc_client(clp, timeparms, authflavour); - if (error < 0) - goto error; - - /* If no clientaddr= option was specified, find a usable cb address */ - if (ip_addr == NULL) { - struct sockaddr_storage cb_addr; - struct sockaddr *sap = (struct sockaddr *)&cb_addr; - - error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); - if (error < 0) - goto error; - error = rpc_ntop(sap, buf, sizeof(buf)); - if (error < 0) - goto error; - ip_addr = (const char *)buf; - } - strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); - - error = nfs_idmap_new(clp); - if (error < 0) { - dprintk("%s: failed to create idmapper. 
Error = %d\n", - __func__, error); - goto error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - - error = nfs4_init_client_minor_version(clp); - if (error < 0) - goto error; - - if (!nfs4_has_session(clp)) - nfs_mark_client_ready(clp, NFS_CS_READY); - return clp; - -error: - nfs_mark_client_ready(clp, error); - nfs_put_client(clp); - dprintk("<-- nfs4_init_client() = xerror %d\n", error); - return ERR_PTR(error); -} - /* * Set up an NFS4 client */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ca7200a53caf..10df28d14f83 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -149,6 +149,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); +int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c new file mode 100644 index 000000000000..c5234b589907 --- /dev/null +++ b/fs/nfs/nfs4client.c @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#include +#include +#include +#include +#include +#include "internal.h" +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +/* + * Initialize the NFS4 callback service + */ +static int nfs4_init_callback(struct nfs_client *clp) +{ + int error; + + if (clp->rpc_ops->version == 4) { + struct rpc_xprt *xprt; + + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); + + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, + NFS41_BC_MIN_CALLBACKS); + if (error < 0) + return error; + } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. 
Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + return 0; +} + +/* + * Initialize the minor version specific parts of an NFS4 client record + */ +static int nfs4_init_client_minor_version(struct nfs_client *clp) +{ +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_mvops->minor_version) { + struct nfs4_session *session = NULL; + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); + } +#endif /* CONFIG_NFS_V4_1 */ + + return nfs4_init_callback(clp); +} + +/** + * nfs4_init_client - Initialise an NFS4 client record + * + * @clp: nfs_client to initialise + * @timeparms: timeout parameters for underlying RPC transport + * @ip_addr: callback IP address in presentation format + * @authflavor: authentication flavor for underlying RPC transport + * + * Returns pointer to an NFS client, or an ERR_PTR value. 
+ */ +struct nfs_client *nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour) +{ + char buf[INET6_ADDRSTRLEN + 1]; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return clp; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + error = nfs_create_rpc_client(clp, timeparms, authflavour); + if (error < 0) + goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (error < 0) + goto error; + error = rpc_ntop(sap, buf, sizeof(buf)); + if (error < 0) + goto error; + ip_addr = (const char *)buf; + } + strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __func__, error); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + + error = nfs4_init_client_minor_version(clp); + if (error < 0) + goto error; + + if (!nfs4_has_session(clp)) + nfs_mark_client_ready(clp, NFS_CS_READY); + return clp; + +error: + nfs_mark_client_ready(clp, error); + nfs_put_client(clp); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return ERR_PTR(error); +} -- cgit v1.2.3 From fcf10398f641c4450119f8a4cc27e9e584edb010 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:18 -0400 Subject: NFS: Split out NFS v4 server creating code These functions are specific to NFS v4 and can be moved to nfs4client.c to keep them out of the generic client. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 457 ++-------------------------------------------------- fs/nfs/internal.h | 25 +++ fs/nfs/nfs4client.c | 428 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 462 insertions(+), 448 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5664c7bbe50d..0d50629d9e25 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -81,11 +81,6 @@ retry: } #endif /* CONFIG_NFS_V4 */ -/* - * Turn off NFSv4 uid/gid mapping when using AUTH_SYS - */ -static bool nfs4_disable_idmapping = true; - /* * RPC cruft for NFS */ @@ -130,17 +125,6 @@ const struct rpc_program nfsacl_program = { }; #endif /* CONFIG_NFS_V3_ACL */ -struct nfs_client_initdata { - unsigned long init_flags; - const char *hostname; - const struct sockaddr *addr; - size_t addrlen; - const struct nfs_rpc_ops *rpc_ops; - int proto; - u32 minorversion; - struct net *net; -}; - /* * Allocate a shared client record * @@ -282,13 +266,6 @@ static void pnfs_init_server(struct nfs_server *server) rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); } -static void nfs4_destroy_server(struct nfs_server *server) -{ - nfs_server_return_all_delegations(server); - unset_pnfs_layoutdriver(server); - nfs4_purge_state_owners(server); -} - #else void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -426,8 +403,8 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, * Test if two socket addresses represent the same actual socket, * by comparing (only) relevant fields, excluding the port number. 
*/ -static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, - const struct sockaddr *sa2) +int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) { if (sa1->sa_family != sa2->sa_family) return 0; @@ -461,33 +438,6 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } -#if defined(CONFIG_NFS_V4_1) -/* Common match routine for v4.0 and v4.1 callback services */ -static bool nfs4_cb_match_client(const struct sockaddr *addr, - struct nfs_client *clp, u32 minorversion) -{ - struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; - - /* Don't match clients that failed to initialise */ - if (!(clp->cl_cons_state == NFS_CS_READY || - clp->cl_cons_state == NFS_CS_SESSION_INITING)) - return false; - - smp_rmb(); - - /* Match the version and minorversion */ - if (clp->rpc_ops->version != 4 || - clp->cl_minorversion != minorversion) - return false; - - /* Match only the IP address, not the port number */ - if (!nfs_sockaddr_match_ipaddr(addr, clap)) - return false; - - return true; -} -#endif /* CONFIG_NFS_V4_1 */ - /* * Find an nfs_client on the list that matches the initialisation data * that is supplied. 
@@ -566,7 +516,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -static struct nfs_client * +struct nfs_client * nfs_get_client(const struct nfs_client_initdata *cl_init, const struct rpc_timeout *timeparms, const char *ip_addr, @@ -621,7 +571,7 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) /* * Initialise the timeout values for a connection */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, +void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) { to->to_initval = timeo * HZ / 10; @@ -781,7 +731,7 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) /* * Create a general RPC client */ -static int nfs_init_server_rpcclient(struct nfs_server *server, +int nfs_init_server_rpcclient(struct nfs_server *server, const struct rpc_timeout *timeo, rpc_authflavor_t pseudoflavour) { @@ -1014,7 +964,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, /* * Probe filesystem information, including the FSID on v2/v3 */ -static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; @@ -1058,7 +1008,7 @@ out_error: /* * Copy useful information when duplicating a server record */ -static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; target->rsize = source->rsize; @@ -1071,7 +1021,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve target->options = source->options; } -static void nfs_server_insert_lists(struct nfs_server *server) +void 
nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); @@ -1105,7 +1055,7 @@ static void nfs_server_remove_lists(struct nfs_server *server) /* * Allocate and initialise a server record */ -static struct nfs_server *nfs_alloc_server(void) +struct nfs_server *nfs_alloc_server(void) { struct nfs_server *server; @@ -1238,391 +1188,6 @@ error: return ERR_PTR(error); } -#ifdef CONFIG_NFS_V4 -/* - * NFSv4.0 callback thread helper - * - * Find a client by callback identifier - */ -struct nfs_client * -nfs4_find_client_ident(struct net *net, int cb_ident) -{ - struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - - spin_lock(&nn->nfs_client_lock); - clp = idr_find(&nn->cb_ident_idr, cb_ident); - if (clp) - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; -} - -#if defined(CONFIG_NFS_V4_1) -/* - * NFSv4.1 callback thread helper - * For CB_COMPOUND calls, find a client by IP address, protocol version, - * minorversion, and sessionID - * - * Returns NULL if no such client - */ -struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) -{ - struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 1) == false) - continue; - - if (!nfs4_has_session(clp)) - continue; - - /* Match sessionid*/ - if (memcmp(clp->cl_session->sess_id.data, - sid->data, NFS4_MAX_SESSIONID_LEN) != 0) - continue; - - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; - } - spin_unlock(&nn->nfs_client_lock); - return NULL; -} - -#else /* CONFIG_NFS_V4_1 */ - -struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) -{ - return NULL; -} 
-#endif /* CONFIG_NFS_V4_1 */ - -/* - * Set up an NFS4 client - */ -static int nfs4_set_client(struct nfs_server *server, - const char *hostname, - const struct sockaddr *addr, - const size_t addrlen, - const char *ip_addr, - rpc_authflavor_t authflavour, - int proto, const struct rpc_timeout *timeparms, - u32 minorversion, struct net *net) -{ - struct nfs_client_initdata cl_init = { - .hostname = hostname, - .addr = addr, - .addrlen = addrlen, - .rpc_ops = &nfs_v4_clientops, - .proto = proto, - .minorversion = minorversion, - .net = net, - }; - struct nfs_client *clp; - int error; - - dprintk("--> nfs4_set_client()\n"); - - if (server->flags & NFS_MOUNT_NORESVPORT) - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - - /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); - if (IS_ERR(clp)) { - error = PTR_ERR(clp); - goto error; - } - - /* - * Query for the lease time on clientid setup or renewal - * - * Note that this will be set on nfs_clients that were created - * only for the DS role and did not set this bit, but now will - * serve a dual role. - */ - set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); - - server->nfs_client = clp; - dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); - return 0; -error: - dprintk("<-- nfs4_set_client() = xerror %d\n", error); - return error; -} - -/* - * Set up a pNFS Data Server client. - * - * Return any existing nfs_client that matches server address,port,version - * and minorversion. - * - * For a new nfs_client, use a soft mount (default), a low retrans and a - * low timeout interval so that if a connection is lost, we retry through - * the MDS. 
- */ -struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, - const struct sockaddr *ds_addr, int ds_addrlen, - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) -{ - struct nfs_client_initdata cl_init = { - .addr = ds_addr, - .addrlen = ds_addrlen, - .rpc_ops = &nfs_v4_clientops, - .proto = ds_proto, - .minorversion = mds_clp->cl_minorversion, - .net = mds_clp->cl_net, - }; - struct rpc_timeout ds_timeout; - struct nfs_client *clp; - - /* - * Set an authflavor equual to the MDS value. Use the MDS nfs_client - * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS - * (section 13.1 RFC 5661). - */ - nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, - mds_clp->cl_rpcclient->cl_auth->au_flavor); - - dprintk("<-- %s %p\n", __func__, clp); - return clp; -} -EXPORT_SYMBOL_GPL(nfs4_set_ds_client); - -/* - * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. 
- */ -static void nfs4_session_set_rwsize(struct nfs_server *server) -{ -#ifdef CONFIG_NFS_V4_1 - struct nfs4_session *sess; - u32 server_resp_sz; - u32 server_rqst_sz; - - if (!nfs4_has_session(server->nfs_client)) - return; - sess = server->nfs_client->cl_session; - server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; - server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - - if (server->rsize > server_resp_sz) - server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) - server->wsize = server_rqst_sz; -#endif /* CONFIG_NFS_V4_1 */ -} - -static int nfs4_server_common_setup(struct nfs_server *server, - struct nfs_fh *mntfh) -{ - struct nfs_fattr *fattr; - int error; - - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - /* data servers support only a subset of NFSv4.1 */ - if (is_ds_only_client(server->nfs_client)) - return -EPROTONOSUPPORT; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* We must ensure the session is initialised first */ - error = nfs4_init_session(server); - if (error < 0) - goto out; - - /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); - if (error < 0) - goto out; - - dprintk("Server FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); - - nfs4_session_set_rwsize(server); - - error = nfs_probe_fsinfo(server, mntfh, fattr); - if (error < 0) - goto out; - - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - nfs_server_insert_lists(server); - server->mount_time = jiffies; - server->destroy = nfs4_destroy_server; -out: - nfs_free_fattr(fattr); - return error; -} - -/* - * Create a version 4 volume record - */ -static int nfs4_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data 
*data) -{ - struct rpc_timeout timeparms; - int error; - - dprintk("--> nfs4_init_server()\n"); - - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - - /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; - server->options = data->options; - - /* Get a client record */ - error = nfs4_set_client(server, - data->nfs_server.hostname, - (const struct sockaddr *)&data->nfs_server.address, - data->nfs_server.addrlen, - data->client_address, - data->auth_flavors[0], - data->nfs_server.protocol, - &timeparms, - data->minorversion, - data->net); - if (error < 0) - goto error; - - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; - - server->port = data->nfs_server.port; - - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); - -error: - /* Done */ - dprintk("<-- nfs4_init_server() = %d\n", error); - return error; -} - -/* - * Create a version 4 volume record - * - keyed on server and FSID - */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) -{ - struct nfs_server *server; - int error; - - dprintk("--> nfs4_create_server()\n"); - - server = nfs_alloc_server(); - if (!server) - return ERR_PTR(-ENOMEM); - - /* set up the general RPC client */ - error = nfs4_init_server(server, data); - 
if (error < 0) - goto error; - - error = nfs4_server_common_setup(server, mntfh); - if (error < 0) - goto error; - - dprintk("<-- nfs4_create_server() = %p\n", server); - return server; - -error: - nfs_free_server(server); - dprintk("<-- nfs4_create_server() = error %d\n", error); - return ERR_PTR(error); -} - -/* - * Create an NFS4 referral server record - */ -struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) -{ - struct nfs_client *parent_client; - struct nfs_server *server, *parent_server; - int error; - - dprintk("--> nfs4_create_referral_server()\n"); - - server = nfs_alloc_server(); - if (!server) - return ERR_PTR(-ENOMEM); - - parent_server = NFS_SB(data->sb); - parent_client = parent_server->nfs_client; - - /* Initialise the client representation from the parent server */ - nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; - - /* Get a client representation. - * Note: NFSv4 always uses TCP, */ - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, - parent_client->cl_ipaddr, - data->authflavor, - rpc_protocol(parent_server->client), - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_net); - if (error < 0) - goto error; - - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); - if (error < 0) - goto error; - - error = nfs4_server_common_setup(server, mntfh); - if (error < 0) - goto error; - - dprintk("<-- nfs_create_referral_server() = %p\n", server); - return server; - -error: - nfs_free_server(server); - dprintk("<-- nfs4_create_referral_server() = error %d\n", error); - return ERR_PTR(error); -} - -#endif /* CONFIG_NFS_V4 */ - /* * Clone an NFS2, NFS3 or NFS4 server record */ @@ -1972,7 +1537,3 @@ void nfs_fs_proc_exit(void) } #endif /* CONFIG_PROC_FS */ - -module_param(nfs4_disable_idmapping, bool, 0644); 
-MODULE_PARM_DESC(nfs4_disable_idmapping, - "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 10df28d14f83..b4a35705246c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -85,6 +85,17 @@ struct nfs_clone_mount { */ #define NFS_MAX_READDIR_PAGES 8 +struct nfs_client_initdata { + unsigned long init_flags; + const char *hostname; + const struct sockaddr *addr; + size_t addrlen; + const struct nfs_rpc_ops *rpc_ops; + int proto; + u32 minorversion; + struct net *net; +}; + /* * In-kernel mount arguments */ @@ -150,6 +161,16 @@ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t); +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, + const struct rpc_timeout *, const char *, + rpc_authflavor_t); +int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); +void nfs_server_insert_lists(struct nfs_server *); +void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); +int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, + rpc_authflavor_t); +struct nfs_server *nfs_alloc_server(void); +void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); @@ -191,6 +212,10 @@ static inline void nfs_fs_proc_exit(void) } #endif +#ifdef CONFIG_NFS_V4_1 +int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); +#endif + /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index c5234b589907..a71d95ecbea9 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ 
-2,16 +2,26 @@ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ +#include #include #include +#include #include #include #include #include "internal.h" #include "callback.h" +#include "delegation.h" +#include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT +/* + * Turn off NFSv4 uid/gid mapping when using AUTH_SYS + */ +static bool nfs4_disable_idmapping = true; + /* * Initialize the NFS4 callback service */ @@ -142,3 +152,421 @@ error: dprintk("<-- nfs4_init_client() = xerror %d\n", error); return ERR_PTR(error); } + +static void nfs4_destroy_server(struct nfs_server *server) +{ + nfs_server_return_all_delegations(server); + unset_pnfs_layoutdriver(server); + nfs4_purge_state_owners(server); +} + +/* + * NFSv4.0 callback thread helper + * + * Find a client by callback identifier + */ +struct nfs_client * +nfs4_find_client_ident(struct net *net, int cb_ident) +{ + struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + spin_lock(&nn->nfs_client_lock); + clp = idr_find(&nn->cb_ident_idr, cb_ident); + if (clp) + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; +} + +#if defined(CONFIG_NFS_V4_1) +/* Common match routine for v4.0 and v4.1 callback services */ +static bool nfs4_cb_match_client(const struct sockaddr *addr, + struct nfs_client *clp, u32 minorversion) +{ + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + + /* Don't match clients that failed to initialise */ + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) + return false; + + smp_rmb(); + + /* Match the version and minorversion */ + if (clp->rpc_ops->version != 4 || + clp->cl_minorversion != minorversion) + return false; + + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(addr, clap)) + return false; + + return true; +} + +/* + * NFSv4.1 callback thread helper + * For CB_COMPOUND 
calls, find a client by IP address, protocol version, + * minorversion, and sessionID + * + * Returns NULL if no such client + */ +struct nfs_client * +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + if (nfs4_cb_match_client(addr, clp, 1) == false) + continue; + + if (!nfs4_has_session(clp)) + continue; + + /* Match sessionid*/ + if (memcmp(clp->cl_session->sess_id.data, + sid->data, NFS4_MAX_SESSIONID_LEN) != 0) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; + } + spin_unlock(&nn->nfs_client_lock); + return NULL; +} + +#else /* CONFIG_NFS_V4_1 */ + +struct nfs_client * +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + return NULL; +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, + const struct sockaddr *addr, + const size_t addrlen, + const char *ip_addr, + rpc_authflavor_t authflavour, + int proto, const struct rpc_timeout *timeparms, + u32 minorversion, struct net *net) +{ + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = addr, + .addrlen = addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = proto, + .minorversion = minorversion, + .net = net, + }; + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + if (server->flags & NFS_MOUNT_NORESVPORT) + set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + + /* + * Query for the lease time on clientid setup or renewal + * + * Note that this will 
be set on nfs_clients that were created + * only for the DS role and did not set this bit, but now will + * serve a dual role. + */ + set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Set up a pNFS Data Server client. + * + * Return any existing nfs_client that matches server address,port,version + * and minorversion. + * + * For a new nfs_client, use a soft mount (default), a low retrans and a + * low timeout interval so that if a connection is lost, we retry through + * the MDS. + */ +struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, int ds_addrlen, + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) +{ + struct nfs_client_initdata cl_init = { + .addr = ds_addr, + .addrlen = ds_addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = ds_proto, + .minorversion = mds_clp->cl_minorversion, + .net = mds_clp->cl_net, + }; + struct rpc_timeout ds_timeout; + struct nfs_client *clp; + + /* + * Set an authflavor equual to the MDS value. Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS + * (section 13.1 RFC 5661). + */ + nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, + mds_clp->cl_rpcclient->cl_auth->au_flavor); + + dprintk("<-- %s %p\n", __func__, clp); + return clp; +} +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); + +/* + * Session has been established, and the client marked ready. + * Set the mount rsize and wsize with negotiated fore channel + * attributes which will be bound checked in nfs_server_set_fsinfo. 
+ */ +static void nfs4_session_set_rwsize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_1 + struct nfs4_session *sess; + u32 server_resp_sz; + u32 server_rqst_sz; + + if (!nfs4_has_session(server->nfs_client)) + return; + sess = server->nfs_client->cl_session; + server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; + server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; + + if (server->rsize > server_resp_sz) + server->rsize = server_resp_sz; + if (server->wsize > server_rqst_sz) + server->wsize = server_rqst_sz; +#endif /* CONFIG_NFS_V4_1 */ +} + +static int nfs4_server_common_setup(struct nfs_server *server, + struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* data servers support only a subset of NFSv4.1 */ + if (is_ds_only_client(server->nfs_client)) + return -EPROTONOSUPPORT; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* We must ensure the session is initialised first */ + error = nfs4_init_session(server); + if (error < 0) + goto out; + + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_get_rootfh(server, mntfh); + if (error < 0) + goto out; + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + nfs4_session_set_rwsize(server); + + error = nfs_probe_fsinfo(server, mntfh, fattr); + if (error < 0) + goto out; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + nfs_server_insert_lists(server); + server->mount_time = jiffies; + server->destroy = nfs4_destroy_server; +out: + nfs_free_fattr(fattr); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs_parsed_mount_data 
*data) +{ + struct rpc_timeout timeparms; + int error; + + dprintk("--> nfs4_init_server()\n"); + + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + server->options = data->options; + + /* Get a client record */ + error = nfs4_set_client(server, + data->nfs_server.hostname, + (const struct sockaddr *)&data->nfs_server.address, + data->nfs_server.addrlen, + data->client_address, + data->auth_flavors[0], + data->nfs_server.protocol, + &timeparms, + data->minorversion, + data->net); + if (error < 0) + goto error; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + server->port = data->nfs_server.port; + + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + +error: + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* set up the general RPC client */ + error = nfs4_init_server(server, data); + 
if (error < 0) + goto error; + + error = nfs4_server_common_setup(server, mntfh); + if (error < 0) + goto error; + + dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *mntfh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, + data->addr, + data->addrlen, + parent_client->cl_ipaddr, + data->authflavor, + rpc_protocol(parent_server->client), + parent_server->client->cl_timeout, + parent_client->cl_mvops->minor_version, + parent_client->cl_net); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + if (error < 0) + goto error; + + error = nfs4_server_common_setup(server, mntfh); + if (error < 0) + goto error; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); -- cgit v1.2.3 From 3cadf4b864cab9d19b935289c004799d1065cd03 Mon Sep 17 00:00:00 2001 From: 
Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:19 -0400 Subject: NFS: Create a single nfs_clone_super() function v2 and v3 shared a function for this, but v4 implemented something only slightly different. Might as well share code whenever possible... Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9bad4e753066..ca3c0e8cf774 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2218,7 +2218,7 @@ static void nfs_fill_super(struct super_block *sb, } /* - * Finish setting up a cloned NFS2/3 superblock + * Finish setting up a cloned NFS2/3/4 superblock */ static void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) @@ -2229,16 +2229,17 @@ static void nfs_clone_super(struct super_block *sb, sb->s_blocksize_bits = old_sb->s_blocksize_bits; sb->s_blocksize = old_sb->s_blocksize; sb->s_maxbytes = old_sb->s_maxbytes; + sb->s_xattr = old_sb->s_xattr; + sb->s_op = old_sb->s_op; + sb->s_time_gran = 1; - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ sb->s_flags |= MS_POSIXACL; - sb->s_time_gran = 1; } - sb->s_op = old_sb->s_op; nfs_initialise_sb(sb); } @@ -2579,27 +2580,6 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, #ifdef CONFIG_NFS_V4 -/* - * Finish setting up a cloned NFS4 superblock - */ -static void nfs4_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - const struct super_block *old_sb = mount_info->cloned->sb; - sb->s_blocksize_bits = old_sb->s_blocksize_bits; - sb->s_blocksize = old_sb->s_blocksize; - sb->s_maxbytes = old_sb->s_maxbytes; - sb->s_time_gran = 1; - sb->s_op = old_sb->s_op; - /* - * The VFS shouldn't apply the umask to mode bits. 
We will do - * so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = old_sb->s_xattr; - nfs_initialise_sb(sb); -} - /* * Set up an NFS4 superblock */ @@ -2883,7 +2863,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .fill_super = nfs4_clone_super, + .fill_super = nfs_clone_super, .set_security = nfs_clone_sb_security, .cloned = raw_data, }; -- cgit v1.2.3 From fbdefd6442811392e857721573b63a51d1149cc8 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:20 -0400 Subject: NFS: Split out the NFS v4 filesystem types This allows me to move the v4 mounting and unmounting functions out of the generic client and into a file that is only compiled when CONFIG_NFS_V4 is enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 29 ++++ fs/nfs/nfs4_fs.h | 2 + fs/nfs/nfs4super.c | 328 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/super.c | 395 +++-------------------------------------------------- 4 files changed, 381 insertions(+), 373 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b4a35705246c..cfafd13b6fe9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -153,6 +153,14 @@ struct nfs_mount_request { struct net *net; }; +struct nfs_mount_info { + void (*fill_super)(struct super_block *, struct nfs_mount_info *); + int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); + struct nfs_parsed_mount_data *parsed; + struct nfs_clone_mount *cloned; + struct nfs_fh *mntfh; +}; + extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); @@ -318,6 +326,16 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +void nfs_initialise_sb(struct super_block *); +int 
nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); +int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount_common(struct file_system_type *, struct nfs_server *, + int, const char *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); +struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, + const char *, struct nfs_mount_info *); +void nfs_kill_super(struct super_block *); +void nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -364,6 +382,17 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); +/* super.c */ +void nfs_clone_super(struct super_block *, struct nfs_mount_info *); +void nfs_umount_begin(struct super_block *); +int nfs_statfs(struct dentry *, struct kstatfs *); +int nfs_show_options(struct seq_file *, struct dentry *); +int nfs_show_devname(struct seq_file *, struct dentry *); +int nfs_show_path(struct seq_file *, struct dentry *); +int nfs_show_stats(struct seq_file *, struct dentry *); +void nfs_put_super(struct super_block *); +int nfs_remount(struct super_block *sb, int *flags, char *raw_data); + /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b508fef1a32b..b1ecacd8784a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -365,6 +365,8 @@ extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta extern const nfs4_stateid zero_stateid; /* nfs4super.c */ +struct nfs_mount_info; +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); int init_nfs_v4(void); void exit_nfs_v4(void); diff --git 
a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 70c394e75ca1..2af26913884f 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -2,10 +2,331 @@ * Copyright (c) 2012 Bryan Schumaker */ #include +#include #include +#include #include +#include "internal.h" #include "nfs4_fs.h" +#define NFSDBG_FACILITY NFSDBG_VFS + +static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); + +static struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_remote_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_xdev_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_xdev_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_remote_referral_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_referral_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + 
+static const struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs4_write_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .evict_inode = nfs4_evict_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_devname = nfs_show_devname, + .show_path = nfs_show_path, + .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, +}; + +/* + * Set up an NFS4 superblock + */ +static void nfs4_fill_super(struct super_block *sb, + struct nfs_mount_info *mount_info) +{ + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + /* + * The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_xattr = nfs4_xattr_handlers; + nfs_initialise_sb(sb); +} + +/* + * Get the superblock for the NFS4 root partition + */ +static struct dentry * +nfs4_remote_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *info) +{ + struct nfs_mount_info *mount_info = info; + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + + mount_info->fill_super = nfs4_fill_super; + mount_info->set_security = nfs_set_sb_security; + + /* Get a volume representation */ + server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + +out: + return mntroot; +} + +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 5; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + /* Does hostname needs to be enclosed in brackets? 
*/ + if (strchr(hostname, ':')) + snprintf(root_devname, len, "[%s]:/", hostname); + else + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +struct nfs_referral_count { + struct list_head list; + const struct task_struct *task; + unsigned int referral_count; +}; + +static LIST_HEAD(nfs_referral_count_list); +static DEFINE_SPINLOCK(nfs_referral_count_list_lock); + +static struct nfs_referral_count *nfs_find_referral_count(void) +{ + struct nfs_referral_count *p; + + list_for_each_entry(p, &nfs_referral_count_list, list) { + if (p->task == current) + return p; + } + return NULL; +} + +#define NFS_MAX_NESTED_REFERRALS 2 + +static int nfs_referral_loop_protect(void) +{ + struct nfs_referral_count *p, *new; + int ret = -ENOMEM; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out; + new->task = current; + new->referral_count = 1; + + ret = 0; + spin_lock(&nfs_referral_count_list_lock); + p = nfs_find_referral_count(); + if (p != NULL) { + if (p->referral_count >= NFS_MAX_NESTED_REFERRALS) + ret = -ELOOP; + else + p->referral_count++; + } else { + list_add(&new->list, &nfs_referral_count_list); + new = NULL; + } + spin_unlock(&nfs_referral_count_list_lock); + kfree(new); +out: + return ret; +} + +static void nfs_referral_loop_unprotect(void) +{ + struct nfs_referral_count *p; + + spin_lock(&nfs_referral_count_list_lock); + p = nfs_find_referral_count(); + p->referral_count--; + if (p->referral_count == 0) + list_del(&p->list); + else + p = NULL; + spin_unlock(&nfs_referral_count_list_lock); + kfree(p); +} + +static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path) +{ + struct dentry *dentry; + int err; + + if (IS_ERR(root_mnt)) + return ERR_CAST(root_mnt); + + err = nfs_referral_loop_protect(); + if (err) { + mntput(root_mnt); + return ERR_PTR(err); + } + + dentry = mount_subtree(root_mnt, export_path); + 
nfs_referral_loop_unprotect(); + + return dentry; +} + +struct dentry *nfs4_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info) +{ + char *export_path; + struct vfsmount *root_mnt; + struct dentry *res; + struct nfs_parsed_mount_data *data = mount_info->parsed; + + dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + + mount_info->fill_super = nfs4_fill_super; + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + res = nfs_follow_remote_path(root_mnt, export_path); + + dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); + return res; +} + +/* + * Clone an NFS4 server record on xdev traversal (FSID-change) + */ +static struct dentry * +nfs4_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, + .cloned = raw_data, + }; + return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); +} + +static struct dentry * +nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct nfs_mount_info mount_info = { + .fill_super = nfs4_fill_super, + .set_security = nfs_clone_sb_security, + .cloned = raw_data, + }; + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + + dprintk("--> nfs4_referral_get_sb()\n"); + + mount_info.mntfh = nfs_alloc_fhandle(); + if (mount_info.cloned == NULL || mount_info.mntfh == NULL) + goto out; + + /* create a new volume representation */ + server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(&nfs4_fs_type, 
server, flags, dev_name, &mount_info); +out: + nfs_free_fhandle(mount_info.mntfh); + return mntroot; +} + +/* + * Create an NFS4 server record on referral traversal + */ +static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + struct dentry *res; + + dprintk("--> nfs4_referral_mount()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + res = nfs_follow_remote_path(root_mnt, export_path); + dprintk("<-- nfs4_referral_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); + return res; +} + + int __init init_nfs_v4(void) { int err; @@ -18,7 +339,13 @@ int __init init_nfs_v4(void) if (err) goto out1; + err = register_filesystem(&nfs4_fs_type); + if (err < 0) + goto out2; + return 0; +out2: + nfs4_unregister_sysctl(); out1: nfs_idmap_quit(); out: @@ -27,6 +354,7 @@ out: void __exit exit_nfs_v4(void) { + unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ca3c0e8cf774..95866a8c21bb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -278,29 +278,8 @@ static match_table_t nfs_vers_tokens = { { Opt_vers_err, NULL } }; -struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); - int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); - struct nfs_parsed_mount_data *parsed; - struct nfs_clone_mount *cloned; - struct nfs_fh *mntfh; -}; - -static void nfs_umount_begin(struct super_block *); -static int nfs_statfs(struct dentry *, struct kstatfs *); -static int nfs_show_options(struct seq_file *, struct dentry *); -static int nfs_show_devname(struct seq_file *, struct dentry *); -static int 
nfs_show_path(struct seq_file *, struct dentry *); -static int nfs_show_stats(struct seq_file *, struct dentry *); -static struct dentry *nfs_fs_mount_common(struct file_system_type *, - struct nfs_server *, int, const char *, struct nfs_mount_info *); -static struct dentry *nfs_fs_mount(struct file_system_type *, - int, const char *, void *); static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static void nfs_put_super(struct super_block *); -static void nfs_kill_super(struct super_block *); -static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); static struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, @@ -337,71 +316,6 @@ static const struct super_operations nfs_sops = { static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); -static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info); -static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_mount, - .kill_sb = nfs_kill_super, - .fs_flags = 
FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -struct file_system_type nfs4_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -struct file_system_type nfs4_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static const struct super_operations nfs4_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs4_write_inode, - .put_super = nfs_put_super, - .statfs = nfs_statfs, - .evict_inode = nfs4_evict_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_devname = nfs_show_devname, - .show_path = nfs_show_path, - .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, -}; #endif static struct shrinker acl_shrinker = { @@ -423,18 +337,9 @@ int __init register_nfs_fs(void) ret = nfs_register_sysctl(); if (ret < 0) goto error_1; -#ifdef CONFIG_NFS_V4 - ret = register_filesystem(&nfs4_fs_type); - if (ret < 0) - goto error_2; -#endif register_shrinker(&acl_shrinker); return 0; -#ifdef CONFIG_NFS_V4 -error_2: - nfs_unregister_sysctl(); -#endif error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -447,9 +352,6 @@ error_0: void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); -#ifdef CONFIG_NFS_V4 - unregister_filesystem(&nfs4_fs_type); -#endif nfs_unregister_sysctl(); unregister_filesystem(&nfs_fs_type); } @@ -473,7 +375,7 @@ void nfs_sb_deactive(struct super_block *sb) /* * Deliver file system statistics to userspace */ 
-static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) +int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct nfs_server *server = NFS_SB(dentry->d_sb); unsigned char blockbits; @@ -756,7 +658,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, /* * Describe the mount options on this VFS mountpoint */ -static int nfs_show_options(struct seq_file *m, struct dentry *root) +int nfs_show_options(struct seq_file *m, struct dentry *root) { struct nfs_server *nfss = NFS_SB(root->d_sb); @@ -814,7 +716,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } #endif -static int nfs_show_devname(struct seq_file *m, struct dentry *root) +int nfs_show_devname(struct seq_file *m, struct dentry *root) { char *page = (char *) __get_free_page(GFP_KERNEL); char *devname, *dummy; @@ -830,7 +732,7 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root) return err; } -static int nfs_show_path(struct seq_file *m, struct dentry *dentry) +int nfs_show_path(struct seq_file *m, struct dentry *dentry) { seq_puts(m, "/"); return 0; @@ -839,7 +741,7 @@ static int nfs_show_path(struct seq_file *m, struct dentry *dentry) /* * Present statistical information for this VFS mountpoint */ -static int nfs_show_stats(struct seq_file *m, struct dentry *root) +int nfs_show_stats(struct seq_file *m, struct dentry *root) { int i, cpu; struct nfs_server *nfss = NFS_SB(root->d_sb); @@ -932,7 +834,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) * Begin unmount by attempting to remove all automounted mountpoints we added * in response to xdev traversals and referrals */ -static void nfs_umount_begin(struct super_block *sb) +void nfs_umount_begin(struct super_block *sb) { struct nfs_server *server; struct rpc_clnt *rpc; @@ -2107,7 +2009,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, return 0; } -static int +int nfs_remount(struct super_block *sb, int *flags, char *raw_data) { 
int error; @@ -2172,7 +2074,7 @@ out: /* * Initialise the common bits of the superblock */ -static inline void nfs_initialise_sb(struct super_block *sb) +inline void nfs_initialise_sb(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2194,8 +2096,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* * Finish setting up an NFS2/3 superblock */ -static void nfs_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); @@ -2220,8 +2121,7 @@ static void nfs_fill_super(struct super_block *sb, /* * Finish setting up a cloned NFS2/3/4 superblock */ -static void nfs_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); @@ -2381,14 +2281,14 @@ static int nfs_bdi_register(struct nfs_server *server) return bdi_register_dev(&server->backing_dev_info, server->s_dev); } -static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) +int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) { return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); } -static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) +int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) { /* clone any lsm security options from the parent to the new sb */ security_sb_clone_mnt_opts(mount_info->cloned->sb, s); @@ -2397,10 +2297,10 @@ static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return 0; } -static struct dentry 
*nfs_fs_mount_common(struct file_system_type *fs_type, - struct nfs_server *server, - int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, + struct nfs_server *server, + int flags, const char *dev_name, + struct nfs_mount_info *mount_info) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2470,7 +2370,7 @@ error_splat_bdi: goto out; } -static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, +struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { @@ -2511,7 +2411,7 @@ out: * Ensure that we unregister the bdi before kill_anon_super * releases the device name */ -static void nfs_put_super(struct super_block *s) +void nfs_put_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); @@ -2521,7 +2421,7 @@ static void nfs_put_super(struct super_block *s) /* * Destroy an NFS2/3 superblock */ -static void nfs_kill_super(struct super_block *s) +void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); @@ -2533,7 +2433,7 @@ static void nfs_kill_super(struct super_block *s) /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ -static struct dentry * +struct dentry * nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, const char *dev_name, struct nfs_mount_info *mount_info) { @@ -2580,23 +2480,6 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, #ifdef CONFIG_NFS_V4 -/* - * Set up an NFS4 superblock - */ -static void nfs4_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. 
- */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = nfs4_xattr_handlers; - nfs_initialise_sb(sb); -} - static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| @@ -2694,238 +2577,4 @@ out_no_address: return -EINVAL; } -/* - * Get the superblock for the NFS4 root partition - */ -static struct dentry * -nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) -{ - struct nfs_mount_info *mount_info = info; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - mount_info->fill_super = nfs4_fill_super; - mount_info->set_security = nfs_set_sb_security; - - /* Get a volume representation */ - server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); - -out: - return mntroot; -} - -static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) -{ - struct vfsmount *root_mnt; - char *root_devname; - size_t len; - - len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) - return ERR_PTR(-ENOMEM); - /* Does hostname needs to be enclosed in brackets? 
*/ - if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); - else - snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); - kfree(root_devname); - return root_mnt; -} - -struct nfs_referral_count { - struct list_head list; - const struct task_struct *task; - unsigned int referral_count; -}; - -static LIST_HEAD(nfs_referral_count_list); -static DEFINE_SPINLOCK(nfs_referral_count_list_lock); - -static struct nfs_referral_count *nfs_find_referral_count(void) -{ - struct nfs_referral_count *p; - - list_for_each_entry(p, &nfs_referral_count_list, list) { - if (p->task == current) - return p; - } - return NULL; -} - -#define NFS_MAX_NESTED_REFERRALS 2 - -static int nfs_referral_loop_protect(void) -{ - struct nfs_referral_count *p, *new; - int ret = -ENOMEM; - - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (!new) - goto out; - new->task = current; - new->referral_count = 1; - - ret = 0; - spin_lock(&nfs_referral_count_list_lock); - p = nfs_find_referral_count(); - if (p != NULL) { - if (p->referral_count >= NFS_MAX_NESTED_REFERRALS) - ret = -ELOOP; - else - p->referral_count++; - } else { - list_add(&new->list, &nfs_referral_count_list); - new = NULL; - } - spin_unlock(&nfs_referral_count_list_lock); - kfree(new); -out: - return ret; -} - -static void nfs_referral_loop_unprotect(void) -{ - struct nfs_referral_count *p; - - spin_lock(&nfs_referral_count_list_lock); - p = nfs_find_referral_count(); - p->referral_count--; - if (p->referral_count == 0) - list_del(&p->list); - else - p = NULL; - spin_unlock(&nfs_referral_count_list_lock); - kfree(p); -} - -static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, - const char *export_path) -{ - struct dentry *dentry; - int err; - - if (IS_ERR(root_mnt)) - return ERR_CAST(root_mnt); - - err = nfs_referral_loop_protect(); - if (err) { - mntput(root_mnt); - return ERR_PTR(err); - } - - dentry = mount_subtree(root_mnt, export_path); - 
nfs_referral_loop_unprotect(); - - return dentry; -} - -static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) -{ - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - struct nfs_parsed_mount_data *data = mount_info->parsed; - - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - - mount_info->fill_super = nfs4_fill_super; - - export_path = data->nfs_server.export_path; - data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, - data->nfs_server.hostname); - data->nfs_server.export_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - - dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); - return res; -} - -/* - * Clone an NFS4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs4_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -} - -static struct dentry * -nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - dprintk("--> nfs4_referral_get_sb()\n"); - - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mount_info.mntfh == NULL) - goto out; - - /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = 
nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); -out: - nfs_free_fhandle(mount_info.mntfh); - return mntroot; -} - -/* - * Create an NFS4 server record on referral traversal - */ -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - - dprintk("--> nfs4_referral_mount()\n"); - - export_path = data->mnt_path; - data->mnt_path = "/"; - - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); - data->mnt_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); - return res; -} - #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From ec409897e7c71596cc080135ef5f86b81a0e9813 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:21 -0400 Subject: NFS: Split out NFS v4 client functions These functions are only needed by NFS v4, so they can be moved into a v4 specific file. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 93 ----------------------------------------------------- fs/nfs/nfs4client.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 93 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0d50629d9e25..65afa382c5e3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -56,30 +56,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); -#ifdef CONFIG_NFS_V4 - -/* - * Get a unique NFSv4.0 callback identifier which will be used - * by the V4.0 callback service to lookup the nfs_client struct - */ -static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) -{ - int ret = 0; - struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - - if (clp->rpc_ops->version != 4 || minorversion != 0) - return ret; -retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) - return -ENOMEM; - spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nn->nfs_client_lock); - if (ret == -EAGAIN) - goto retry; - return ret; -} -#endif /* CONFIG_NFS_V4 */ /* * RPC cruft for NFS @@ -175,75 +151,6 @@ error_0: } #ifdef CONFIG_NFS_V4 -#ifdef CONFIG_NFS_V4_1 -static void nfs4_shutdown_session(struct nfs_client *clp) -{ - if (nfs4_has_session(clp)) { - nfs4_destroy_session(clp->cl_session); - nfs4_destroy_clientid(clp); - } - -} -#else /* CONFIG_NFS_V4_1 */ -static void nfs4_shutdown_session(struct nfs_client *clp) -{ -} -#endif /* CONFIG_NFS_V4_1 */ - -struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) -{ - int err; - struct nfs_client *clp = nfs_alloc_client(cl_init); - if (IS_ERR(clp)) - return clp; - - err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); - if (err) - goto error; - - spin_lock_init(&clp->cl_lock); - INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); - 
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; - clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; - return clp; - -error: - kfree(clp); - return ERR_PTR(err); -} - -/* - * Destroy the NFS4 callback service - */ -static void nfs4_destroy_callback(struct nfs_client *clp) -{ - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_mvops->minor_version); -} - -static void nfs4_shutdown_client(struct nfs_client *clp) -{ - if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) - nfs4_kill_renewd(clp); - nfs4_shutdown_session(clp); - nfs4_destroy_callback(clp); - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) - nfs_idmap_delete(clp); - - rpc_destroy_wait_queue(&clp->cl_rpcwaitq); - kfree(clp->cl_serverowner); - kfree(clp->cl_serverscope); - kfree(clp->cl_implid); -} - -void nfs4_free_client(struct nfs_client *clp) -{ - nfs4_shutdown_client(clp); - nfs_free_client(clp); -} - /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index a71d95ecbea9..1c3f13c8e472 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -22,6 +22,97 @@ */ static bool nfs4_disable_idmapping = true; +/* + * Get a unique NFSv4.0 callback identifier which will be used + * by the V4.0 callback service to lookup the nfs_client struct + */ +static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) +{ + int ret = 0; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + + if (clp->rpc_ops->version != 4 || minorversion != 0) + return ret; +retry: + if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) + return -ENOMEM; + spin_lock(&nn->nfs_client_lock); + ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + spin_unlock(&nn->nfs_client_lock); + if (ret == -EAGAIN) + goto retry; + 
return ret; +} + +#ifdef CONFIG_NFS_V4_1 +static void nfs4_shutdown_session(struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) { + nfs4_destroy_session(clp->cl_session); + nfs4_destroy_clientid(clp); + } + +} +#else /* CONFIG_NFS_V4_1 */ +static void nfs4_shutdown_session(struct nfs_client *clp) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) +{ + int err; + struct nfs_client *clp = nfs_alloc_client(cl_init); + if (IS_ERR(clp)) + return clp; + + err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); + if (err) + goto error; + + spin_lock_init(&clp->cl_lock); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; + clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + return clp; + +error: + kfree(clp); + return ERR_PTR(err); +} + +/* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_mvops->minor_version); +} + +static void nfs4_shutdown_client(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + nfs4_shutdown_session(clp); + nfs4_destroy_callback(clp); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); + kfree(clp->cl_serverowner); + kfree(clp->cl_serverscope); + kfree(clp->cl_implid); +} + +void nfs4_free_client(struct nfs_client *clp) +{ + nfs4_shutdown_client(clp); + nfs_free_client(clp); +} + /* * Initialize the NFS4 callback service */ -- cgit v1.2.3 From 67a101f573b0cb1043c8c305112113450cb9fdbf Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jul 2012 11:37:07 -0700 Subject: pstore: Headers should include all stuff they use 
Headers should really include all the needed prototypes, types, defines etc. to be self-contained. This is a long-standing issue, but apparently the new tracing code unearthed it (SMP=n is also a prerequisite): In file included from fs/pstore/internal.h:4:0, from fs/pstore/ftrace.c:21: include/linux/pstore.h:43:15: error: field ‘read_mutex’ has incomplete type While at it, I also added the following: linux/types.h -> size_t, phys_addr_t, uXX and friends linux/spinlock.h -> spinlock_t linux/errno.h -> Exxxx linux/time.h -> struct timespec (struct passed by value) struct module and rs_control forward declaration (passed via pointers). Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- fs/pstore/internal.h | 2 ++ include/linux/pstore.h | 6 ++++++ include/linux/pstore_ram.h | 1 + 3 files changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 958c48d8905c..0d0d3b7d5f12 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -1,6 +1,8 @@ #ifndef __PSTORE_INTERNAL_H__ #define __PSTORE_INTERNAL_H__ +#include +#include #include #if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 120443b0fda5..c892587d9b81 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -24,6 +24,10 @@ #include #include +#include +#include +#include +#include /* types */ enum pstore_type_id { @@ -34,6 +38,8 @@ enum pstore_type_id { PSTORE_TYPE_UNKNOWN = 255 }; +struct module; + struct pstore_info { struct module *owner; char *name; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index af848e1593b9..ba2b211aaa81 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -24,6 +24,7 @@ #include struct persistent_ram_buffer; +struct rs_control; struct persistent_ram_zone { phys_addr_t paddr; -- cgit v1.2.3 From bb6e071f845d32545e3e943058012f328a2e95ad Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 17 Jul 
2012 15:18:30 -0400 Subject: NFS: exit_nfs_v4() shouldn't be an __exit function ... yet. Right now, init_nfs() is calling this function if an error is encountered when loading the nfs module. An __exit function can't be called from one declared as __init. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 2af26913884f..59264fb335c8 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -352,7 +352,7 @@ out: return err; } -void __exit exit_nfs_v4(void) +void exit_nfs_v4(void) { unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); -- cgit v1.2.3 From cbe7cbf5a666ad9dfe2e0c276066131af73769ab Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jul 2012 12:11:12 -0700 Subject: pstore/ram: Make tracing log versioned Decoding the binary trace w/ a different kernel might be troublesome since we convert addresses to symbols. For kernels with minimal changes, the mappings would probably match, but it's not guaranteed at all. (But still we could convert the addresses by hand, since we do print raw addresses.) If we use modules, the symbols could be loaded at different addresses from the previously booted kernel, and so this would also fail, but there's nothing we can do about it. Also, the binary data format that pstore/ram is using in its ringbuffer may change between the kernels, so here we too must ensure that we're running the same kernel. So, there are two questions really: 1. How to compute the unique kernel tag; 2. Where to store it. In this patch we're using LINUX_VERSION_CODE, just as hibernation (suspend-to-disk) does. This way we are protecting from the kernel version mismatch, making sure that we're running the same kernel version and patch level. We could use CRC of a symbol table (as suggested by Tony Luck), but for now let's not be that strict. 
And as for storing, we are using a small trick here. Instead of allocating a dedicated buffer for the tag (i.e. another prz), or hacking ram_core routines to "reserve" some control data in the buffer, we are just encoding the tag into the buffer signature (and XOR'ing it with the actual signature value, so that buffers not needing a tag can just pass zero, which will result into the plain old PRZ signature). Suggested-by: Steven Rostedt Suggested-by: Tony Luck Suggested-by: Colin Cross Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 13 ++++++++----- fs/pstore/ram_core.c | 12 +++++++----- include/linux/pstore_ram.h | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1dd108e0cc60..0b311bc18916 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -309,7 +310,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, for (i = 0; i < cxt->max_dump_cnt; i++) { size_t sz = cxt->record_size; - cxt->przs[i] = persistent_ram_new(*paddr, sz, cxt->ecc_size); + cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, cxt->ecc_size); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -327,7 +328,7 @@ fail_prz: static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, struct persistent_ram_zone **prz, - phys_addr_t *paddr, size_t sz) + phys_addr_t *paddr, size_t sz, u32 sig) { if (!sz) return 0; @@ -335,7 +336,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, if (*paddr + sz > *paddr + cxt->size) return -ENOMEM; - *prz = persistent_ram_new(*paddr, sz, cxt->ecc_size); + *prz = persistent_ram_new(*paddr, sz, sig, cxt->ecc_size); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); @@ -394,11 +395,13 @@ static int __devinit ramoops_probe(struct 
platform_device *pdev) if (err) goto fail_out; - err = ramoops_init_prz(dev, cxt, &cxt->cprz, &paddr, cxt->console_size); + err = ramoops_init_prz(dev, cxt, &cxt->cprz, &paddr, + cxt->console_size, 0); if (err) goto fail_init_cprz; - err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size); + err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size, + LINUX_VERSION_CODE); if (err) goto fail_init_fprz; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 4dabbb8e4270..eecd2a8a84dd 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -391,7 +391,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, - int ecc_size) + u32 sig, int ecc_size) { int ret; @@ -399,7 +399,9 @@ static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, if (ret) return ret; - if (prz->buffer->sig == PERSISTENT_RAM_SIG) { + sig ^= PERSISTENT_RAM_SIG; + + if (prz->buffer->sig == sig) { if (buffer_size(prz) > prz->buffer_size || buffer_start(prz) > buffer_size(prz)) pr_info("persistent_ram: found existing invalid buffer," @@ -417,7 +419,7 @@ static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, " (sig = 0x%08x)\n", prz->buffer->sig); } - prz->buffer->sig = PERSISTENT_RAM_SIG; + prz->buffer->sig = sig; persistent_ram_zap(prz); return 0; @@ -442,7 +444,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) } struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, + size_t size, u32 sig, int ecc_size) { struct persistent_ram_zone *prz; @@ -458,7 +460,7 @@ struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, if (ret) goto err; - ret = persistent_ram_post_init(prz, ecc_size); + ret = persistent_ram_post_init(prz, sig, ecc_size); if (ret) goto err; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 
ba2b211aaa81..098d2a838296 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -47,7 +47,7 @@ struct persistent_ram_zone { }; struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, + size_t size, u32 sig, int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.3 From 72d0d248ca8232dbd30d35b42d0d86e39b3e322b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 16 Jul 2012 15:23:48 -0400 Subject: fuse: add FUSE_AUTO_INVAL_DATA init flag FUSE_AUTO_INVAL_DATA is provided to enable updated/auto cache invalidation logic. Signed-off-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 4 +++- include/linux/fuse.h | 7 ++++++- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 771fb6322c07..e24dd74e3068 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -484,6 +484,9 @@ struct fuse_conn { /** Is fallocate not implemented by fs? */ unsigned no_fallocate:1; + /** Use enhanced/automatic page cache invalidation. 
*/ + unsigned auto_inval_data:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1cd61652018c..dd37ee291b8b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -834,6 +834,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->big_writes = 1; if (arg->flags & FUSE_DONT_MASK) fc->dont_mask = 1; + if (arg->flags & FUSE_AUTO_INVAL_DATA) + fc->auto_inval_data = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -859,7 +861,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | - FUSE_FLOCK_LOCKS; + FUSE_FLOCK_LOCKS | FUSE_AUTO_INVAL_DATA; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 9303348965fb..e4a9d2af9aaa 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -57,6 +57,9 @@ * * 7.19 * - add FUSE_FALLOCATE + * + * 7.20 + * - add FUSE_AUTO_INVAL_DATA */ #ifndef _LINUX_FUSE_H @@ -88,7 +91,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 19 +#define FUSE_KERNEL_MINOR_VERSION 20 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -167,6 +170,7 @@ struct fuse_file_lock { * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." 
* FUSE_DONT_MASK: don't apply umask to file mode on create operations * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks + * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -176,6 +180,7 @@ struct fuse_file_lock { #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) #define FUSE_FLOCK_LOCKS (1 << 10) +#define FUSE_AUTO_INVAL_DATA (1 << 12) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From eed2179efe1aac145bf6d54b925b750976380fa6 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 16 Jul 2012 15:23:49 -0400 Subject: fuse: invalidate inode mapping if mtime changes We currently invalidate the inode address space mapping if the file size changes unexpectedly. In the case of a fuse network filesystem, a portion of a file could be overwritten remotely without changing the file size. Compare the old mtime as well to detect this condition and invalidate the mapping if the file has been updated. The original logic (to ignore changes in mtime) is preserved unless the client specifies FUSE_AUTO_INVAL_DATA on init. 
Signed-off-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index dd37ee291b8b..dd4401650b47 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); loff_t oldsize; + struct timespec old_mtime; spin_lock(&fc->lock); if (attr_version != 0 && fi->attr_version > attr_version) { @@ -204,15 +205,35 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, return; } + old_mtime = inode->i_mtime; fuse_change_attributes_common(inode, attr, attr_valid); oldsize = inode->i_size; i_size_write(inode, attr->size); spin_unlock(&fc->lock); - if (S_ISREG(inode->i_mode) && oldsize != attr->size) { - truncate_pagecache(inode, oldsize, attr->size); - invalidate_inode_pages2(inode->i_mapping); + if (S_ISREG(inode->i_mode)) { + bool inval = false; + + if (oldsize != attr->size) { + truncate_pagecache(inode, oldsize, attr->size); + inval = true; + } else if (fc->auto_inval_data) { + struct timespec new_mtime = { + .tv_sec = attr->mtime, + .tv_nsec = attr->mtimensec, + }; + + /* + * Auto inval mode also checks and invalidates if mtime + * has changed. + */ + if (!timespec_equal(&old_mtime, &new_mtime)) + inval = true; + } + + if (inval) + invalidate_inode_pages2(inode->i_mapping); } } -- cgit v1.2.3 From a8894274a3581125fb311eabfc97cd0123740c5e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 16 Jul 2012 15:23:50 -0400 Subject: fuse: update attributes on aio_read A fuse-based network filesystem might allow for the inode and/or file data to change unexpectedly. A local client that opens and repeatedly reads a file might never pick up on such changes and indefinitely return stale data. 
Always invoke fuse_update_attributes() in the read path to cause an attr revalidation when the attributes expire. This leads to a page cache invalidation if necessary and ensures fuse issues new read requests to the fuse client. The original logic (reval only on reads beyond EOF) is preserved unless the client specifies FUSE_AUTO_INVAL_DATA on init. Signed-off-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b321a688cde7..5800101e5ce1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -703,13 +703,16 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); - if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) { + /* + * In auto invalidate mode, always update attributes on read. + * Otherwise, only update if we attempt to read past EOF (to ensure + * i_size is up to date). + */ + if (fc->auto_inval_data || + (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { int err; - /* - * If trying to read past EOF, make sure the i_size - * attribute is up-to-date. - */ err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); if (err) return err; -- cgit v1.2.3 From 69fe05c90ed58aac956dccb9e6d3a325fb3b8767 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2012 16:09:40 +0200 Subject: fuse: add missing INIT flags Add missing flags that userspace derived from the protocol version number. This makes the protocol more flexible. 
Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 3 ++- include/linux/fuse.h | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index dd4401650b47..ce0a2838ccd0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -882,7 +882,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | - FUSE_FLOCK_LOCKS | FUSE_AUTO_INVAL_DATA; + FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | + FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index e4a9d2af9aaa..6455c5b64c2e 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -169,7 +169,11 @@ struct fuse_file_lock { * FUSE_POSIX_LOCKS: remote locking for POSIX file locks * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." 
* FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_SPLICE_WRITE: kernel supports splice write on the device + * FUSE_SPLICE_MOVE: kernel supports splice move on the device + * FUSE_SPLICE_READ: kernel supports splice read on the device * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks + * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages */ #define FUSE_ASYNC_READ (1 << 0) @@ -179,7 +183,11 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) +#define FUSE_SPLICE_WRITE (1 << 7) +#define FUSE_SPLICE_MOVE (1 << 8) +#define FUSE_SPLICE_READ (1 << 9) #define FUSE_FLOCK_LOCKS (1 << 10) +#define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) /** -- cgit v1.2.3 From 294f2ad5a545eb71d397623743ddd8201131bdad Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Wed, 18 Jul 2012 11:56:59 -0400 Subject: GFS2: kernel panic with small gfs2 filesystems - 1 RG In the unlikely setup where there's only one resource group in the gfs2 filesystem, gfs2_rgrpd_get_next() returns a NULL rgd that is not dealt with properly, causing a kernel NULL ptr dereference. This patch fixes this issue. Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e53d0a1c234f..fb7079263ea7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1276,6 +1276,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); + rgd = rgd ? 
: begin; /* if NULL, wrap */ if (rgd != begin) /* If we didn't wrap */ break; -- cgit v1.2.3 From 8e2e00473598dd5379d8408cb974dade000acafc Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 19 Jul 2012 08:12:40 -0400 Subject: GFS2: Reduce file fragmentation This patch reduces GFS2 file fragmentation by pre-reserving blocks. The resulting improved on disk layout greatly speeds up operations in cases which would have resulted in interlaced allocation of blocks previously. A typical example of this is 10 parallel dd processes, each writing to a file in a common directory. The implementation uses an rbtree of reservations attached to each resource group (and each inode). Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 3 + fs/gfs2/file.c | 24 +-- fs/gfs2/incore.h | 49 +++-- fs/gfs2/inode.c | 37 +++- fs/gfs2/rgrp.c | 578 ++++++++++++++++++++++++++++++++++++++++++++++----- fs/gfs2/rgrp.h | 31 ++- fs/gfs2/super.c | 7 + fs/gfs2/trace_gfs2.h | 59 +++++- fs/gfs2/xattr.c | 12 ++ 9 files changed, 708 insertions(+), 92 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6d957a86482b..49cd7dd4a9fa 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (error) goto out_rlist; + if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ + gfs2_rs_deltree(ip->i_res); + error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, revokes); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6fbf3cbd974d..9f94832cefec 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) return ret; + atomic_set(&ip->i_res->rs_sizehint, + PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -571,22 +574,15 @@ fail: 
static int gfs2_release(struct inode *inode, struct file *file) { - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_file *fp; struct gfs2_inode *ip = GFS2_I(inode); - fp = file->private_data; + kfree(file->private_data); file->private_data = NULL; - if ((file->f_mode & FMODE_WRITE) && ip->i_res && + if ((file->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) gfs2_rs_delete(ip); - if (gfs2_assert_warn(sdp, fp)) - return -EIO; - - kfree(fp); - return 0; } @@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + size_t writesize = iov_length(iov, nr_segs); struct dentry *dentry = file->f_dentry; struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct gfs2_sbd *sdp; int ret; + sdp = GFS2_SB(file->f_mapping->host); ret = gfs2_rs_alloc(ip); if (ret) return ret; + atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (unlikely(error)) goto out_uninit; + atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize); + while (len > 0) { if (len < bytes) bytes = len; @@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, offset += bytes; continue; } - error = gfs2_rindex_update(sdp); - if (error) - goto out_unlock; - error = gfs2_quota_lock_check(ip); if (error) goto out_unlock; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index dc730700b3b4..aaecc8085fc5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -84,6 +84,7 @@ struct gfs2_rgrpd { u32 rd_data; /* num of data blocks in rgrp */ u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; + u32 rd_reserved; /* number of blocks reserved */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -96,6 +97,9 @@ struct gfs2_rgrpd { #define GFS2_RDF_UPTODATE 
0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ + spinlock_t rd_rsspin; /* protects reservation related vars */ + struct rb_root rd_rstree; /* multi-block reservation tree */ + u32 rd_rs_cnt; /* count of current reservations */ }; enum gfs2_state_bits { @@ -233,6 +237,38 @@ struct gfs2_holder { unsigned long gh_ip; }; +/* Resource group multi-block reservation, in order of appearance: + + Step 1. Function prepares to write, allocates a mb, sets the size hint. + Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info + Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use + Step 4. Bits are assigned from the rgrp based on either the reservation + or wherever it can. +*/ + +struct gfs2_blkreserv { + /* components used during write (step 1): */ + atomic_t rs_sizehint; /* hint of the write size */ + + /* components used during inplace_reserve (step 2): */ + u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ + + /* components used during get_local_rgrp (step 3): */ + struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ + struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ + struct rb_node rs_node; /* link to other block reservations */ + + /* components used during block searches and assignments (step 4): */ + struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ + u32 rs_biblk; /* start block relative to the bi */ + u32 rs_free; /* how many blocks are still free */ + + /* ancillary quota stuff */ + struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; + struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; + unsigned int rs_qa_qd_num; +}; + enum { GLF_LOCK = 1, GLF_DEMOTE = 3, @@ -290,16 +326,6 @@ struct gfs2_glock { #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ -struct gfs2_blkreserv { - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - struct 
gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */ - - /* ancillary quota stuff */ - struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; - struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; - unsigned int rs_qa_qd_num; -}; - enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, @@ -307,7 +333,6 @@ enum { GIF_SW_PAGED = 3, }; - struct gfs2_inode { struct inode i_inode; u64 i_no_addr; @@ -318,7 +343,7 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ - struct gfs2_blkreserv *i_res; /* resource group block reservation */ + struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */ struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2b035e0959b2..c53c67e30bd4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, int error; munge_mode_uid_gid(dip, &mode, &uid, &gid); + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_lock(dip, uid, gid); if (error) @@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, struct buffer_head *dibh; int error; + error = gfs2_rindex_update(sdp); + if (error) + return error; + error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) goto fail; @@ -596,7 +603,8 @@ fail_end_trans: gfs2_trans_end(sdp); fail_ipreserv: - gfs2_inplace_release(dip); + if (alloc_required) + gfs2_inplace_release(dip); fail_quota_locks: gfs2_quota_unlock(dip); @@ -647,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, const struct qstr *name = &dentry->d_name; struct gfs2_holder ghs[2]; struct inode *inode = NULL; - struct gfs2_inode *dip = GFS2_I(dir); + struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct 
gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; @@ -657,6 +665,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; + /* We need a reservation to allocate the new dinode block. The + directory ip temporarily points to the reservation, but this is + being done to get a set of contiguous blocks for the new dinode. + Since this is a create, we don't have a sizehint yet, so it will + have to use the minimum reservation size. */ error = gfs2_rs_alloc(dip); if (error) return error; @@ -694,24 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto fail_gunlock2; - error = gfs2_inode_refresh(GFS2_I(inode)); + ip = GFS2_I(inode); + error = gfs2_inode_refresh(ip); if (error) goto fail_gunlock2; - /* the new inode needs a reservation so it can allocate xattrs. */ - error = gfs2_rs_alloc(GFS2_I(inode)); - if (error) - goto fail_gunlock2; + /* The newly created inode needs a reservation so it can allocate + xattrs. At the same time, we want new blocks allocated to the new + dinode to be as contiguous as possible. Since we allocated the + dinode block under the directory's reservation, we transfer + ownership of that reservation to the new inode. The directory + doesn't need a reservation unless it needs a new allocation. 
*/ + ip->i_res = dip->i_res; + dip->i_res = NULL; error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; - error = gfs2_security_init(dip, GFS2_I(inode), name); + error = gfs2_security_init(dip, ip, name); if (error) goto fail_gunlock2; - error = link_dinode(dip, name, GFS2_I(inode)); + error = link_dinode(dip, name, ip); if (error) goto fail_gunlock2; @@ -738,6 +756,7 @@ fail_gunlock: iput(inode); } fail: + gfs2_rs_delete(dip); if (bh) brelse(bh); return error; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fb7079263ea7..4d34887a601d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,6 +35,9 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +#define RSRV_CONTENTION_FACTOR 4 +#define RGRP_RSRV_MAX_CONTENDERS 2 + #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -177,6 +180,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) return tmp; } +/** + * rs_cmp - multi-block reservation range compare + * @blk: absolute file system block number of the new reservation + * @len: number of blocks in the new reservation + * @rs: existing reservation to compare against + * + * returns: 1 if the block range is beyond the reach of the reservation + * -1 if the block range is before the start of the reservation + * 0 if the block range overlaps with the reservation + */ +static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +{ + u64 startblk = gfs2_rs_startblk(rs); + + if (blk >= startblk + rs->rs_free) + return 1; + if (blk + len - 1 < startblk) + return -1; + return 0; +} + +/** + * rs_find - Find a rgrp multi-block reservation that contains a given block + * @rgd: The rgrp + * @rgblk: The block we're looking for, relative to the rgrp + */ +static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) +{ + struct rb_node **newn; + int rc; + u64 fsblk = rgblk + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + while (*newn) 
{ + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(fsblk, 1, cur); + if (rc < 0) + newn = &((*newn)->rb_left); + else if (rc > 0) + newn = &((*newn)->rb_right); + else { + spin_unlock(&rgd->rd_rsspin); + return cur; + } + } + spin_unlock(&rgd->rd_rsspin); + return NULL; +} + /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. @@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) int gfs2_rs_alloc(struct gfs2_inode *ip) { int error = 0; + struct gfs2_blkreserv *res; + + if (ip->i_res) + return 0; + + res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!res) + error = -ENOMEM; down_write(&ip->i_rw_mutex); - if (!ip->i_res) { - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - error = -ENOMEM; - } + if (ip->i_res) + kmem_cache_free(gfs2_rsrv_cachep, res); + else + ip->i_res = res; up_write(&ip->i_rw_mutex); return error; } +static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +{ + gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", + rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, + rs->rs_free); +} + /** - * gfs2_rs_delete - delete a reservation + * __rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +static void __rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + /* We can't do this: The reason is that when the rgrp is invalidated, + it's in the "middle" of acquiring the glock, but the HOLDER bit + isn't set yet: + BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ + trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); + + if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) + rb_erase(&rs->rs_node, &rgd->rd_rstree); + BUG_ON(!rgd->rd_rs_cnt); + rgd->rd_rs_cnt--; + + if (rs->rs_free) { + /* return reserved blocks to the rgrp and the ip */ + 
BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); + rs->rs_rgd->rd_reserved -= rs->rs_free; + rs->rs_free = 0; + clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + smp_mb__after_clear_bit(); + } + /* We can't change any of the step 1 or step 2 components of the rs. + E.g. We can't set rs_rgd to NULL because the rgd glock is held and + dequeued through this pointer. + Can't: atomic_set(&rs->rs_sizehint, 0); + Can't: rs->rs_requested = 0; + Can't: rs->rs_rgd = NULL;*/ + rs->rs_bi = NULL; + rs->rs_biblk = 0; +} + +/** + * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + __rs_deltree(rs); + spin_unlock(&rgd->rd_rsspin); +} + +/** + * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation * */ @@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { + gfs2_rs_deltree(ip->i_res); + trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; } up_write(&ip->i_rw_mutex); } +/** + * return_all_reservations - return all reserved blocks back to the rgrp. + * @rgd: the rgrp that needs its space back + * + * We previously reserved a bunch of blocks for allocation. Now we need to + * give them back. This leave the reservation structures in tact, but removes + * all of their corresponding "no-fly zones". 
+ */ +static void return_all_reservations(struct gfs2_rgrpd *rgd) +{ + struct rb_node *n; + struct gfs2_blkreserv *rs; + + spin_lock(&rgd->rd_rsspin); + while ((n = rb_first(&rgd->rd_rstree))) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + __rs_deltree(rs); + } + spin_unlock(&rgd->rd_rsspin); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) gfs2_free_clones(rgd); kfree(rgd->rd_bits); + return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } @@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data0 = be64_to_cpu(buf.ri_data0); rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); + spin_lock_init(&rgd->rd_rsspin); error = compute_bitstructs(rgd); if (error) @@ -1114,30 +1268,213 @@ out: return ret; } +/** + * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree + * @bi: the bitmap with the blocks + * @ip: the inode structure + * @biblk: the 32-bit block number relative to the start of the bitmap + * @amount: the number of blocks to reserve + * + * Returns: NULL - reservation was already taken, so not inserted + * pointer to the inserted reservation + */ +static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, + struct gfs2_inode *ip, u32 biblk, + int amount) +{ + struct rb_node **newn, *parent = NULL; + int rc; + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + BUG_ON(!ip->i_res); + BUG_ON(gfs2_rs_active(rs)); + /* Figure out where to put new node */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + + parent = *newn; + rc = rs_cmp(fsblock, amount, cur); + if (rc > 0) + newn = &((*newn)->rb_right); + else 
if (rc < 0) + newn = &((*newn)->rb_left); + else { + spin_unlock(&rgd->rd_rsspin); + return NULL; /* reservation already in use */ + } + } + + /* Do our reservation work */ + rs = ip->i_res; + rs->rs_free = amount; + rs->rs_biblk = biblk; + rs->rs_bi = bi; + rb_link_node(&rs->rs_node, parent, newn); + rb_insert_color(&rs->rs_node, &rgd->rd_rstree); + + /* Do our inode accounting for the reservation */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + + /* Do our rgrp accounting for the reservation */ + rgd->rd_reserved += amount; /* blocks reserved */ + rgd->rd_rs_cnt++; /* number of in-tree reservations */ + spin_unlock(&rgd->rd_rsspin); + trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + return rs; +} + +/** + * unclaimed_blocks - return number of blocks that aren't spoken for + */ +static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) +{ + return rgd->rd_free_clone - rgd->rd_reserved; +} + +/** + * rg_mblk_search - find a group of multiple free blocks + * @rgd: the resource group descriptor + * @rs: the block reservation + * @ip: pointer to the inode for which we're reserving blocks + * + * This is very similar to rgblk_search, except we're looking for whole + * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing + * on aligned dwords for speed's sake. 
+ * + * Returns: 0 if successful or BFITNOENT if there isn't enough free space + */ + +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +{ + struct gfs2_bitmap *bi = rgd->rd_bits; + const u32 length = rgd->rd_length; + u32 blk; + unsigned int buf, x, search_bytes; + u8 *buffer = NULL; + u8 *ptr, *end, *nonzero; + u32 goal, rsv_bytes; + struct gfs2_blkreserv *rs; + u32 best_rs_bytes, unclaimed; + int best_rs_blocks; + + /* Find bitmap block that contains bits for goal block */ + if (rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + /* Convert scope of "goal" from rgrp-wide to within + found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } + } + buf = 0; + goal = 0; + +do_search: + best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), + (RGRP_RSRV_MINBLKS * rgd->rd_length)); + best_rs_bytes = (best_rs_blocks * + (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / + GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ + best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); + unclaimed = unclaimed_blocks(rgd); + if (best_rs_bytes * GFS2_NBBY > unclaimed) + best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; + + for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags)) + goto skip; + + WARN_ON(!buffer_uptodate(bi->bi_bh)); + if (bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; + else + buffer = bi->bi_bh->b_data + bi->bi_offset; + + /* We have to keep the reservations aligned on u64 boundaries + otherwise we could get situations where a byte can't be + used because it's after a reservation, but a free bit still + is within the reservation's area. 
*/ + ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); + end = (buffer + bi->bi_len); + while (ptr < end) { + rsv_bytes = 0; + if ((ptr + best_rs_bytes) <= end) + search_bytes = best_rs_bytes; + else + search_bytes = end - ptr; + BUG_ON(!search_bytes); + nonzero = memchr_inv(ptr, 0, search_bytes); + /* If the lot is all zeroes, reserve the whole size. If + there's enough zeroes to satisfy the request, use + what we can. If there's not enough, keep looking. */ + if (nonzero == NULL) + rsv_bytes = search_bytes; + else if ((nonzero - ptr) * GFS2_NBBY >= + ip->i_res->rs_requested) + rsv_bytes = (nonzero - ptr); + + if (rsv_bytes) { + blk = ((ptr - buffer) * GFS2_NBBY); + BUG_ON(blk >= bi->bi_len * GFS2_NBBY); + rs = rs_insert(bi, ip, blk, + rsv_bytes * GFS2_NBBY); + if (IS_ERR(rs)) + return PTR_ERR(rs); + if (rs) + return 0; + } + ptr += ALIGN(search_bytes, sizeof(u64)); + } +skip: + /* Try next bitmap block (wrap back to rgrp header + if at end) */ + buf++; + buf %= length; + goal = 0; + } + + return BFITNOENT; +} + /** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data * @ip: the inode * * If there's room for the requested blocks to be allocated from the RG: + * This will try to get a multi-block reservation first, and if that doesn't + * fit, it will take what it can. * * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - const struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = ip->i_res; if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; - if (rgd->rd_free_clone >= rs->rs_requested) + /* Look for a multi-block reservation. 
*/ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && + rg_mblk_search(rgd, ip) != BFITNOENT) + return 1; + if (unclaimed_blocks(rgd) >= rs->rs_requested) return 1; - return 0; -} -static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; + return 0; } /** @@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd, *begin = NULL; + struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; int error = 0, rg_locked, flags = LM_FLAG_TRY; u64 last_unlinked = NO_BLOCK; @@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; goto out; } - - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) - rgd = begin = ip->i_rgd; - else - rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); - - if (rgd == NULL) + if (gfs2_rs_active(rs)) { + begin = rs->rs_rgd; + flags = 0; /* Yoda: Do or do not. There is no try */ + } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { + rs->rs_rgd = begin = ip->i_rgd; + } else { + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + } + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 0; - if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { rg_locked = 1; error = 0; + } else if (!loops && !gfs2_rs_active(rs) && + rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + /* If the rgrp already is maxed out for contenders, + we can eliminate it as a "first pass" without even + requesting the rgrp glock. 
*/ + error = GLR_TRYFAILED; } else { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - flags, &rs->rs_rgd_gh); + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + LM_ST_EXCLUSIVE, flags, + &rs->rs_rgd_gh); if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rgd); + error = update_rgrp_lvb(rs->rs_rgd); if (error) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; @@ -1259,25 +1604,37 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) } switch (error) { case 0: - if (try_rgrp_fit(rgd, ip)) { + if (gfs2_rs_active(rs)) { + if (unclaimed_blocks(rs->rs_rgd) + + rs->rs_free >= rs->rs_requested) { + ip->i_rgd = rs->rs_rgd; + return 0; + } + /* We have a multi-block reservation, but the + rgrp doesn't have enough free blocks to + satisfy the request. Free the reservation + and look for a suitable rgrp. */ + gfs2_rs_deltree(rs); + } + if (try_rgrp_fit(rs->rs_rgd, ip)) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rgd); - ip->i_rgd = rgd; + gfs2_rgrp_bh_get(rs->rs_rgd); + ip->i_rgd = rs->rs_rgd; return 0; } - if (rgd->rd_flags & GFS2_RDF_CHECK) { + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rgd); - try_rgrp_unlink(rgd, &last_unlinked, + gfs2_rgrp_bh_get(rs->rs_rgd); + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: - rgd = gfs2_rgrpd_get_next(rgd); - rgd = rgd ? : begin; /* if NULL, wrap */ - if (rgd != begin) /* If we didn't wrap */ + rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); + rs->rs_rgd = rs->rs_rgd ? 
: begin; /* if NULL, wrap */ + if (rs->rs_rgd != begin) /* If we didn't wrap */ break; flags &= ~LM_FLAG_TRY; @@ -1315,6 +1672,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; + if (!rs) + return; + + if (!rs->rs_free) + gfs2_rs_deltree(rs); + if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); rs->rs_requested = 0; @@ -1413,7 +1776,27 @@ do_search: if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + while (1) { + struct gfs2_blkreserv *rs; + u32 rgblk; + + biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + if (biblk == BFITNOENT) + break; + /* Check if this block is reserved() */ + rgblk = gfs2_bi2rgd_blk(bi, biblk); + rs = rs_find(rgd, rgblk); + if (rs == NULL) + break; + + BUG_ON(rs->rs_bi != bi); + biblk = BFITNOENT; + /* This should jump to the first block after the + reservation. */ + goal = rs->rs_biblk + rs->rs_free; + if (goal >= bi->bi_len * GFS2_NBBY) + break; + } if (biblk != BFITNOENT) break; @@ -1449,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, u32 blk, bool dinode, unsigned int *n) { const unsigned int elen = *n; - u32 goal; + u32 goal, rgblk; const u8 *buffer = NULL; + struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1463,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; + rgblk = gfs2_bi2rgd_blk(bi, goal); + rs = rs_find(rgd, rgblk); + if (rs) /* Oops, we bumped into someone's reservation */ + break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1538,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) { - const struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_rgrpd *rgd = 
gl->gl_object; + struct gfs2_blkreserv *trs; + const struct rb_node *n; + if (rgd == NULL) return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, + rgd->rd_reserved); + spin_lock(&rgd->rd_rsspin); + for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { + trs = rb_entry(n, struct gfs2_blkreserv, rs_node); + dump_rs(seq, trs); + } + spin_unlock(&rgd->rd_rsspin); return 0; } @@ -1557,11 +1955,64 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) rgd->rd_flags |= GFS2_RDF_ERROR; } +/** + * claim_reserved_blks - Claim previously reserved blocks + * @ip: the inode that's claiming the reservation + * @dinode: 1 if this block is a dinode block, otherwise data block + * @nblocks: desired extent length + * + * Lay claim to previously allocated block reservation blocks. + * Returns: Starting block number of the blocks claimed. + * Sets *nblocks to the actual extent length allocated. + */ +static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, + unsigned int *nblocks) +{ + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_bitmap *bi; + u64 start_block = gfs2_rs_startblk(rs); + const unsigned int elen = *nblocks; + + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + gfs2_assert_withdraw(sdp, rgd); + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + bi = rs->rs_bi; + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + + for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { + /* Make sure the bitmap hasn't changed */ + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + dinode ? 
GFS2_BLKST_DINODE : GFS2_BLKST_USED); + rs->rs_biblk++; + rs->rs_free--; + + BUG_ON(!rgd->rd_reserved); + rgd->rd_reserved--; + dinode = false; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + } + + if (!rs->rs_free) { + struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; + + gfs2_rs_deltree(rs); + /* -nblocks because we haven't returned to do the math yet. + I'm doing the math backwards to prevent negative numbers, + but think of it as: + if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) + rg_mblk_search(rgd, ip); + } + return start_block; +} + /** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number - * @ndata: requested number of blocks/extent length (value/result) + * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 * @generation: the generation number of the inode * @@ -1586,20 +2037,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, if (ip->i_res->rs_requested == 0) return -ECANCELED; - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; - - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + /* Check if we have a multi-block reservation, and if so, claim the + next free block from it. 
*/ + if (gfs2_rs_active(ip->i_res)) { + BUG_ON(!ip->i_res->rs_free); + rgd = ip->i_res->rs_rgd; + block = claim_reserved_blks(ip, dinode, nblocks); + } else { + rgd = ip->i_rgd; - /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) - goto rgrp_error; + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + + /* Since all blocks are reserved in advance, this shouldn't + happen */ + if (blk == BFITNOENT) { + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", + *nblocks); + printk(KERN_WARNING "FULL=%d\n", + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + goto rgrp_error; + } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + } ndata = *nblocks; if (dinode) ndata--; @@ -1616,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rgd->rd_free < *nblocks) + if (rgd->rd_free < *nblocks) { + printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; + } rgd->rd_free -= *nblocks; if (dinode) { @@ -1877,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) for (x = 0; x < rlist->rl_rgrps; x++) gfs2_holder_uninit(&rlist->rl_ghs[x]); kfree(rlist->rl_ghs); + rlist->rl_ghs = NULL; } } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 5d8314dbc899..ca6e26729b86 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -13,6 +13,14 @@ #include #include +/* Since each block in the file system is represented by two bits in the + * bitmap, one 64-bit word in the bitmap will represent 32 blocks. + * By reserving 32 blocks at a time, we can optimize / shortcut how we search + * through the bitmaps by looking a word at a time. 
+ */ +#define RGRP_RSRV_MINBYTES 8 +#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY)) + struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; @@ -29,6 +37,8 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); +extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); + extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); extern void gfs2_inplace_release(struct gfs2_inode *ip); @@ -36,6 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); @@ -62,7 +73,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a reservation is "inplace" reserved: */ +/* This is how to tell if a multi-block reservation is "inplace" reserved: */ static inline int gfs2_mb_reserved(struct gfs2_inode *ip) { if (ip->i_res && ip->i_res->rs_requested) @@ -70,4 +81,22 @@ static inline int gfs2_mb_reserved(struct gfs2_inode *ip) return 0; } +/* This is how to tell if a multi-block reservation is in the rgrp tree: */ +static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) +{ + if (rs && rs->rs_bi) + return 1; + return 0; +} + +static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) +{ + return (bi->bi_start * GFS2_NBBY) + blk; +} + +static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) +{ + return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; +} 
+ #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 788068758f3a..b1502c4bc60d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1420,6 +1420,10 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) return -EIO; } + error = gfs2_rindex_update(sdp); + if (error) + return error; + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) return error; @@ -1550,6 +1554,9 @@ out_truncate: out_unlock: /* Error path for case 1 */ + if (gfs2_rs_active(ip->i_res)) + gfs2_rs_deltree(ip->i_res); + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 1b8b81588199..a25c252fe412 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -14,6 +14,7 @@ #include #include "incore.h" #include "glock.h" +#include "rgrp.h" #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } #define glock_trace_name(x) __print_symbolic(x, \ @@ -31,6 +32,17 @@ { GFS2_BLKST_DINODE, "dinode" }, \ { GFS2_BLKST_UNLINKED, "unlinked" }) +#define TRACE_RS_DELETE 0 +#define TRACE_RS_TREEDEL 1 +#define TRACE_RS_INSERT 2 +#define TRACE_RS_CLAIM 3 + +#define rs_func_name(x) __print_symbolic(x, \ + { 0, "del " }, \ + { 1, "tdel" }, \ + { 2, "ins " }, \ + { 3, "clm " }) + #define show_glock_flags(flags) __print_flags(flags, "", \ {(1UL << GLF_LOCK), "l" }, \ {(1UL << GLF_DEMOTE), "D" }, \ @@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u8, block_state ) __field( u64, rd_addr ) __field( u32, rd_free_clone ) + __field( u32, rd_reserved ) ), TP_fast_assign( @@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc, __entry->block_state = block_state; __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; + __entry->rd_reserved = rgd->rd_reserved; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", MAJOR(__entry->dev), 
MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone) + __entry->rd_free_clone, (unsigned long)__entry->rd_reserved) +); + +/* Keep track of multi-block reservations as they are allocated/freed */ +TRACE_EVENT(gfs2_rs, + + TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, + u8 func), + + TP_ARGS(ip, rs, func), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, rd_addr ) + __field( u32, rd_free_clone ) + __field( u32, rd_reserved ) + __field( u64, inum ) + __field( u64, start ) + __field( u32, free ) + __field( u8, func ) + ), + + TP_fast_assign( + __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; + __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; + __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; + __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; + __entry->inum = ip ? 
ip->i_no_addr : 0; + __entry->start = gfs2_rs_startblk(rs); + __entry->free = rs->rs_free; + __entry->func = func; + ), + + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " + "f:%lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->start, + (unsigned long long)__entry->rd_addr, + (unsigned long)__entry->rd_free_clone, + (unsigned long)__entry->rd_reserved, + rs_func_name(__entry->func), (unsigned long)__entry->free) ); #endif /* _TRACE_GFS2_H */ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 523c0de0d805..27a0b4a901f5 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -327,6 +327,10 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, { int error; + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); + if (error) + return error; + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) goto out_alloc; @@ -710,6 +714,10 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, struct buffer_head *dibh; int error; + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); + if (error) + return error; + error = gfs2_quota_lock_check(ip); if (error) return error; @@ -1483,6 +1491,10 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) { int error; + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); + if (error) + return error; + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) return error; -- cgit v1.2.3 From 782759b9f5f5223e0962af60c3457c912fab755f Mon Sep 17 00:00:00 2001 From: Alexandre Pereira da Silva Date: Mon, 25 Jun 2012 17:47:49 -0300 Subject: UBIFS: fix compilation warning Fix the following compilation warning: fs/ubifs/dir.c: In function 'ubifs_rename': fs/ubifs/dir.c:972:15: warning: 'saved_nlink' may be used uninitialized in this function Use the 'uninitialized_var()' macro to get rid of this false-positive. Artem: massaged the patch a bit. 
Signed-off-by: Alexandre Pereira da Silva Signed-off-by: Artem Bityutskiy --- fs/ubifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index a6d42efc76d2..390198d91e6d 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -969,7 +969,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; - unsigned int saved_nlink; + unsigned int uninitialized_var(saved_nlink); /* * Budget request settings: deletion direntry, new direntry, removing -- cgit v1.2.3 From 06bef9451a4c5ad882cd15fd7a0df9890c0249f3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 14 Jul 2012 14:19:46 +0300 Subject: UBIFS: add debugfs knob to switch to R/O mode This patch adds another debugfs knob which switches UBIFS to R/O mode. I needed it while trying to reproduce the 'first log node is not CS node' bug. Without this debugfs knob you have to perform a power cut to reproduce the bug. The knob is named 'ro_error' and all it does is it sets the 'ro_error' UBIFS flag which makes UBIFS disallow any further writes - even write-back will fail with -EROFS. Useful for debugging. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 11 +++++++++++ fs/ubifs/debug.h | 5 +++++ 2 files changed, 16 insertions(+) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 92df3b081539..bb3167257aab 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2802,6 +2802,8 @@ static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, val = d->chk_fs; else if (dent == d->dfs_tst_rcvry) val = d->tst_rcvry; + else if (dent == d->dfs_ro_error) + val = c->ro_error; else return -EINVAL; @@ -2885,6 +2887,8 @@ static ssize_t dfs_file_write(struct file *file, const char __user *u, d->chk_fs = val; else if (dent == d->dfs_tst_rcvry) d->tst_rcvry = val; + else if (dent == d->dfs_ro_error) + c->ro_error = !!val; else return -EINVAL; @@ -2996,6 +3000,13 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) goto out_remove; d->dfs_tst_rcvry = dent; + fname = "ro_error"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_ro_error = dent; + return 0; out_remove: diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 486a8e024fb6..8b8cc4e945f4 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -79,6 +79,10 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing + * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to + * re-mounting to R/O mode because it does not flush any buffers + * and UBIFS just starts returning -EROFS on all write + * operations) */ struct ubifs_debug_info { struct ubifs_zbranch old_zroot; @@ -122,6 +126,7 @@ struct ubifs_debug_info { struct dentry *dfs_chk_lprops; struct dentry *dfs_chk_fs; struct dentry *dfs_tst_rcvry; + struct dentry *dfs_ro_error; }; /** -- cgit v1.2.3 From 
d51f17ea0a3afe11fb4c4ad6635877e24df2758f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 14 Jul 2012 20:52:58 +0300 Subject: UBIFS: simplify reply code a bit In the log reply code we assume that 'c->lhead_offs' is known and may be non-zero, which is not the case because we do not store it in the master node and have to find out by scanning on every mount. Knowing this fact allows us to simplify the log scanning loop a bit and remove a couple of unneeded local variables. Signed-off-by: Artem Bityutskiy --- fs/ubifs/replay.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 3a2da7e476e5..eba46d4a7619 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1007,7 +1007,7 @@ out: */ int ubifs_replay_journal(struct ubifs_info *c) { - int err, i, lnum, offs, free; + int err, lnum, free; BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); @@ -1025,25 +1025,17 @@ int ubifs_replay_journal(struct ubifs_info *c) dbg_mnt("start replaying the journal"); c->replaying = 1; lnum = c->ltail_lnum = c->lhead_lnum; - offs = c->lhead_offs; - for (i = 0; i < c->log_lebs; i++, lnum++) { - if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { - /* - * The log is logically circular, we reached the last - * LEB, switch to the first one. 
- */ - lnum = UBIFS_LOG_LNUM; - offs = 0; - } - err = replay_log_leb(c, lnum, offs, c->sbuf); + lnum = UBIFS_LOG_LNUM; + do { + err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) /* We hit the end of the log */ break; if (err) goto out; - offs = 0; - } + lnum = ubifs_next_log_lnum(c, lnum); + } while (lnum != UBIFS_LOG_LNUM); err = replay_buds(c); if (err) -- cgit v1.2.3 From 7074e5eb233343e4bad8c0a3f9e73167cf85a159 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 9 Jul 2012 09:27:14 +0200 Subject: UBIFS: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. Replace a field access from orphan by NULL in two places. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier c; expression E; iterator name list_for_each_entry; statement S; @@ list_for_each_entry(c,...) { ... when != break; when forall when strict } ... ( c = E | *c ) // Artem: fortunately, this did not cause any issues because we iterate the orphan list using the elements count, so we never dereferenced the corrupted pointer. This is why I do not send this patch to -stable. But otherwise - well spotted! 
Signed-off-by: Julia Lawall Signed-off-by: Artem Bityutskiy --- fs/ubifs/orphan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index b02734db187c..cebf17ea0458 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -176,7 +176,7 @@ int ubifs_orphan_start_commit(struct ubifs_info *c) *last = orphan; last = &orphan->cnext; } - *last = orphan->cnext; + *last = NULL; c->cmt_orphans = c->new_orphans; c->new_orphans = 0; dbg_cmt("%d orphans to commit", c->cmt_orphans); @@ -382,7 +382,7 @@ static int consolidate(struct ubifs_info *c) last = &orphan->cnext; cnt += 1; } - *last = orphan->cnext; + *last = NULL; ubifs_assert(cnt == c->tot_orphans - c->new_orphans); c->cmt_orphans = cnt; c->ohead_lnum = c->orph_first; -- cgit v1.2.3 From 15e1c960227dc22d976c270fc854dfe363c04bbd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 20 Jul 2012 14:05:05 -0400 Subject: GFS2: Eliminate 64-bit divides This patch removes the 64-bit divides introduced in the previous patch in favor of shifting, so that it will compile properly on 32-bit machines. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9f94832cefec..9aa6af13823c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -384,7 +384,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) return ret; atomic_set(&ip->i_res->rs_sizehint, - PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize); + PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); @@ -669,7 +669,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret) return ret; - atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize); + atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -795,7 +795,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (unlikely(error)) goto out_uninit; - atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize); + atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); while (len > 0) { if (len < bytes) -- cgit v1.2.3 From 96ee34be7a0965a117080a28b2c0cc6d4ee6adb1 Mon Sep 17 00:00:00 2001 From: Chen Baozi Date: Sat, 14 Jul 2012 03:38:13 +0800 Subject: xfs: fix comment typo of struct xfs_da_blkinfo. Fix trivial typo error that has written "It" to "Is". Signed-off-by: Chen Baozi Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_da_btree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 9f37aa03eb38..132adafb041e 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -32,7 +32,7 @@ struct zone; /* * This structure is common to both leaf nodes and non-leaf nodes in the Btree. 
* - * Is is used to manage a doubly linked list of all blocks at the same + * It is used to manage a doubly linked list of all blocks at the same * level in the Btree, and to identify which type of block this is. */ #define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ -- cgit v1.2.3 From 69ff2826117f1cde9a2491be57a578212bca551e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jun 2012 17:01:28 -0400 Subject: xfs: implement ->update_time Use this new method to replace our hacky use of ->dirty_inode. An additional benefit is that we can now propagate errors up the stack. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_iops.c | 45 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_super.c | 56 ------------------------------------------------------ fs/xfs/xfs_trace.h | 2 +- 3 files changed, 46 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1a25fd802798..5439c3f99458 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -897,6 +897,47 @@ xfs_vn_setattr( return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); } +STATIC int +xfs_vn_update_time( + struct inode *inode, + struct timespec *now, + int flags) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + trace_xfs_update_time(ip); + + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return -error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (flags & S_CTIME) { + inode->i_ctime = *now; + ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec; + } + if (flags & S_MTIME) { + inode->i_mtime = *now; + ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec; + } + if (flags & S_ATIME) { + inode->i_atime = 
*now; + ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec; + } + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); + return -xfs_trans_commit(tp, 0); +} + #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) /* @@ -991,6 +1032,7 @@ static const struct inode_operations xfs_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .fiemap = xfs_vn_fiemap, + .update_time = xfs_vn_update_time, }; static const struct inode_operations xfs_dir_inode_operations = { @@ -1016,6 +1058,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, + .update_time = xfs_vn_update_time, }; static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1041,6 +1084,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, + .update_time = xfs_vn_update_time, }; static const struct inode_operations xfs_symlink_inode_operations = { @@ -1054,6 +1098,7 @@ static const struct inode_operations xfs_symlink_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, + .update_time = xfs_vn_update_time, }; STATIC void diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 07f70e17c745..cb2deb13b063 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -868,61 +868,6 @@ xfs_fs_inode_init_once( "xfsino", ip->i_ino); } -/* - * This is called by the VFS when dirtying inode metadata. This can happen - * for a few reasons, but we only care about timestamp updates, given that - * we handled the rest ourselves. In theory no other calls should happen, - * but for example generic_write_end() keeps dirtying the inode after - * updating i_size. 
Thus we check that the flags are exactly I_DIRTY_SYNC, - * and skip this call otherwise. - * - * We'll hopefull get a different method just for updating timestamps soon, - * at which point this hack can go away, and maybe we'll also get real - * error handling here. - */ -STATIC void -xfs_fs_dirty_inode( - struct inode *inode, - int flags) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - if (flags != I_DIRTY_SYNC) - return; - - trace_xfs_dirty_inode(ip); - - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto trouble; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - /* - * Grab all the latest timestamps from the Linux inode. - */ - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; - ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; - ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); - error = xfs_trans_commit(tp, 0); - if (error) - goto trouble; - return; - -trouble: - xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino); -} - STATIC void xfs_fs_evict_inode( struct inode *inode) @@ -1436,7 +1381,6 @@ xfs_fs_free_cached_objects( static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, - .dirty_inode = xfs_fs_dirty_inode, .evict_inode = xfs_fs_evict_inode, .drop_inode = xfs_fs_drop_inode, .put_super = xfs_fs_put_super, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index caf5dabfd553..e5795dd6013a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -578,8 +578,8 @@ 
DEFINE_INODE_EVENT(xfs_ioctl_setattr); DEFINE_INODE_EVENT(xfs_dir_fsync); DEFINE_INODE_EVENT(xfs_file_fsync); DEFINE_INODE_EVENT(xfs_destroy_inode); -DEFINE_INODE_EVENT(xfs_dirty_inode); DEFINE_INODE_EVENT(xfs_evict_inode); +DEFINE_INODE_EVENT(xfs_update_time); DEFINE_INODE_EVENT(xfs_dquot_dqalloc); DEFINE_INODE_EVENT(xfs_dquot_dqdetach); -- cgit v1.2.3 From 6b7a03f03a2f8b1629133e35729eba4727fae3cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jul 2012 12:20:00 -0400 Subject: xfs: handle EOF correctly in xfs_vm_writepage We need to zero out the part of a page which is beyond EOF before setting it uptodate; otherwise, an mmap read or write will see non-zero data beyond EOF. Based on the code in fs/buffer.c and the following ext4 commit: ext4: handle EOF correctly in ext4_bio_write_page() And yes, I wish we had a good test case for it. Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 84e372596d56..91d77ac51bba 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -927,11 +927,26 @@ xfs_vm_writepage( end_index = offset >> PAGE_CACHE_SHIFT; last_index = (offset - 1) >> PAGE_CACHE_SHIFT; if (page->index >= end_index) { - if ((page->index >= end_index + 1) || - !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); + + /* + * Just skip the page if it is fully outside i_size, e.g. due + * to a truncate operation that is in progress. + */ + if (page->index >= end_index + 1 || offset_into_page == 0) { unlock_page(page); return 0; } + + /* + * The page straddles i_size. It must be zeroed out on each + * and every writepage invocation because it may be mmapped. + * "A file is mapped in multiples of the page size.
For a file + * that is not a multiple of the page size, the remaining + * memory is zeroed when mapped, and writes to that region are + * not written out to the file." + */ + zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); } end_offset = min_t(unsigned long long, -- cgit v1.2.3 From 475ee413f34165f8e6fcd7fbff3a4da1dce48c99 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jul 2012 12:21:22 -0400 Subject: xfs: merge xfs_itobp into xfs_imap_to_bp All callers of xfs_imap_to_bp want the dinode pointer, so let's calculate it inside xfs_imap_to_bp. Once that is done xfs_itobp becomes a fairly pointless wrapper which can be replaced with direct calls to xfs_imap_to_bp. Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_inode.c | 131 +++++++++++++++++------------------------------ fs/xfs/xfs_inode.h | 6 +-- fs/xfs/xfs_itable.c | 2 +- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_sync.c | 4 +- 5 files changed, 54 insertions(+), 91 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 257f3c463e0e..34c985de5fa0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -132,23 +132,28 @@ xfs_inobp_check( #endif /* - * Find the buffer associated with the given inode map - * We do basic validation checks on the buffer once it has been - * retrieved from disk. + * This routine is called to map an inode to the buffer containing the on-disk + * version of the inode. It returns a pointer to the buffer containing the + * on-disk inode in the bpp parameter, and in the dipp parameter it returns a + * pointer to the on-disk inode within that buffer. + * + * If a non-zero error is returned, then the contents of bpp and dipp are + * undefined. 
*/ -STATIC int +int xfs_imap_to_bp( - xfs_mount_t *mp, - xfs_trans_t *tp, - struct xfs_imap *imap, - xfs_buf_t **bpp, - uint buf_flags, - uint iget_flags) + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_imap *imap, + struct xfs_dinode **dipp, + struct xfs_buf **bpp, + uint buf_flags, + uint iget_flags) { - int error; - int i; - int ni; - xfs_buf_t *bp; + struct xfs_buf *bp; + int error; + int i; + int ni; buf_flags |= XBF_UNMAPPED; error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, @@ -189,8 +194,8 @@ xfs_imap_to_bp( xfs_trans_brelse(tp, bp); return XFS_ERROR(EINVAL); } - XFS_CORRUPTION_ERROR("xfs_imap_to_bp", - XFS_ERRLEVEL_HIGH, mp, dip); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, + mp, dip); #ifdef DEBUG xfs_emerg(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", @@ -204,7 +209,9 @@ xfs_imap_to_bp( } xfs_inobp_check(mp, bp); + *bpp = bp; + *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); return 0; } @@ -240,63 +247,15 @@ xfs_inotobp( if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, dipp, &bp, 0, imap_flags); if (error) return error; - *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); *bpp = bp; *offset = imap.im_boffset; return 0; } - -/* - * This routine is called to map an inode to the buffer containing - * the on-disk version of the inode. It returns a pointer to the - * buffer containing the on-disk inode in the bpp parameter, and in - * the dip parameter it returns a pointer to the on-disk inode within - * that buffer. - * - * If a non-zero error is returned, then the contents of bpp and - * dipp are undefined. - * - * The inode is expected to already been mapped to its buffer and read - * in once, thus we can use the mapping information stored in the inode - * rather than calling xfs_imap(). 
This allows us to avoid the overhead - * of looking at the inode btree for small block file systems - * (see xfs_imap()). - */ -int -xfs_itobp( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dinode_t **dipp, - xfs_buf_t **bpp, - uint buf_flags) -{ - xfs_buf_t *bp; - int error; - - ASSERT(ip->i_imap.im_blkno != 0); - - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); - if (error) - return error; - - if (!bp) { - ASSERT(buf_flags & XBF_TRYLOCK); - ASSERT(tp == NULL); - *bpp = NULL; - return EAGAIN; - } - - *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); - *bpp = bp; - return 0; -} - /* * Move inode type and inode format specific information from the * on-disk inode to the in-core inode. For fifos, devs, and sockets @@ -796,10 +755,9 @@ xfs_iread( /* * Get pointers to the on-disk inode and the buffer containing it. */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); if (error) return error; - dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* * If we got something that isn't an inode it means someone @@ -876,7 +834,7 @@ xfs_iread( /* * Use xfs_trans_brelse() to release the buffer containing the * on-disk inode, because it was acquired with xfs_trans_read_buf() - * in xfs_itobp() above. If tp is NULL, this is just a normal + * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal * brelse(). If we're within a transaction, then xfs_trans_brelse() * will only release the buffer if it is not dirty within the * transaction. It will be OK to release the buffer in this case, @@ -1355,7 +1313,8 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. 
*/ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, + 0, 0); if (error) return error; @@ -1429,16 +1388,16 @@ xfs_iunlink_remove( if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { /* - * We're at the head of the list. Get the inode's - * on-disk buffer to see if there is anyone after us - * on the list. Only modify our next pointer if it - * is not already NULLAGINO. This saves us the overhead - * of dealing with the buffer when there is no need to - * change it. + * We're at the head of the list. Get the inode's on-disk + * buffer to see if there is anyone after us on the list. + * Only modify our next pointer if it is not already NULLAGINO. + * This saves us the overhead of dealing with the buffer when + * there is no need to change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, + 0, 0); if (error) { - xfs_warn(mp, "%s: xfs_itobp() returned error %d.", + xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", __func__, error); return error; } @@ -1493,13 +1452,15 @@ xfs_iunlink_remove( ASSERT(next_agino != NULLAGINO); ASSERT(next_agino != 0); } + /* - * Now last_ibp points to the buffer previous to us on - * the unlinked list. Pull us from the list. + * Now last_ibp points to the buffer previous to us on the + * unlinked list. Pull us from the list. 
*/ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, + 0, 0); if (error) { - xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", + xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.", __func__, error); return error; } @@ -1749,7 +1710,8 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0); + error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, + 0, 0); if (error) return error; @@ -2428,7 +2390,7 @@ xfs_iflush( /* * For stale inodes we cannot rely on the backing buffer remaining * stale in cache for the remaining life of the stale inode and so - * xfs_itobp() below may give us a buffer that no longer contains + * xfs_imap_to_bp() below may give us a buffer that no longer contains * inodes below. We have to check this after ensuring the inode is * unpinned so that it is safe to reclaim the stale inode after the * flush call. @@ -2454,7 +2416,8 @@ xfs_iflush( /* * Get the buffer containing the on-disk inode. 
*/ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK); + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, + 0); if (error || !bp) { xfs_ifunlock(ip); return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1efff36a75b6..942fd7f9110b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -560,9 +560,9 @@ do { \ int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, struct xfs_dinode **, struct xfs_buf **, int *, uint); -int xfs_itobp(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, struct xfs_dinode **, - struct xfs_buf **, uint); +int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, + struct xfs_imap *, struct xfs_dinode **, + struct xfs_buf **, uint, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, uint); void xfs_dinode_to_disk(struct xfs_dinode *, diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index eff577a9b67f..01d10a66e302 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -555,7 +555,7 @@ xfs_bulkstat_single( /* * note that requesting valid inode numbers which are not allocated - * to inodes will most likely cause xfs_itobp to generate warning + * to inodes will most likely cause xfs_imap_to_bp to generate warning * messages about bad magic numbers. This is ok. The fact that * the inode isn't actually an inode is handled by the * error check below. Done this way to make the usual case faster diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a76ba886e738..5da3ace352bf 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3106,7 +3106,7 @@ xlog_recover_process_one_iunlink( /* * Get the on disk inode to find the next inode in the bucket. 
*/ - error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0); + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); if (error) goto fail_iput; diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 1e9ee064dbb2..e61fc1519073 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -712,8 +712,8 @@ restart: * Note that xfs_iflush will never block on the inode buffer lock, as * xfs_ifree_cluster() can lock the inode buffer before it locks the * ip->i_lock, and we are doing the exact opposite here. As a result, - * doing a blocking xfs_itobp() to get the cluster buffer would result - * in an ABBA deadlock with xfs_ifree_cluster(). + * doing a blocking xfs_imap_to_bp() to get the cluster buffer would + * result in an ABBA deadlock with xfs_ifree_cluster(). * * As xfs_ifree_cluser() must gather all inodes that are active in the * cache to mark them stale, if we hit this case we don't actually want -- cgit v1.2.3 From 129dbc9a2d93bab823e57fe47f53d098a0d350f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jul 2012 12:21:51 -0400 Subject: xfs: remove xfs_inotobp There is no need to keep this helper around, opencoding it in the only caller is just as clear. Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_inode.c | 70 +++++++++++++++--------------------------------------- fs/xfs/xfs_inode.h | 3 --- 2 files changed, 19 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34c985de5fa0..d48e406de078 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -215,47 +215,6 @@ xfs_imap_to_bp( return 0; } -/* - * This routine is called to map an inode number within a file - * system to the buffer containing the on-disk version of the - * inode. It returns a pointer to the buffer containing the - * on-disk inode in the bpp parameter, and in the dip parameter - * it returns a pointer to the on-disk inode within that buffer. 
- * - * If a non-zero error is returned, then the contents of bpp and - * dipp are undefined. - * - * Use xfs_imap() to determine the size and location of the - * buffer to read from disk. - */ -int -xfs_inotobp( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - xfs_dinode_t **dipp, - xfs_buf_t **bpp, - int *offset, - uint imap_flags) -{ - struct xfs_imap imap; - xfs_buf_t *bp; - int error; - - imap.im_blkno = 0; - error = xfs_imap(mp, tp, ino, &imap, imap_flags); - if (error) - return error; - - error = xfs_imap_to_bp(mp, tp, &imap, dipp, &bp, 0, imap_flags); - if (error) - return error; - - *bpp = bp; - *offset = imap.im_boffset; - return 0; -} - /* * Move inode type and inode format specific information from the * on-disk inode to the in-core inode. For fifos, devs, and sockets @@ -1431,23 +1390,32 @@ xfs_iunlink_remove( next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); last_ibp = NULL; while (next_agino != agino) { - /* - * If the last inode wasn't the one pointing to - * us, then release its buffer since we're not - * going to do anything with it. 
- */ - if (last_ibp != NULL) { + struct xfs_imap imap; + + if (last_ibp) xfs_trans_brelse(tp, last_ibp); - } + + imap.im_blkno = 0; next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); - error = xfs_inotobp(mp, tp, next_ino, &last_dip, - &last_ibp, &last_offset, 0); + + error = xfs_imap(mp, tp, next_ino, &imap, 0); + if (error) { + xfs_warn(mp, + "%s: xfs_imap returned error %d.", + __func__, error); + return error; + } + + error = xfs_imap_to_bp(mp, tp, &imap, &last_dip, + &last_ibp, 0, 0); if (error) { xfs_warn(mp, - "%s: xfs_inotobp() returned error %d.", + "%s: xfs_imap_to_bp returned error %d.", __func__, error); return error; } + + last_offset = imap.im_boffset; next_agino = be32_to_cpu(last_dip->di_next_unlinked); ASSERT(next_agino != NULLAGINO); ASSERT(next_agino != 0); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 942fd7f9110b..c2e2da3abae2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -557,9 +557,6 @@ do { \ #define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_DONTCACHE 0x4 -int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, - xfs_ino_t, struct xfs_dinode **, - struct xfs_buf **, int *, uint); int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, struct xfs_imap *, struct xfs_dinode **, struct xfs_buf **, uint, uint); -- cgit v1.2.3 From 0d882a360b9012bc7a7e921c935774c3fba1bfd9 Mon Sep 17 00:00:00 2001 From: Alain Renaud Date: Tue, 22 May 2012 15:56:21 -0500 Subject: Prefix IO_XX flags with XFS_IO_XX to avoid namespace collision. Add an XFS_ prefix to IO_DIRECT, IO_DELALLOC, IO_UNWRITTEN and IO_OVERWRITE. This is to avoid namespace conflicts with other modules.
Signed-off-by: Alain Renaud Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 48 ++++++++++++++++++++++++------------------------ fs/xfs/xfs_aops.h | 14 +++++++------- 2 files changed, 31 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 91d77ac51bba..15052ff916ec 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -179,7 +179,7 @@ xfs_finish_ioend( if (atomic_dec_and_test(&ioend->io_remaining)) { struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; - if (ioend->io_type == IO_UNWRITTEN) + if (ioend->io_type == XFS_IO_UNWRITTEN) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); @@ -210,7 +210,7 @@ xfs_end_io( * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. */ - if (ioend->io_type == IO_UNWRITTEN) { + if (ioend->io_type == XFS_IO_UNWRITTEN) { /* * For buffered I/O we never preallocate a transaction when * doing the unwritten extent conversion, but for direct I/O @@ -312,7 +312,7 @@ xfs_map_blocks( if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); - if (type == IO_UNWRITTEN) + if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { @@ -336,7 +336,7 @@ xfs_map_blocks( if (error) return -XFS_ERROR(error); - if (type == IO_DELALLOC && + if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { error = xfs_iomap_write_allocate(ip, offset, count, imap); if (!error) @@ -345,7 +345,7 @@ xfs_map_blocks( } #ifdef DEBUG - if (type == IO_UNWRITTEN) { + if (type == XFS_IO_UNWRITTEN) { ASSERT(nimaps); ASSERT(imap->br_startblock != HOLESTARTBLOCK); ASSERT(imap->br_startblock != DELAYSTARTBLOCK); @@ -634,11 +634,11 @@ xfs_check_page_type( bh = head = page_buffers(page); do { if (buffer_unwritten(bh)) - acceptable += (type == 
IO_UNWRITTEN); + acceptable += (type == XFS_IO_UNWRITTEN); else if (buffer_delay(bh)) - acceptable += (type == IO_DELALLOC); + acceptable += (type == XFS_IO_DELALLOC); else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable += (type == IO_OVERWRITE); + acceptable += (type == XFS_IO_OVERWRITE); else break; } while ((bh = bh->b_this_page) != head); @@ -721,11 +721,11 @@ xfs_convert_page( if (buffer_unwritten(bh) || buffer_delay(bh) || buffer_mapped(bh)) { if (buffer_unwritten(bh)) - type = IO_UNWRITTEN; + type = XFS_IO_UNWRITTEN; else if (buffer_delay(bh)) - type = IO_DELALLOC; + type = XFS_IO_DELALLOC; else - type = IO_OVERWRITE; + type = XFS_IO_OVERWRITE; if (!xfs_imap_valid(inode, imap, offset)) { done = 1; @@ -733,7 +733,7 @@ xfs_convert_page( } lock_buffer(bh); - if (type != IO_OVERWRITE) + if (type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, imap, offset); xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); @@ -831,7 +831,7 @@ xfs_aops_discard_page( struct buffer_head *bh, *head; loff_t offset = page_offset(page); - if (!xfs_check_page_type(page, IO_DELALLOC)) + if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) goto out_invalidate; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -956,7 +956,7 @@ xfs_vm_writepage( bh = head = page_buffers(page); offset = page_offset(page); - type = IO_OVERWRITE; + type = XFS_IO_OVERWRITE; if (wbc->sync_mode == WB_SYNC_NONE) nonblocking = 1; @@ -981,18 +981,18 @@ xfs_vm_writepage( } if (buffer_unwritten(bh)) { - if (type != IO_UNWRITTEN) { - type = IO_UNWRITTEN; + if (type != XFS_IO_UNWRITTEN) { + type = XFS_IO_UNWRITTEN; imap_valid = 0; } } else if (buffer_delay(bh)) { - if (type != IO_DELALLOC) { - type = IO_DELALLOC; + if (type != XFS_IO_DELALLOC) { + type = XFS_IO_DELALLOC; imap_valid = 0; } } else if (buffer_uptodate(bh)) { - if (type != IO_OVERWRITE) { - type = IO_OVERWRITE; + if (type != XFS_IO_OVERWRITE) { + type = XFS_IO_OVERWRITE; imap_valid = 0; } } else { @@ -1028,7 +1028,7 @@ xfs_vm_writepage( } if 
(imap_valid) { lock_buffer(bh); - if (type != IO_OVERWRITE) + if (type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, &imap, offset); xfs_add_to_ioend(inode, bh, offset, type, &ioend, new_ioend); @@ -1069,7 +1069,7 @@ xfs_vm_writepage( * Reserve log space if we might write beyond the on-disk * inode size. */ - if (ioend->io_type != IO_UNWRITTEN && + if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) { err = xfs_setfilesize_trans_alloc(ioend); if (err) @@ -1366,7 +1366,7 @@ xfs_end_io_direct_write( ioend->io_iocb = iocb; ioend->io_result = ret; if (private && size > 0) - ioend->io_type = IO_UNWRITTEN; + ioend->io_type = XFS_IO_UNWRITTEN; if (is_async) { ioend->io_isasync = 1; @@ -1398,7 +1398,7 @@ xfs_vm_direct_IO( * and converts at least on unwritten extent we will cancel * the still clean transaction after the I/O has finished. */ - iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT); + iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); if (offset + size > XFS_I(inode)->i_d.di_size) { ret = xfs_setfilesize_trans_alloc(ioend); if (ret) diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 84eafbcb0d9d..c325abb8d61a 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -24,17 +24,17 @@ extern mempool_t *xfs_ioend_pool; * Types of I/O for bmap clustering and I/O completion tracking. 
*/ enum { - IO_DIRECT = 0, /* special case for direct I/O ioends */ - IO_DELALLOC, /* mapping covers delalloc region */ - IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ - IO_OVERWRITE, /* mapping covers already allocated extent */ + XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ + XFS_IO_DELALLOC, /* covers delalloc region */ + XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ + XFS_IO_OVERWRITE, /* covers already allocated extent */ }; #define XFS_IO_TYPES \ { 0, "" }, \ - { IO_DELALLOC, "delalloc" }, \ - { IO_UNWRITTEN, "unwritten" }, \ - { IO_OVERWRITE, "overwrite" } + { XFS_IO_DELALLOC, "delalloc" }, \ + { XFS_IO_UNWRITTEN, "unwritten" }, \ + { XFS_IO_OVERWRITE, "overwrite" } /* * xfs_ioend struct manages large extent writes for XFS. -- cgit v1.2.3 From 824c313139c2ce678011bf11c4823a0c99651c1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 10:54:45 -0400 Subject: xfs: remove xfs_ialloc_find_free This function is entirely trivial and only has one caller, so remove it to simplify the code. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 177a21a7ac49..30b816d1f7e0 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -609,13 +609,6 @@ xfs_ialloc_get_rec( /* * Visible inode allocation functions. */ -/* - * Find a free (set) bit in the inode bitmask. - */ -static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) -{ - return xfs_lowbit64(*fp); -} /* * Allocate an inode on disk. 
@@ -995,7 +988,7 @@ newino: } alloc_inode: - offset = xfs_ialloc_find_free(&rec.ir_free); + offset = xfs_lowbit64(rec.ir_free); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % -- cgit v1.2.3 From 1e0ea0014479f066ba26f937e8740b8902229616 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Jul 2012 23:46:21 +0400 Subject: use __lookup_hash() in kern_path_parent() No need to bother with lookup_one_len() here - it's an overkill Signed-off-by Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 1b6474687698..c14dfac83c2b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1875,7 +1875,7 @@ struct dentry *kern_path_locked(const char *name, struct path *path) return ERR_PTR(-EINVAL); } mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + d = __lookup_hash(&nd.last, nd.path.dentry, 0); if (IS_ERR(d)) { mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); -- cgit v1.2.3 From 4a9d4b024a3102fc083c925c242d98ac27b1c5f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 09:56:45 +0400 Subject: switch fput to task_work_add ... and schedule_work() for interrupt/kernel_thread callers (and yes, now it *is* OK to call from interrupt). We are guaranteed that __fput() will be done before we return to userland (or exit). Note that for fput() from a kernel thread we get an async behaviour; it's almost always OK, but sometimes you might need to have __fput() completed before you do anything else. There are two mechanisms for that - a general barrier (flush_delayed_fput()) and explicit __fput_sync(). Both should be used with care (as was the case for fput() from kernel threads all along). See comments in fs/file_table.c for details. 
Signed-off-by: Al Viro --- fs/file_table.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/file.h | 3 +++ init/main.c | 3 ++- 3 files changed, 75 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index 9ace2781931e..b3fc4d67a26b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include @@ -251,7 +253,6 @@ static void __fput(struct file *file) } fops_put(file->f_op); put_pid(file->f_owner.pid); - file_sb_list_del(file); if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); if (file->f_mode & FMODE_WRITE) @@ -263,10 +264,77 @@ static void __fput(struct file *file) mntput(mnt); } +static DEFINE_SPINLOCK(delayed_fput_lock); +static LIST_HEAD(delayed_fput_list); +static void delayed_fput(struct work_struct *unused) +{ + LIST_HEAD(head); + spin_lock_irq(&delayed_fput_lock); + list_splice_init(&delayed_fput_list, &head); + spin_unlock_irq(&delayed_fput_lock); + while (!list_empty(&head)) { + struct file *f = list_first_entry(&head, struct file, f_u.fu_list); + list_del_init(&f->f_u.fu_list); + __fput(f); + } +} + +static void ____fput(struct callback_head *work) +{ + __fput(container_of(work, struct file, f_u.fu_rcuhead)); +} + +/* + * If kernel thread really needs to have the final fput() it has done + * to complete, call this. The only user right now is the boot - we + * *do* need to make sure our writes to binaries on initramfs has + * not left us with opened struct file waiting for __fput() - execve() + * won't work without that. Please, don't add more callers without + * very good reasons; in particular, never call that with locks + * held and never call that from a thread that might need to do + * some work on any kind of umount. 
+ */ +void flush_delayed_fput(void) +{ + delayed_fput(NULL); +} + +static DECLARE_WORK(delayed_fput_work, delayed_fput); + void fput(struct file *file) { - if (atomic_long_dec_and_test(&file->f_count)) + if (atomic_long_dec_and_test(&file->f_count)) { + struct task_struct *task = current; + file_sb_list_del(file); + if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) { + unsigned long flags; + spin_lock_irqsave(&delayed_fput_lock, flags); + list_add(&file->f_u.fu_list, &delayed_fput_list); + schedule_work(&delayed_fput_work); + spin_unlock_irqrestore(&delayed_fput_lock, flags); + return; + } + init_task_work(&file->f_u.fu_rcuhead, ____fput); + task_work_add(task, &file->f_u.fu_rcuhead, true); + } +} + +/* + * synchronous analog of fput(); for kernel threads that might be needed + * in some umount() (and thus can't use flush_delayed_fput() without + * risking deadlocks), need to wait for completion of __fput() and know + * for this specific struct file it won't involve anything that would + * need them. Use only if you really need it - at the very least, + * don't blindly convert fput() by kernel thread to that. 
+ */ +void __fput_sync(struct file *file) +{ + if (atomic_long_dec_and_test(&file->f_count)) { + struct task_struct *task = current; + file_sb_list_del(file); + BUG_ON(!(task->flags & PF_KTHREAD)); __fput(file); + } } EXPORT_SYMBOL(fput); diff --git a/include/linux/file.h b/include/linux/file.h index 58bf158c53d9..a22408bac0d0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); +extern void flush_delayed_fput(void); +extern void __fput_sync(struct file *); + #endif /* __LINUX_FILE_H */ diff --git a/init/main.c b/init/main.c index b5cc0a7c4708..3f151f6c6da7 100644 --- a/init/main.c +++ b/init/main.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -804,8 +805,8 @@ static noinline int init_post(void) system_state = SYSTEM_RUNNING; numa_default_policy(); - current->signal->flags |= SIGNAL_UNKILLABLE; + flush_delayed_fput(); if (ramdisk_execute_command) { run_init_process(ramdisk_execute_command); -- cgit v1.2.3 From 3ffa3c0e3f6e62f67fc2346ca60161dfb030083d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 10:00:10 +0400 Subject: aio: now fput() is OK from interrupt context; get rid of manual delayed __fput() Signed-off-by: Al Viro --- fs/aio.c | 73 +++------------------------------------------------------------- 1 file changed, 3 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 55c4c7656053..71f613cf4a85 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -56,13 +56,6 @@ static struct kmem_cache *kioctx_cachep; static struct workqueue_struct *aio_wq; -/* Used for rare fput completion. 
*/ -static void aio_fput_routine(struct work_struct *); -static DECLARE_WORK(fput_work, aio_fput_routine); - -static DEFINE_SPINLOCK(fput_lock); -static LIST_HEAD(fput_head); - static void aio_kick_handler(struct work_struct *); static void aio_queue_work(struct kioctx *); @@ -479,7 +472,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) { unsigned short allocated, to_alloc; long avail; - bool called_fput = false; struct kiocb *req, *n; struct aio_ring *ring; @@ -495,28 +487,11 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) if (allocated == 0) goto out; -retry: spin_lock_irq(&ctx->ctx_lock); ring = kmap_atomic(ctx->ring_info.ring_pages[0]); avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active; BUG_ON(avail < 0); - if (avail == 0 && !called_fput) { - /* - * Handle a potential starvation case. It is possible that - * we hold the last reference on a struct file, causing us - * to delay the final fput to non-irq context. In this case, - * ctx->reqs_active is artificially high. Calling the fput - * routine here may free up a slot in the event completion - * ring, allowing this allocation to succeed. - */ - kunmap_atomic(ring); - spin_unlock_irq(&ctx->ctx_lock); - aio_fput_routine(NULL); - called_fput = true; - goto retry; - } - if (avail < allocated) { /* Trim back the number of requests. 
*/ list_for_each_entry_safe(req, n, &batch->head, ki_batch) { @@ -570,36 +545,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) wake_up_all(&ctx->wait); } -static void aio_fput_routine(struct work_struct *data) -{ - spin_lock_irq(&fput_lock); - while (likely(!list_empty(&fput_head))) { - struct kiocb *req = list_kiocb(fput_head.next); - struct kioctx *ctx = req->ki_ctx; - - list_del(&req->ki_list); - spin_unlock_irq(&fput_lock); - - /* Complete the fput(s) */ - if (req->ki_filp != NULL) - fput(req->ki_filp); - - /* Link the iocb into the context's free list */ - rcu_read_lock(); - spin_lock_irq(&ctx->ctx_lock); - really_put_req(ctx, req); - /* - * at that point ctx might've been killed, but actual - * freeing is RCU'd - */ - spin_unlock_irq(&ctx->ctx_lock); - rcu_read_unlock(); - - spin_lock_irq(&fput_lock); - } - spin_unlock_irq(&fput_lock); -} - /* __aio_put_req * Returns true if this put was the last user of the request. */ @@ -618,21 +563,9 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) req->ki_cancel = NULL; req->ki_retry = NULL; - /* - * Try to optimize the aio and eventfd file* puts, by avoiding to - * schedule work in case it is not final fput() time. In normal cases, - * we would not be holding the last reference to the file*, so - * this function will be executed w/out any aio kthread wakeup. - */ - if (unlikely(!fput_atomic(req->ki_filp))) { - spin_lock(&fput_lock); - list_add(&req->ki_list, &fput_head); - spin_unlock(&fput_lock); - schedule_work(&fput_work); - } else { - req->ki_filp = NULL; - really_put_req(ctx, req); - } + fput(req->ki_filp); + req->ki_filp = NULL; + really_put_req(ctx, req); return 1; } -- cgit v1.2.3 From 0a81861978deedfe9267d9fe905c756d3af3af38 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:28 +0300 Subject: hfsplus: make hfsplus_sync_fs static ... because it is used only in fs/hfsplus/super.c. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfsplus/hfsplus_fs.h | 1 - fs/hfsplus/super.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 4e75ac646fea..66a9365041be 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -428,7 +428,6 @@ int hfsplus_show_options(struct seq_file *, struct dentry *); /* super.c */ struct inode *hfsplus_iget(struct super_block *, unsigned long); -int hfsplus_sync_fs(struct super_block *sb, int wait); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index a9bca4b8768b..5df771e4ddda 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -161,7 +161,7 @@ static void hfsplus_evict_inode(struct inode *inode) } } -int hfsplus_sync_fs(struct super_block *sb, int wait) +static int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_vh *vhdr = sbi->s_vhdr; -- cgit v1.2.3 From b7a90e8043e7ab1922126e1c1c5c004b470f9e2a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:29 +0300 Subject: hfsplus: amend debugging print Print correct function name in the debugging print of the 'hfsplus_sync_fs()' function. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfsplus/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 5df771e4ddda..9e9c278830de 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -171,7 +171,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - dprint(DBG_SUPER, "hfsplus_write_super\n"); + dprint(DBG_SUPER, "hfsplus_sync_fs\n"); sb->s_dirt = 0; -- cgit v1.2.3 From 58770d7e83eede5fafbcdf54a604277d70010705 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:30 +0300 Subject: hfsplus: remove useless check This check is useless because we always have 'sb->s_fs_info' to be non-NULL. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfsplus/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9e9c278830de..f4f3d5463061 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -240,9 +240,6 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); - if (!sb->s_fs_info) - return; - if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; -- cgit v1.2.3 From 9e6c5829b07c9ba6668807631914efc557fab059 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:31 +0300 Subject: hfsplus: get rid of write_super This patch makes hfsplus stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. 
But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no dirty superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Tested using fsstress from the LTP project. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfsplus/bitmap.c | 4 ++-- fs/hfsplus/dir.c | 2 +- fs/hfsplus/hfsplus_fs.h | 6 +++++- fs/hfsplus/inode.c | 6 +++--- fs/hfsplus/super.c | 41 ++++++++++++++++++++++++++++++++--------- 5 files changed, 43 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 1cad80c789cb..4cfbe2edd296 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -153,7 +153,7 @@ done: kunmap(page); *max = offset + (curr - pptr) * 32 + i - start; sbi->free_blocks -= *max; - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); out: mutex_unlock(&sbi->alloc_mutex); @@ -228,7 +228,7 @@ out: set_page_dirty(page); kunmap(page); sbi->free_blocks += len; - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); mutex_unlock(&sbi->alloc_mutex); return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 378ea0c43f19..6b9f921ef2fa 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -316,7 +316,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); sbi->file_count++; - dst_dir->i_sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(dst_dir->i_sb); out: mutex_unlock(&sbi->vh_mutex); return res; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 66a9365041be..558dbb463a4e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -153,8 +153,11 @@ struct hfsplus_sb_info { gid_t gid; int part, 
session; - unsigned long flags; + + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work sync_work; /* FS sync delayed work */ + spinlock_t work_lock; /* protects sync_work and work_queued */ }; #define HFSPLUS_SB_WRITEBACKUP 0 @@ -428,6 +431,7 @@ int hfsplus_show_options(struct seq_file *, struct dentry *); /* super.c */ struct inode *hfsplus_iget(struct super_block *, unsigned long); +void hfsplus_mark_mdb_dirty(struct super_block *sb); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 7009265b746f..3d8b4a675ba0 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -431,7 +431,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) sbi->file_count++; insert_inode_hash(inode); mark_inode_dirty(inode); - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); return inode; } @@ -442,7 +442,7 @@ void hfsplus_delete_inode(struct inode *inode) if (S_ISDIR(inode->i_mode)) { HFSPLUS_SB(sb)->folder_count--; - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); return; } HFSPLUS_SB(sb)->file_count--; @@ -455,7 +455,7 @@ void hfsplus_delete_inode(struct inode *inode) inode->i_size = 0; hfsplus_file_truncate(inode); } - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); } void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index f4f3d5463061..473332098013 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -124,7 +124,7 @@ static int hfsplus_system_write_inode(struct inode *inode) if (fork->total_size != cpu_to_be64(inode->i_size)) { set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); - inode->i_sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(inode->i_sb); } hfsplus_inode_write_fork(inode, fork); if (tree) @@ -173,8 +173,6 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) dprint(DBG_SUPER, "hfsplus_sync_fs\n"); - sb->s_dirt = 0; - /* * Explicitly write out the special metadata inodes. 
* @@ -226,12 +224,34 @@ out: return error; } -static void hfsplus_write_super(struct super_block *sb) +static void delayed_sync_fs(struct work_struct *work) { - if (!(sb->s_flags & MS_RDONLY)) - hfsplus_sync_fs(sb, 1); - else - sb->s_dirt = 0; + struct hfsplus_sb_info *sbi; + + sbi = container_of(work, struct hfsplus_sb_info, sync_work.work); + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); +} + +void hfsplus_mark_mdb_dirty(struct super_block *sb) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->sync_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); } static void hfsplus_put_super(struct super_block *sb) @@ -240,6 +260,8 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); + cancel_delayed_work_sync(&sbi->sync_work); + if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; @@ -325,7 +347,6 @@ static const struct super_operations hfsplus_sops = { .write_inode = hfsplus_write_inode, .evict_inode = hfsplus_evict_inode, .put_super = hfsplus_put_super, - .write_super = hfsplus_write_super, .sync_fs = hfsplus_sync_fs, .statfs = hfsplus_statfs, .remount_fs = hfsplus_remount, @@ -352,6 +373,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; mutex_init(&sbi->alloc_mutex); mutex_init(&sbi->vh_mutex); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); hfsplus_fill_defaults(sbi); err = -EINVAL; -- cgit v1.2.3 From 715189d836ab276b3d0fc114681f12b423686ffa Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:44 +0300 Subject: hfs: push lock_super down HFS 
uses 'lock_super()'/'unlock_super()' around 'hfs_mdb_commit()' in order to serialize MDB (Master Directory Block) changes. Push it down to 'hfs_mdb_commit()' in order to simplify the code a bit. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/inode.c | 2 -- fs/hfs/mdb.c | 2 ++ fs/hfs/super.c | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 451c97281b83..f2deefdb4066 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -645,11 +645,9 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, /* sync the superblock to buffers */ sb = inode->i_sb; if (sb->s_dirt) { - lock_super(sb); sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) hfs_mdb_commit(sb); - unlock_super(sb); } /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 1563d5ce5764..3f558d58fba2 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -260,6 +260,7 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; + lock_super(sb); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ mdb->drLsMod = hfs_mtime(); @@ -317,6 +318,7 @@ void hfs_mdb_commit(struct super_block *sb) size -= len; } } + unlock_super(sb); } void hfs_mdb_close(struct super_block *sb) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 7b4c537d6e13..f7c06bbf33bc 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -50,21 +50,17 @@ MODULE_LICENSE("GPL"); */ static void hfs_write_super(struct super_block *sb) { - lock_super(sb); sb->s_dirt = 0; /* sync everything to the buffers */ if (!(sb->s_flags & MS_RDONLY)) hfs_mdb_commit(sb); - unlock_super(sb); } static int hfs_sync_fs(struct super_block *sb, int wait) { - lock_super(sb); hfs_mdb_commit(sb); sb->s_dirt = 0; - unlock_super(sb); return 0; } -- cgit v1.2.3 From b59352359d6555aa8537d74ac9b15c1c6bcf3c68 Mon Sep 17 
00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:45 +0300 Subject: hfs: get rid of lock_super Stop using lock_super for serializing the MDB changes - use the buffer-head own lock instead. Tested with fsstress. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/mdb.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 3f558d58fba2..7a3224049f30 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -260,7 +260,7 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; - lock_super(sb); + lock_buffer(HFS_SB(sb)->mdb_bh); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ mdb->drLsMod = hfs_mtime(); @@ -284,9 +284,13 @@ void hfs_mdb_commit(struct super_block *sb) &mdb->drXTFlSize, NULL); hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec, &mdb->drCTFlSize, NULL); + + lock_buffer(HFS_SB(sb)->alt_mdb_bh); memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE); HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); + unlock_buffer(HFS_SB(sb)->alt_mdb_bh); + mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh); } @@ -309,7 +313,11 @@ void hfs_mdb_commit(struct super_block *sb) break; } len = min((int)sb->s_blocksize - off, size); + + lock_buffer(bh); memcpy(bh->b_data + off, ptr, len); + unlock_buffer(bh); + mark_buffer_dirty(bh); brelse(bh); block++; @@ -318,7 +326,7 @@ void hfs_mdb_commit(struct super_block *sb) size -= len; } } - unlock_super(sb); + unlock_buffer(HFS_SB(sb)->mdb_bh); } void hfs_mdb_close(struct super_block *sb) -- cgit v1.2.3 From a3742d4828caeffab4cb075b562a4ed92d3e74d6 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:46 +0300 Subject: hfs: remove extra mdb write on unmount HFS 
calls 'hfs_write_super()' from 'hfs_put_super()' in order to write the MDB to the media. However, it is not needed because VFS calls '->sync_fs()' before calling '->put_super()' - so by the time we are in 'hfs_write_super()', the MDB is already synchronized. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/hfs/super.c b/fs/hfs/super.c index f7c06bbf33bc..47e4119a0650 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -74,8 +74,6 @@ static int hfs_sync_fs(struct super_block *sb, int wait) */ static void hfs_put_super(struct super_block *sb) { - if (sb->s_dirt) - hfs_write_super(sb); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); -- cgit v1.2.3 From 4527440d5db8ff27ae8801de3f819843a1e6c6f6 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:47 +0300 Subject: hfs: simplify a bit checking for R/O We have the following pattern in 2 places in HFS if (!RDONLY) hfs_mdb_commit(); This patch pushes the RDONLY check down to 'hfs_mdb_commit()'. This will make the following patches a bit simpler. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/inode.c | 3 +-- fs/hfs/mdb.c | 3 +++ fs/hfs/super.c | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f2deefdb4066..90c1ccbff8e4 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -646,8 +646,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, sb = inode->i_sb; if (sb->s_dirt) { sb->s_dirt = 0; - if (!(sb->s_flags & MS_RDONLY)) - hfs_mdb_commit(sb); + hfs_mdb_commit(sb); } /* .. 
finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 7a3224049f30..5fd51a5833ff 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -260,6 +260,9 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; + if (sb->s_flags & MS_RDONLY) + return; + lock_buffer(HFS_SB(sb)->mdb_bh); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 47e4119a0650..0730135b771e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -53,8 +53,7 @@ static void hfs_write_super(struct super_block *sb) sb->s_dirt = 0; /* sync everything to the buffers */ - if (!(sb->s_flags & MS_RDONLY)) - hfs_mdb_commit(sb); + hfs_mdb_commit(sb); } static int hfs_sync_fs(struct super_block *sb, int wait) -- cgit v1.2.3 From b16ca626358cbf056b752eab63ba8f20087afeaf Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:48 +0300 Subject: hfs: introduce VFS superblock object back-reference Add an 'sb' VFS superblock back-reference to the 'struct hfs_sb_info' data structure - we will need to find the VFS superblock from a 'struct hfs_sb_info' object in the next patch, so this change is just a preparation. Remove a few useless newlines while on it. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/hfs_fs.h | 6 +----- fs/hfs/super.c | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 1bf967c6bfdc..351561223ec3 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -137,16 +137,12 @@ struct hfs_sb_info { gid_t s_gid; /* The gid of all files */ int session, part; - struct nls_table *nls_io, *nls_disk; - struct mutex bitmap_lock; - unsigned long flags; - u16 blockoffset; - int fs_div; + struct super_block *sb; }; #define HFS_FLG_BITMAP_DIRTY 0 diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 0730135b771e..99c6239bc3a1 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -380,6 +380,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; + sbi->sb = sb; sb->s_fs_info = sbi; res = -EINVAL; -- cgit v1.2.3 From 5687b5780e90278a62d4cd916a3632087066f59d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:49 +0300 Subject: hfs: get rid of hfs_sync_super This patch makes hfs stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no dirty superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Tested using fsstress from the LTP project. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/extent.c | 2 +- fs/hfs/hfs_fs.h | 9 +++++++- fs/hfs/inode.c | 11 ++++------ fs/hfs/super.c | 65 +++++++++++++++++++++++++++++++-------------------------- 4 files changed, 48 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 2c16316d2917..a67955a0c36f 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -432,7 +432,7 @@ out: if (inode->i_ino < HFS_FIRSTUSER_CNID) set_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags); set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); } return res; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 351561223ec3..8275175acf6e 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -143,6 +144,9 @@ struct hfs_sb_info { u16 blockoffset; int fs_div; struct super_block *sb; + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work mdb_work; /* MDB flush delayed work */ + spinlock_t work_lock; /* protects mdb_work and work_queued */ }; #define HFS_FLG_BITMAP_DIRTY 0 @@ -222,6 +226,9 @@ extern int hfs_compare_dentry(const struct dentry *parent, extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *); +/* super.c */ +extern void hfs_mark_mdb_dirty(struct super_block *sb); + extern struct timezone sys_tz; /* @@ -249,7 +256,7 @@ static inline const char *hfs_mdb_name(struct super_block *sb) static inline void hfs_bitmap_dirty(struct super_block *sb) { set_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); } #define sb_bread512(sb, sec, data) ({ \ diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 90c1ccbff8e4..ee1bc55677f1 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -220,7 +220,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, 
umode_t mode) insert_inode_hash(inode); mark_inode_dirty(inode); set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); return inode; } @@ -235,7 +235,7 @@ void hfs_delete_inode(struct inode *inode) if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) HFS_SB(sb)->root_dirs--; set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); return; } HFS_SB(sb)->file_count--; @@ -248,7 +248,7 @@ void hfs_delete_inode(struct inode *inode) } } set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); } void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, @@ -644,10 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, /* sync the superblock to buffers */ sb = inode->i_sb; - if (sb->s_dirt) { - sb->s_dirt = 0; - hfs_mdb_commit(sb); - } + flush_delayed_work_sync(&HFS_SB(sb)->mdb_work); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); if (!ret) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 99c6239bc3a1..4eb873e0c07b 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -29,38 +29,9 @@ static struct kmem_cache *hfs_inode_cachep; MODULE_LICENSE("GPL"); -/* - * hfs_write_super() - * - * Description: - * This function is called by the VFS only. When the filesystem - * is mounted r/w it updates the MDB on disk. - * Input Variable(s): - * struct super_block *sb: Pointer to the hfs superblock - * Output Variable(s): - * NONE - * Returns: - * void - * Preconditions: - * 'sb' points to a "valid" (struct super_block). - * Postconditions: - * The MDB is marked 'unsuccessfully unmounted' by clearing bit 8 of drAtrb - * (hfs_put_super() must set this flag!). Some MDB fields are updated - * and the MDB buffer is written to disk by calling hfs_mdb_commit(). 
- */ -static void hfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; - - /* sync everything to the buffers */ - hfs_mdb_commit(sb); -} - static int hfs_sync_fs(struct super_block *sb, int wait) { hfs_mdb_commit(sb); - sb->s_dirt = 0; - return 0; } @@ -73,11 +44,44 @@ static int hfs_sync_fs(struct super_block *sb, int wait) */ static void hfs_put_super(struct super_block *sb) { + cancel_delayed_work_sync(&HFS_SB(sb)->mdb_work); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); } +static void flush_mdb(struct work_struct *work) +{ + struct hfs_sb_info *sbi; + struct super_block *sb; + + sbi = container_of(work, struct hfs_sb_info, mdb_work.work); + sb = sbi->sb; + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + hfs_mdb_commit(sb); +} + +void hfs_mark_mdb_dirty(struct super_block *sb) +{ + struct hfs_sb_info *sbi = HFS_SB(sb); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->mdb_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); +} + /* * hfs_statfs() * @@ -177,7 +181,6 @@ static const struct super_operations hfs_super_operations = { .write_inode = hfs_write_inode, .evict_inode = hfs_evict_inode, .put_super = hfs_put_super, - .write_super = hfs_write_super, .sync_fs = hfs_sync_fs, .statfs = hfs_statfs, .remount_fs = hfs_remount, @@ -382,6 +385,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sbi->sb = sb; sb->s_fs_info = sbi; + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->mdb_work, flush_mdb); res = -EINVAL; if (!parse_options((char *)data, sbi)) { -- cgit v1.2.3 From a4d05d315a4fdf5ccb0dbf0ce38bac12d522d33e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Jul 2012 16:43:26 +0300 Subject: fs/sysv: remove useless write_super call We do not need 
to call 'sysv_write_super()' from 'sysv_put_super()', because VFS has called 'sysv_sync_fs()' before calling '->put_super()'. So remove it. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/sysv/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 08d0b2568cd3..af13d1342f23 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -81,9 +81,6 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); - if (sb->s_dirt) - sysv_write_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { /* XXX ext2 also updates the state here */ mark_buffer_dirty(sbi->s_bh1); -- cgit v1.2.3 From eee458936b52bd3a9ff0ff577313b637905fff08 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Jul 2012 16:43:27 +0300 Subject: fs/sysv: remove another useless write_super call We do not need to call 'sysv_write_super()' from 'sysv_remount()', because VFS has called 'sysv_sync_fs()' before calling '->remount()'. So remove it. Remove also '(un)lock_super()' which obviously is becoming useless in this function.
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/sysv/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index af13d1342f23..f20ffe32b398 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -68,12 +68,9 @@ static void sysv_write_super(struct super_block *sb) static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); - lock_super(sb); + if (sbi->s_forced_ro) *flags |= MS_RDONLY; - if (*flags & MS_RDONLY) - sysv_write_super(sb); - unlock_super(sb); return 0; } -- cgit v1.2.3 From 9d46be294d12871adf4206f89168b14d27adb8b5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Jul 2012 16:43:28 +0300 Subject: fs/sysv: stop using write_super and s_dirt It does not look like sysv FS needs 'write_super()' at all, because all it does is a timestamp update. I cannot test this patch, because this file-system is so old and probably has not been used by anyone for years, so there are no tools to create it in Linux. But from the code I see that marking the superblock as dirty is basically marking the superblock buffers as dirty and then setting the s_dirt flag. And when 'write_super()' is executed to handle the s_dirt flag, we just update the timestamp and again mark the superblock buffer as dirty. Seems pointless. It looks like we can update the timestamp more opportunistically - on unmount, remount or sync, and nothing should change. Thus, this patch removes 'sysv_write_super()' and 's_dirt'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/sysv/inode.c | 10 ---------- fs/sysv/sysv.h | 1 - 2 files changed, 11 deletions(-) (limited to 'fs') diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index f20ffe32b398..80e1e2b18df1 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -43,7 +43,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) * then attach current time stamp.
* But if the filesystem was marked clean, keep it clean. */ - sb->s_dirt = 0; old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); if (sbi->s_type == FSTYPE_SYSV4) { if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time)) @@ -57,14 +56,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) return 0; } -static void sysv_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - sysv_sync_fs(sb, 1); - else - sb->s_dirt = 0; -} - static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); @@ -351,7 +342,6 @@ const struct super_operations sysv_sops = { .write_inode = sysv_write_inode, .evict_inode = sysv_evict_inode, .put_super = sysv_put_super, - .write_super = sysv_write_super, .sync_fs = sysv_sync_fs, .remount_fs = sysv_remount, .statfs = sysv_statfs, diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 11b07672f6c5..0bc35fdc58e2 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -117,7 +117,6 @@ static inline void dirty_sb(struct super_block *sb) mark_buffer_dirty(sbi->s_bh1); if (sbi->s_bh1 != sbi->s_bh2) mark_buffer_dirty(sbi->s_bh2); - sb->s_dirt = 1; } -- cgit v1.2.3 From 65e5e83f7d01a3790deb1ba2e0d887e715c43307 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 16:28:06 +0300 Subject: fs/ufs: remove extra superblock write on unmount UFS calls 'ufs_write_super()' from 'ufs_put_super()' in order to write the superblocks to the media. However, it is not needed because VFS calls '->sync_fs()' before calling '->put_super()' - so by the time we are in 'ufs_write_super()', the superblocks are already synchronized. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ufs/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 302f340d0071..ae91e0af2664 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1238,9 +1238,6 @@ static void ufs_put_super(struct super_block *sb) UFSD("ENTER\n"); - if (sb->s_dirt) - ufs_write_super(sb); - if (!(sb->s_flags & MS_RDONLY)) ufs_put_super_internal(sb); -- cgit v1.2.3 From 7bd54ef722e9938768f524677be0ac4985d8473a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 16:28:07 +0300 Subject: fs/ufs: re-arrange the code a bit This patch does not do any functional changes. It only moves 3 functions in fs/ufs/super.c a little bit up in order to prepare for further changes where I'll need this new arrangement to avoid forward declarations. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ufs/super.c | 117 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 58 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ae91e0af2664..ad56c6dffc64 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -691,6 +691,64 @@ static void ufs_put_super_internal(struct super_block *sb) UFSD("EXIT\n"); } +static int ufs_sync_fs(struct super_block *sb, int wait) +{ + struct ufs_sb_private_info * uspi; + struct ufs_super_block_first * usb1; + struct ufs_super_block_third * usb3; + unsigned flags; + + lock_ufs(sb); + lock_super(sb); + + UFSD("ENTER\n"); + + flags = UFS_SB(sb)->s_flags; + uspi = UFS_SB(sb)->s_uspi; + usb1 = ubh_get_usb_first(uspi); + usb3 = ubh_get_usb_third(uspi); + + usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + if ((flags & UFS_ST_MASK) == UFS_ST_SUN || + (flags & UFS_ST_MASK) == UFS_ST_SUNOS || + (flags & UFS_ST_MASK) == UFS_ST_SUNx86) + ufs_set_fs_state(sb, usb1, usb3, + UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); + ufs_put_cstotal(sb); + sb->s_dirt = 0; + + UFSD("EXIT\n"); + 
unlock_super(sb); + unlock_ufs(sb); + + return 0; +} + +static void ufs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ufs_sync_fs(sb, 1); + else + sb->s_dirt = 0; +} + +static void ufs_put_super(struct super_block *sb) +{ + struct ufs_sb_info * sbi = UFS_SB(sb); + + UFSD("ENTER\n"); + + if (!(sb->s_flags & MS_RDONLY)) + ufs_put_super_internal(sb); + + ubh_brelse_uspi (sbi->s_uspi); + kfree (sbi->s_uspi); + kfree (sbi); + sb->s_fs_info = NULL; + UFSD("EXIT\n"); + return; +} + static int ufs_fill_super(struct super_block *sb, void *data, int silent) { struct ufs_sb_info * sbi; @@ -1191,65 +1249,6 @@ failed_nomem: return -ENOMEM; } -static int ufs_sync_fs(struct super_block *sb, int wait) -{ - struct ufs_sb_private_info * uspi; - struct ufs_super_block_first * usb1; - struct ufs_super_block_third * usb3; - unsigned flags; - - lock_ufs(sb); - lock_super(sb); - - UFSD("ENTER\n"); - - flags = UFS_SB(sb)->s_flags; - uspi = UFS_SB(sb)->s_uspi; - usb1 = ubh_get_usb_first(uspi); - usb3 = ubh_get_usb_third(uspi); - - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); - if ((flags & UFS_ST_MASK) == UFS_ST_SUN || - (flags & UFS_ST_MASK) == UFS_ST_SUNOS || - (flags & UFS_ST_MASK) == UFS_ST_SUNx86) - ufs_set_fs_state(sb, usb1, usb3, - UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); - ufs_put_cstotal(sb); - sb->s_dirt = 0; - - UFSD("EXIT\n"); - unlock_super(sb); - unlock_ufs(sb); - - return 0; -} - -static void ufs_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - ufs_sync_fs(sb, 1); - else - sb->s_dirt = 0; -} - -static void ufs_put_super(struct super_block *sb) -{ - struct ufs_sb_info * sbi = UFS_SB(sb); - - UFSD("ENTER\n"); - - if (!(sb->s_flags & MS_RDONLY)) - ufs_put_super_internal(sb); - - ubh_brelse_uspi (sbi->s_uspi); - kfree (sbi->s_uspi); - kfree (sbi); - sb->s_fs_info = NULL; - UFSD("EXIT\n"); - return; -} - - static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) { struct ufs_sb_private_info * uspi; 
-- cgit v1.2.3 From 9e9ad5f408889db6038a59b38ede29ff1ba9ef2f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 16:28:08 +0300 Subject: fs/ufs: get rid of write_super This patch makes UFS stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The way we implement this is that we schedule a delayed job instead of relying on 's_dirt' and '->write_super()'. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no dirty superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Tested using fsstress from the LTP project.
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ufs/balloc.c | 8 ++++---- fs/ufs/ialloc.c | 4 ++-- fs/ufs/super.c | 40 ++++++++++++++++++++++++++++++---------- fs/ufs/ufs.h | 5 +++++ fs/ufs/ufs_fs.h | 1 + 5 files changed, 42 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 42694e11c23d..1b3e410bf334 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -116,7 +116,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); unlock_super (sb); UFSD("EXIT\n"); @@ -214,7 +214,7 @@ do_more: goto do_more; } - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); unlock_super (sb); UFSD("EXIT\n"); return; @@ -557,7 +557,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -677,7 +677,7 @@ succed: ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); result += cgno * uspi->s_fpg; UFSD("EXIT3, result %llu\n", (unsigned long long)result); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 4ec5c1085a87..e84cbe21b986 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -116,7 +116,7 @@ void ufs_free_inode (struct inode * inode) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); unlock_super (sb); UFSD("EXIT\n"); } @@ -288,7 +288,7 @@ cg_found: ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); inode->i_ino = cg * uspi->s_ipg + bit; inode_init_owner(inode, dir, mode); diff --git 
a/fs/ufs/super.c b/fs/ufs/super.c index ad56c6dffc64..444927e5706b 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -302,7 +302,7 @@ void ufs_error (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); sb->s_flags |= MS_RDONLY; } va_start (args, fmt); @@ -334,7 +334,7 @@ void ufs_panic (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); } va_start (args, fmt); vsnprintf (error_buf, sizeof(error_buf), fmt, args); @@ -715,7 +715,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); ufs_put_cstotal(sb); - sb->s_dirt = 0; UFSD("EXIT\n"); unlock_super(sb); @@ -724,12 +723,31 @@ static int ufs_sync_fs(struct super_block *sb, int wait) return 0; } -static void ufs_write_super(struct super_block *sb) +static void delayed_sync_fs(struct work_struct *work) { - if (!(sb->s_flags & MS_RDONLY)) - ufs_sync_fs(sb, 1); - else - sb->s_dirt = 0; + struct ufs_sb_info *sbi; + + sbi = container_of(work, struct ufs_sb_info, sync_work.work); + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + ufs_sync_fs(sbi->sb, 1); +} + +void ufs_mark_sb_dirty(struct super_block *sb) +{ + struct ufs_sb_info *sbi = UFS_SB(sb); + unsigned long delay; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->sync_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); } static void ufs_put_super(struct super_block *sb) @@ -740,6 +758,7 @@ static void ufs_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) ufs_put_super_internal(sb); + 
cancel_delayed_work_sync(&sbi->sync_work); ubh_brelse_uspi (sbi->s_uspi); kfree (sbi->s_uspi); @@ -774,6 +793,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) goto failed_nomem; sb->s_fs_info = sbi; + sbi->sb = sb; UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); @@ -785,6 +805,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) } #endif mutex_init(&sbi->mutex); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* * Set default mount options * Parse mount options @@ -1304,7 +1326,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); ubh_mark_buffer_dirty (USPI_UBH(uspi)); - sb->s_dirt = 0; sb->s_flags |= MS_RDONLY; } else { /* @@ -1454,7 +1475,6 @@ static const struct super_operations ufs_super_ops = { .write_inode = ufs_write_inode, .evict_inode = ufs_evict_inode, .put_super = ufs_put_super, - .write_super = ufs_write_super, .sync_fs = ufs_sync_fs, .statfs = ufs_statfs, .remount_fs = ufs_remount, diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 528750b7e701..343e6fc571e5 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -20,6 +20,10 @@ struct ufs_sb_info { unsigned s_mount_opt; struct mutex mutex; struct task_struct *mutex_owner; + struct super_block *sb; + int work_queued; /* non-zero if the delayed work is queued */ + struct delayed_work sync_work; /* FS sync delayed work */ + spinlock_t work_lock; /* protects sync_work and work_queued */ }; struct ufs_inode_info { @@ -123,6 +127,7 @@ extern __printf(3, 4) void ufs_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ufs_panic(struct super_block *, const char *, const char *, ...); +void ufs_mark_sb_dirty(struct super_block *sb); /* symlink.c */ extern const struct inode_operations ufs_fast_symlink_inode_operations; diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 
8aba544f9fad..0cbd5d340b67 100644 --- a/fs/ufs/ufs_fs.h +++ b/fs/ufs/ufs_fs.h @@ -34,6 +34,7 @@ #include #include #include +#include #include typedef __u64 __bitwise __fs64; -- cgit v1.2.3 From 6eedc70150d55b5885800eb6664ea226dc2cb66f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:27 +0200 Subject: vfs: Move noop_backing_dev_info check from sync into writeback In principle, a filesystem may want to have ->sync_fs() called during sync(1) although it does not have a bdi (i.e. s_bdi is set to noop_backing_dev_info). Only writeback code really needs bdi set to something reasonable. So move the checks where they are more logical. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/fs-writeback.c | 5 +++++ fs/sync.c | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 41a3ccff18d8..8f660dd6137a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1315,6 +1315,8 @@ void writeback_inodes_sb_nr(struct super_block *sb, .reason = reason, }; + if (sb->s_bdi == &noop_backing_dev_info) + return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); @@ -1398,6 +1400,9 @@ void sync_inodes_sb(struct super_block *sb) .reason = WB_REASON_SYNC, }; + /* Nothing to do? 
*/ + if (sb->s_bdi == &noop_backing_dev_info) + return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); bdi_queue_work(sb->s_bdi, &work); diff --git a/fs/sync.c b/fs/sync.c index 11e3d1c44901..b3d2a001293f 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,13 +29,6 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - /* - * This should be safe, as we require bdi backing to actually - * write out data in the first place - */ - if (sb->s_bdi == &noop_backing_dev_info) - return 0; - if (sb->s_qcop && sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, -1, wait); -- cgit v1.2.3 From ceed17236a7491b44ee2be21f56a41ab997cbe7d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:28 +0200 Subject: quota: Split dquot_quota_sync() to writeback and cache flushing part Split off part of dquot_quota_sync() which writes dquots into a quota file to a separate function. In the next patch we will use the function from filesystems and we do not want to abuse ->quota_sync quotactl callback more than necessary. 
Acked-by: Steven Whitehouse Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/gfs2/quota.c | 4 ++-- fs/gfs2/quota.h | 2 +- fs/gfs2/super.c | 2 +- fs/gfs2/sys.c | 2 +- fs/quota/dquot.c | 24 +++++++++++++++++++++--- fs/quota/quota.c | 4 ++-- fs/sync.c | 2 +- include/linux/quota.h | 2 +- include/linux/quotaops.h | 8 +++++++- 9 files changed, 37 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b97178e7d397..27b5cc7d6881 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1108,7 +1108,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct super_block *sb, int type, int wait) +int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; @@ -1154,7 +1154,7 @@ int gfs2_quota_sync(struct super_block *sb, int type, int wait) static int gfs2_quota_sync_timeo(struct super_block *sb, int type) { - return gfs2_quota_sync(sb, type, 0); + return gfs2_quota_sync(sb, type); } int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 90bf1c302a98..f25d98b87904 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -26,7 +26,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct super_block *sb, int type, int wait); +extern int gfs2_quota_sync(struct super_block *sb, int type); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 713e621c240b..313c329490e2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -838,7 +838,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp->sd_vfs, 0, 1); + 
gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9c2592b1d5ff..73ecc34c4342 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -168,7 +168,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp->sd_vfs, 0, 1); + gfs2_quota_sync(sdp->sd_vfs, 0); return len; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 10cbe841cb7e..d679fc48ef27 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -595,12 +595,14 @@ out: } EXPORT_SYMBOL(dquot_scan_active); -int dquot_quota_sync(struct super_block *sb, int type, int wait) +/* Write all dquot structures to quota files */ +int dquot_writeback_dquots(struct super_block *sb, int type) { struct list_head *dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; + int err, ret = 0; mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -624,7 +626,9 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait) atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqstats_inc(DQST_LOOKUPS); - sb->dq_op->write_dquot(dquot); + err = sb->dq_op->write_dquot(dquot); + if (!ret && err) + err = ret; dqput(dquot); spin_lock(&dq_list_lock); } @@ -638,7 +642,21 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait) dqstats_inc(DQST_SYNCS); mutex_unlock(&dqopt->dqonoff_mutex); - if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE)) + return ret; +} +EXPORT_SYMBOL(dquot_writeback_dquots); + +/* Write all dquot structures to disk and make them visible from userspace */ +int dquot_quota_sync(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; + int ret; + + ret = dquot_writeback_dquots(sb, type); + if (ret) + return ret; + if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) return 0; /* This is not very clever (and 
fast) but currently I don't know about diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 9a391204ca27..c659f92298d3 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, static void quota_sync_one(struct super_block *sb, void *arg) { if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, *(int *)arg, 1); + sb->s_qcop->quota_sync(sb, *(int *)arg); } static int quota_sync_all(int type) @@ -270,7 +270,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_SYNC: if (!sb->s_qcop->quota_sync) return -ENOSYS; - return sb->s_qcop->quota_sync(sb, type, 1); + return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: case Q_XQUOTAOFF: case Q_XQUOTARM: diff --git a/fs/sync.c b/fs/sync.c index b3d2a001293f..cae145dd8018 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -30,7 +30,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) { if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, -1, wait); + sb->s_qcop->quota_sync(sb, -1); if (wait) sync_inodes_sb(sb); diff --git a/include/linux/quota.h b/include/linux/quota.h index c09fa042b5ea..524ede8a160a 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -333,7 +333,7 @@ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); int (*quota_on_meta)(struct super_block *, int, int); int (*quota_off)(struct super_block *, int); - int (*quota_sync)(struct super_block *, int, int); + int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 17b977304a09..ec6b65feaaba 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -83,7 +83,8 @@ int 
dquot_quota_on(struct super_block *sb, int type, int format_id, int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int dquot_quota_off(struct super_block *sb, int type); -int dquot_quota_sync(struct super_block *sb, int type, int wait); +int dquot_writeback_dquots(struct super_block *sb, int type); +int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, @@ -255,6 +256,11 @@ static inline int dquot_resume(struct super_block *sb, int type) #define dquot_file_open generic_file_open +static inline int dquot_writeback_dquots(struct super_block *sb, int type) +{ + return 0; +} + #endif /* CONFIG_QUOTA */ static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr) -- cgit v1.2.3 From a1177825719ccef3f76ef39bbfd5ebb6087d53c7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:29 +0200 Subject: quota: Move quota syncing to ->sync_fs method Since the moment writes to quota files are using block device page cache and space for quota structures is reserved at the moment they are first accessed we have no reason to sync quota before inode writeback. In fact this order is now only harmful since quota information can easily change during inode writeback (either because conversion of delayed-allocated extents or simply because of allocation of new blocks for simple filesystems not using page_mkwrite). So move syncing of quota information after writeback of inodes into ->sync_fs method. This way we do not have to use ->quota_sync callback which is primarily intended for use by quotactl syscall anyway and we get rid of calling ->sync_fs() twice unnecessarily. 
We skip quota syncing for OCFS2 since it does proper quota journalling in all cases (unlike ext3, ext4, and reiserfs which also support legacy non-journalled quotas) and thus there are no dirty quota structures. CC: "Theodore Ts'o" CC: Joel Becker CC: reiserfs-devel@vger.kernel.org Acked-by: Steven Whitehouse Acked-by: Dave Kleikamp Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ext2/super.c | 6 ++++++ fs/ext3/super.c | 5 +++++ fs/ext4/super.c | 5 +++++ fs/gfs2/super.c | 2 ++ fs/jfs/super.c | 5 +++++ fs/reiserfs/super.c | 5 +++++ fs/sync.c | 3 --- 7 files changed, 28 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b3621cb7ea31..5df3d2d8169c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1184,6 +1184,12 @@ static int ext2_sync_fs(struct super_block *sb, int wait) struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = EXT2_SB(sb)->s_es; + /* + * Write quota structures to quota file, sync_blockdev() will write + * them to disk later + */ + dquot_writeback_dquots(sb, -1); + spin_lock(&sbi->s_lock); if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c3a44b7c375..4ac304c55c53 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2526,6 +2526,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait) tid_t target; trace_ext3_sync_fs(sb, wait); + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots + */ + dquot_writeback_dquots(sb, -1); if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { if (wait) log_wait_commit(EXT3_SB(sb)->s_journal, target); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index eb7aa3e4ef05..d8759401ecae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) trace_ext4_sync_fs(sb, wait); 
flush_workqueue(sbi->dio_unwritten_wq); + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots + */ + dquot_writeback_dquots(sb, -1); if (jbd2_journal_start_commit(sbi->s_journal, &target)) { if (wait) jbd2_log_wait_commit(sbi->s_journal, target); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 313c329490e2..f3d6bbfb32c5 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -952,6 +952,8 @@ restart: static int gfs2_sync_fs(struct super_block *sb, int wait) { struct gfs2_sbd *sdp = sb->s_fs_info; + + gfs2_quota_sync(sb, -1); if (wait && sdp) gfs2_log_flush(sdp, NULL); return 0; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4a82950f412f..c55c7452d285 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -601,6 +601,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait) /* log == NULL indicates read-only mount */ if (log) { + /* + * Write quota structures to quota file, sync_blockdev() will + * write them to disk later + */ + dquot_writeback_dquots(sb, -1); jfs_flush_journal(log, wait); jfs_syncpt(log, 0); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 651ce767b55d..7a37dabf5a96 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -68,6 +68,11 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) { struct reiserfs_transaction_handle th; + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots + */ + dquot_writeback_dquots(s, -1); reiserfs_write_lock(s); if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th, s, 1)) diff --git a/fs/sync.c b/fs/sync.c index cae145dd8018..66acd2ba91c4 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,9 +29,6 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, -1); - if (wait) sync_inodes_sb(sb); else -- cgit v1.2.3 From b3de653105180b57af90ef2f5b8441f085f4ff56 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 
2012 16:45:30 +0200 Subject: vfs: Reorder operations during sys_sync Change the order of operations during sync from for_each_sb { writeback_inodes_sb(); sync_fs(nowait); __sync_blockdev(nowait); } for_each_sb { sync_inodes_sb(); sync_fs(wait); __sync_blockdev(wait); } to for_each_sb writeback_inodes_sb(); for_each_sb sync_fs(nowait); for_each_sb __sync_blockdev(nowait); for_each_sb sync_inodes_sb(); for_each_sb sync_fs(wait); for_each_sb __sync_blockdev(wait); This is a preparation for the following patches in this series. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/sync.c b/fs/sync.c index 66acd2ba91c4..490e90201135 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -67,18 +67,28 @@ int sync_filesystem(struct super_block *sb) } EXPORT_SYMBOL_GPL(sync_filesystem); -static void sync_one_sb(struct super_block *sb, void *arg) +static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - __sync_filesystem(sb, *(int *)arg); + sync_inodes_sb(sb); } -/* - * Sync all the data for all the filesystems (called by sys_sync() and - * emergency sync) - */ -static void sync_filesystems(int wait) + +static void writeback_inodes_one_sb(struct super_block *sb, void *arg) { - iterate_supers(sync_one_sb, &wait); + if (!(sb->s_flags & MS_RDONLY)) + writeback_inodes_sb(sb, WB_REASON_SYNC); +} + +static void sync_fs_one_sb(struct super_block *sb, void *arg) +{ + if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, *(int *)arg); +} + +static void sync_blkdev_one_sb(struct super_block *sb, void *arg) +{ + if (!(sb->s_flags & MS_RDONLY)) + __sync_blockdev(sb->s_bdev, *(int *)arg); } /* @@ -87,9 +97,15 @@ static void sync_filesystems(int wait) */ SYSCALL_DEFINE0(sync) { + int nowait = 0, wait = 1; + wakeup_flusher_threads(0, WB_REASON_SYNC); - 
sync_filesystems(0); - sync_filesystems(1); + iterate_supers(writeback_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &wait); + iterate_supers(sync_blkdev_one_sb, &wait); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -97,12 +113,18 @@ SYSCALL_DEFINE0(sync) static void do_sync_work(struct work_struct *work) { + int nowait = 0; + /* * Sync twice to reduce the possibility we skipped some inodes / pages * because they were temporarily locked */ - sync_filesystems(0); - sync_filesystems(0); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v1.2.3 From 5c0d6b60a0ba46d45020547eacf7199171920935 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:31 +0200 Subject: vfs: Create function for iterating over block devices Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/block_dev.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 5 +++++ 2 files changed, 41 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index c2bbe1fb1326..1e519195d45b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1710,3 +1710,39 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty) return res; } EXPORT_SYMBOL(__invalidate_device); + +void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) +{ + struct inode *inode, *old_inode = NULL; + + spin_lock(&inode_sb_list_lock); + list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { + struct address_space *mapping = inode->i_mapping; + + spin_lock(&inode->i_lock); + if (inode->i_state & 
(I_FREEING|I_WILL_FREE|I_NEW) || + mapping->nrpages == 0) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); + /* + * We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the + * inode_sb_list_lock. We cannot iput the inode now as we can + * be holding the last reference and we cannot iput it under + * inode_sb_list_lock. So we keep the reference and iput it + * later. + */ + iput(old_inode); + old_inode = inode; + + func(I_BDEV(inode), arg); + + spin_lock(&inode_sb_list_lock); + } + spin_unlock(&inode_sb_list_lock); + iput(old_inode); +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 48548bdd7722..6a6ca85bee23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2102,6 +2102,7 @@ extern sector_t blkdev_max_block(struct block_device *bdev); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); +extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); @@ -2123,6 +2124,10 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) { return 0; } + +static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) +{ +} #endif extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; -- cgit v1.2.3 From a8c7176b6ded413d5044a00f1d05477b95a6d7ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:32 +0200 Subject: vfs: Make sys_sync writeout also block device inodes In case block device does not have filesystem mounted on it, sys_sync will just ignore it and doesn't writeout its dirty pages. 
This is because writeback code avoids writing inodes from a superblock without a backing device, and blockdev_superblock is such a superblock. Since it is unexpected that sync doesn't write out dirty data for block devices, be nice to users and change the behavior to do so. So now we iterate over all block devices on blockdev_superblock instead of iterating over all superblocks when syncing block devices. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/sync.c b/fs/sync.c index 490e90201135..0b166f26362d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -85,10 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) sb->s_op->sync_fs(sb, *(int *)arg); } -static void sync_blkdev_one_sb(struct super_block *sb, void *arg) +static void flush_one_bdev(struct block_device *bdev, void *arg) { - if (!(sb->s_flags & MS_RDONLY)) - __sync_blockdev(sb->s_bdev, *(int *)arg); + __sync_blockdev(bdev, 0); +} + +static void sync_one_bdev(struct block_device *bdev, void *arg) +{ + sync_blockdev(bdev); } /* @@ -102,10 +106,10 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0, WB_REASON_SYNC); iterate_supers(writeback_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &wait); - iterate_supers(sync_blkdev_one_sb, &wait); + iterate_bdevs(sync_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -121,10 +125,10 @@ static void do_sync_work(struct work_struct *work) */ iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - 
iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v1.2.3 From d0e91b13eb34d449922124c34f8a05e498daa089 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:33 +0200 Subject: vfs: Remove unnecessary flushing of block devices It is not necessary to write block devices twice. The reason why we first did flush and then proper sync is that for_each_bdev() { write_bdev() wait_for_completion() } is much slower than for_each_bdev() write_bdev() for_each_bdev() wait_for_completion() when there is a larger amount of data. But as seen above, there's no real need to scan pages and submit them twice. We just need to separate the submission and waiting parts. This patch does that. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/sync.c b/fs/sync.c index 0b166f26362d..131ddae87a1d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -85,14 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) sb->s_op->sync_fs(sb, *(int *)arg); } -static void flush_one_bdev(struct block_device *bdev, void *arg) +static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) { - __sync_blockdev(bdev, 0); + filemap_fdatawrite(bdev->bd_inode->i_mapping); } -static void sync_one_bdev(struct block_device *bdev, void *arg) +static void fdatawait_one_bdev(struct block_device *bdev, void *arg) { - sync_blockdev(bdev); + filemap_fdatawait(bdev->bd_inode->i_mapping); } /* @@ -106,10 +106,10 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0, WB_REASON_SYNC); iterate_supers(writeback_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &wait); - iterate_bdevs(sync_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); + 
iterate_bdevs(fdatawait_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -125,10 +125,10 @@ static void do_sync_work(struct work_struct *work) */ iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v1.2.3 From 4ea425b63a3dfeb7707fc7cc7161c11a51e871ed Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:34 +0200 Subject: vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes wakeup_flusher_threads(0) will queue work doing complete writeback for each flusher thread. Thus there is not much point in submitting another work doing full inode WB_SYNC_NONE writeback by writeback_inodes_sb(). After this change it does not make sense to call nonblocking ->sync_fs and block device flush before calling sync_inodes_sb() because wakeup_flusher_threads() is completely asynchronous and thus these functions would be called in parallel with inode writeback running which will effectively void any work they do. So we move sync_inodes_sb() call before these two functions. 
Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/sync.c b/fs/sync.c index 131ddae87a1d..eb8722dc556f 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -73,12 +73,6 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg) sync_inodes_sb(sb); } -static void writeback_inodes_one_sb(struct super_block *sb, void *arg) -{ - if (!(sb->s_flags & MS_RDONLY)) - writeback_inodes_sb(sb, WB_REASON_SYNC); -} - static void sync_fs_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) @@ -96,17 +90,22 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) } /* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. + * Sync everything. We start by waking flusher threads so that most of + * writeback runs on all devices in parallel. Then we sync all inodes reliably + * which effectively also waits for all flusher threads to finish doing + * writeback. At this point all data is on disk so metadata should be stable + * and we tell filesystems to sync their metadata via ->sync_fs() calls. + * Finally, we writeout all block devices because some filesystems (e.g. ext2) + * just write metadata (such as inodes or bitmaps) to block device page cache + * and do not sync it on their own in ->sync_fs(). 
*/ SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(writeback_inodes_one_sb, NULL); - iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_bdevs(fdatawait_one_bdev, NULL); -- cgit v1.2.3 From e8b96eb5034a0ccebf36760f88e31ea3e3cdf1e4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 30 Apr 2012 13:11:29 -0500 Subject: vfs: allow custom EOF in generic_file_llseek code For ext3/4 htree directories, using the vfs llseek function with SEEK_END goes to i_size like for any other file, but in reality we want the maximum possible hash value. Recent changes in ext4 have cut & pasted generic_file_llseek() back into fs/ext4/dir.c, but replicating this core code seems like a bad idea, especially since the copy has already diverged from the vfs. This patch updates generic_file_llseek_size to accept both a custom maximum offset, and a custom EOF position. With this in place, ext4_dir_llseek can pass in the appropriate maximum hash position for both maxsize and eof, and get what it wants. As far as I know, this does not fix any bugs - nfs in the kernel doesn't use SEEK_END, and I don't know of any user who does. But some ext4 folks seem keen on doing the right thing here, and I can't really argue. 
(Patch also fixes up some comments slightly) Signed-off-by: Eric Sandeen Signed-off-by: Al Viro --- fs/ext3/dir.c | 3 ++- fs/ext4/file.c | 3 ++- fs/read_write.c | 18 ++++++++++-------- include/linux/fs.h | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 92490e9f85ca..901f67e37864 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -303,7 +303,8 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) if (likely(dx_dir)) return generic_file_llseek_size(file, offset, origin, - ext3_get_htree_eof(file)); + ext3_get_htree_eof(file), + i_size_read(inode)); else return generic_file_llseek(file, offset, origin); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8c7642a00054..f3dadd0a0d51 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -225,7 +225,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) else maxbytes = inode->i_sb->s_maxbytes; - return generic_file_llseek_size(file, offset, origin, maxbytes); + return generic_file_llseek_size(file, offset, origin, + maxbytes, i_size_read(inode)); } const struct file_operations ext4_file_operations = { diff --git a/fs/read_write.c b/fs/read_write.c index c20614f86c01..1adfb691e4f1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -55,10 +55,11 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, * @file: file structure to seek on * @offset: file offset to seek to * @origin: type of seek - * @size: max size of file system + * @size: max size of this file in file system + * @eof: offset used for SEEK_END position * * This is a variant of generic_file_llseek that allows passing in a custom - * file size. + * maximum file size and a custom EOF position, for e.g. 
hashed directories * * Synchronization: * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) @@ -67,13 +68,13 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, */ loff_t generic_file_llseek_size(struct file *file, loff_t offset, int origin, - loff_t maxsize) + loff_t maxsize, loff_t eof) { struct inode *inode = file->f_mapping->host; switch (origin) { case SEEK_END: - offset += i_size_read(inode); + offset += eof; break; case SEEK_CUR: /* @@ -99,7 +100,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin, * In the generic case the entire file is data, so as long as * offset isn't at the end of the file then the offset is data. */ - if (offset >= i_size_read(inode)) + if (offset >= eof) return -ENXIO; break; case SEEK_HOLE: @@ -107,9 +108,9 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin, * There is a virtual hole at the end of the file, so as long as * offset isn't i_size or larger, return i_size. */ - if (offset >= i_size_read(inode)) + if (offset >= eof) return -ENXIO; - offset = i_size_read(inode); + offset = eof; break; } @@ -132,7 +133,8 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) struct inode *inode = file->f_mapping->host; return generic_file_llseek_size(file, offset, origin, - inode->i_sb->s_maxbytes); + inode->i_sb->s_maxbytes, + i_size_read(inode)); } EXPORT_SYMBOL(generic_file_llseek); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a6ca85bee23..34acf51273dd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2454,7 +2454,7 @@ extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, - int origin, loff_t maxsize); + int origin, loff_t maxsize, loff_t eof); extern int 
generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.3 From ec7268ce21b379a248705548573393e4f346b20b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 30 Apr 2012 13:14:03 -0500 Subject: ext4: use core vfs llseek code for dir seeks Use the new functionality in generic_file_llseek_size() to accept a custom EOF position, and un-cut-and-paste all the vfs llseek code from ext4. Also fix up comments on ext4_llseek() to reflect reality. Signed-off-by: Eric Sandeen Signed-off-by: Al Viro --- fs/ext4/dir.c | 75 +++++++++++----------------------------------------------- fs/ext4/file.c | 6 ++--- 2 files changed, 17 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index aa39e600d159..8e07d2a5a139 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) /* - * ext4_dir_llseek() based on generic_file_llseek() to handle both - * non-htree and htree directories, where the "offset" is in terms - * of the filename hash value instead of the byte offset. + * ext4_dir_llseek() calls generic_file_llseek_size to handle htree + * directories, where the "offset" is in terms of the filename hash + * value instead of the byte offset. * - * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX) - * will be invalid once the directory was converted into a dx directory + * Because we may return a 64-bit hash that is well beyond offset limits, + * we need to pass the max hash as the maximum allowable offset in + * the htree directory case. + * + * For non-htree, ext4_llseek already chooses the proper max offset. 
*/ loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_mapping->host; - loff_t ret = -EINVAL; int dx_dir = is_dx_dir(inode); + loff_t htree_max = ext4_get_htree_eof(file); - mutex_lock(&inode->i_mutex); - - /* NOTE: relative offsets with dx directories might not work - * as expected, as it is difficult to figure out the - * correct offset between dx hashes */ - - switch (origin) { - case SEEK_END: - if (unlikely(offset > 0)) - goto out_err; /* not supported for directories */ - - /* so only negative offsets are left, does that have a - * meaning for directories at all? */ - if (dx_dir) - offset += ext4_get_htree_eof(file); - else - offset += inode->i_size; - break; - case SEEK_CUR: - /* - * Here we special-case the lseek(fd, 0, SEEK_CUR) - * position-querying operation. Avoid rewriting the "same" - * f_pos value back to the file because a concurrent read(), - * write() or lseek() might have altered it - */ - if (offset == 0) { - offset = file->f_pos; - goto out_ok; - } - - offset += file->f_pos; - break; - } - - if (unlikely(offset < 0)) - goto out_err; - - if (!dx_dir) { - if (offset > inode->i_sb->s_maxbytes) - goto out_err; - } else if (offset > ext4_get_htree_eof(file)) - goto out_err; - - /* Special lock needed here? */ - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - -out_ok: - ret = offset; -out_err: - mutex_unlock(&inode->i_mutex); - - return ret; + if (likely(dx_dir)) + return generic_file_llseek_size(file, offset, origin, + htree_max, htree_max); + else + return ext4_llseek(file, offset, origin); } /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index f3dadd0a0d51..782eecb57e43 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -211,9 +211,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp) } /* - * ext4_llseek() copied from generic_file_llseek() to handle both - * block-mapped and extent-mapped maxbytes values. 
This should - * otherwise be identical with generic_file_llseek(). + * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values + * by calling generic_file_llseek_size() with the appropriate maxbytes + * value for each. */ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) { -- cgit v1.2.3 From de9b9422026357c878a9fa3714936be0e1388e9c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 30 Apr 2012 13:16:04 -0500 Subject: ext3: pass custom EOF to generic_file_llseek_size() Use the new custom EOF argument to generic_file_llseek_size so that SEEK_END will go to the max hash value for htree dirs in ext3 rather than to i_size_read() Signed-off-by: Eric Sandeen Signed-off-by: Al Viro --- fs/ext3/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 901f67e37864..c8fff930790d 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -300,11 +300,11 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_mapping->host; int dx_dir = is_dx_dir(inode); + loff_t htree_max = ext3_get_htree_eof(file); if (likely(dx_dir)) return generic_file_llseek_size(file, offset, origin, - ext3_get_htree_eof(file), - i_size_read(inode)); + htree_max, htree_max); else return generic_file_llseek(file, offset, origin); } -- cgit v1.2.3 From 3c0a6163688b8ca3f44029c7bdb3d91a865c878a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 17:32:50 +0400 Subject: unobfuscate follow_up() a bit really convoluted test in there has grown up during struct mount introduction; what it checks is that we'd reached the root of mount tree. 
--- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index c14dfac83c2b..d4d15bbc8af7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -722,7 +722,7 @@ int follow_up(struct path *path) br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; - if (&parent->mnt == path->mnt) { + if (parent == mnt) { br_read_unlock(&vfsmount_lock); return 0; } -- cgit v1.2.3 From 32a7991b6a9c758e4e2b8166c5e1cc7563c3dcde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 20:43:19 +0400 Subject: tidy up namei.c a bit locking/unlocking for rcu walk taken to a couple of inline helpers Signed-off-by: Al Viro --- fs/namei.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index d4d15bbc8af7..2ccc35c4dc24 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -427,6 +427,18 @@ EXPORT_SYMBOL(path_put); * to restart the path walk from the beginning in ref-walk mode. */ +static inline void lock_rcu_walk(void) +{ + br_read_lock(&vfsmount_lock); + rcu_read_lock(); +} + +static inline void unlock_rcu_walk(void) +{ + rcu_read_unlock(); + br_read_unlock(&vfsmount_lock); +} + /** * unlazy_walk - try to switch to ref-walk mode. 
* @nd: nameidata pathwalk data @@ -480,8 +492,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) } mntget(nd->path.mnt); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); nd->flags &= ~LOOKUP_RCU; return 0; @@ -522,15 +533,13 @@ static int complete_walk(struct nameidata *nd) spin_lock(&dentry->d_lock); if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -985,8 +994,7 @@ failed: nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); return -ECHILD; } @@ -1323,8 +1331,7 @@ static void terminate_walk(struct nameidata *nd) nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); } } @@ -1691,8 +1698,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); @@ -1704,8 +1710,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); set_root_rcu(nd); } else { set_root(nd); @@ -1717,8 +1722,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); do { seq = read_seqcount_begin(&fs->seq); @@ -1753,8 
+1757,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); } else { path_get(&file->f_path); fput_light(file, fput_needed); -- cgit v1.2.3 From 8fc37ec54cd8e37193b0d42809b785ff19661c34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jul 2012 09:18:15 +0400 Subject: don't expose I_NEW inodes via dentry->d_inode d_instantiate(dentry, inode); unlock_new_inode(inode); is a bad idea; do it the other way round... Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 2 +- fs/ext2/namei.c | 4 ++-- fs/ext3/namei.c | 4 ++-- fs/ext4/namei.c | 4 ++-- fs/jffs2/dir.c | 8 ++++---- fs/jfs/namei.c | 8 ++++---- fs/reiserfs/namei.c | 8 ++++---- 7 files changed, 19 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index da52cdbe8388..ffa2be57804d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -269,8 +269,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, iput(ecryptfs_inode); goto out; } - d_instantiate(ecryptfs_dentry, ecryptfs_inode); unlock_new_inode(ecryptfs_inode); + d_instantiate(ecryptfs_dentry, ecryptfs_inode); out: return rc; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 9ba7de0e5903..73b0d9519836 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,8 +41,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -242,8 +242,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); out: return err; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c 
index 85286dbe2753..8f4fddac01a6 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1671,8 +1671,8 @@ static int ext3_add_nondir(handle_t *handle, int err = ext3_add_entry(handle, dentry, inode); if (!err) { ext3_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } drop_nlink(inode); @@ -1836,8 +1836,8 @@ out_clear_inode: if (err) goto out_clear_inode; - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); out_stop: brelse(dir_block); ext3_journal_stop(handle); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index eca3e48a62f8..d0d3f0e87f99 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle, int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } drop_nlink(inode); @@ -2249,8 +2249,8 @@ out_clear_inode: err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); out_stop: brelse(dir_block); ext4_journal_stop(handle); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 23245191c5b5..ad7774d32095 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -226,8 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, __func__, inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -446,8 +446,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -591,8 +591,8 @@ static int jffs2_mkdir 
(struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -766,8 +766,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index c426293e16c1..3b91a7ad6086 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -176,8 +176,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -309,8 +309,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -1043,8 +1043,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -1424,8 +1424,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out1: diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 3916be1a330b..8567fb847601 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -634,8 +634,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(dir); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -712,8 +712,8 @@ 
static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode goto out_failed; } - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -800,8 +800,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode // the above add_entry did not update dir's stat data reiserfs_update_sd(&th, dir); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock_once(dir->i_sb, lock_depth); @@ -1096,8 +1096,8 @@ static int reiserfs_symlink(struct inode *parent_dir, goto out_failed; } - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, parent_dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock(parent_dir->i_sb); -- cgit v1.2.3 From 3b8b487114c95ef6db5fef708ef69bfb5209014e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jun 2012 11:38:56 +0400 Subject: ecryptfs: don't reinvent the wheels, please - use struct completion ... and keep the sodding requests on stack - they are small enough. 
Signed-off-by: Al Viro --- fs/ecryptfs/ecryptfs_kernel.h | 14 --------- fs/ecryptfs/kthread.c | 72 ++++++++++++++++--------------------------- fs/ecryptfs/main.c | 5 --- 3 files changed, 26 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 867b64c5d84f..989e034f02bd 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -550,20 +550,6 @@ extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; extern struct kmem_cache *ecryptfs_global_auth_tok_cache; extern struct kmem_cache *ecryptfs_key_tfm_cache; -extern struct kmem_cache *ecryptfs_open_req_cache; - -struct ecryptfs_open_req { -#define ECRYPTFS_REQ_PROCESSED 0x00000001 -#define ECRYPTFS_REQ_DROPPED 0x00000002 -#define ECRYPTFS_REQ_ZOMBIE 0x00000004 - u32 flags; - struct file **lower_file; - struct dentry *lower_dentry; - struct vfsmount *lower_mnt; - wait_queue_head_t wait; - struct mutex mux; - struct list_head kthread_ctl_list; -}; struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb); diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 0dbe58a8b172..c7d199dc7d24 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -27,7 +27,13 @@ #include #include "ecryptfs_kernel.h" -struct kmem_cache *ecryptfs_open_req_cache; +struct ecryptfs_open_req { + struct file **lower_file; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct completion done; + struct list_head kthread_ctl_list; +}; static struct ecryptfs_kthread_ctl { #define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001 @@ -67,18 +73,13 @@ static int ecryptfs_threadfn(void *ignored) req = list_first_entry(&ecryptfs_kthread_ctl.req_list, struct ecryptfs_open_req, kthread_ctl_list); - mutex_lock(&req->mux); list_del(&req->kthread_ctl_list); - if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) { - dget(req->lower_dentry); - mntget(req->lower_mnt); - 
(*req->lower_file) = dentry_open( - req->lower_dentry, req->lower_mnt, - (O_RDWR | O_LARGEFILE), current_cred()); - req->flags |= ECRYPTFS_REQ_PROCESSED; - } - wake_up(&req->wait); - mutex_unlock(&req->mux); + dget(req->lower_dentry); + mntget(req->lower_mnt); + (*req->lower_file) = dentry_open( + req->lower_dentry, req->lower_mnt, + (O_RDWR | O_LARGEFILE), current_cred()); + complete(&req->done); } mutex_unlock(&ecryptfs_kthread_ctl.mux); } @@ -111,10 +112,9 @@ void ecryptfs_destroy_kthread(void) ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, kthread_ctl_list) { - mutex_lock(&req->mux); - req->flags |= ECRYPTFS_REQ_ZOMBIE; - wake_up(&req->wait); - mutex_unlock(&req->mux); + list_del(&req->kthread_ctl_list); + *req->lower_file = ERR_PTR(-EIO); + complete(&req->done); } mutex_unlock(&ecryptfs_kthread_ctl.mux); kthread_stop(ecryptfs_kthread); @@ -136,7 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file, struct vfsmount *lower_mnt, const struct cred *cred) { - struct ecryptfs_open_req *req; + struct ecryptfs_open_req req; int flags = O_LARGEFILE; int rc = 0; @@ -153,17 +153,10 @@ int ecryptfs_privileged_open(struct file **lower_file, rc = PTR_ERR((*lower_file)); goto out; } - req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; - goto out; - } - mutex_init(&req->mux); - req->lower_file = lower_file; - req->lower_dentry = lower_dentry; - req->lower_mnt = lower_mnt; - init_waitqueue_head(&req->wait); - req->flags = 0; + init_completion(&req.done); + req.lower_file = lower_file; + req.lower_dentry = lower_dentry; + req.lower_mnt = lower_mnt; mutex_lock(&ecryptfs_kthread_ctl.mux); if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { rc = -EIO; @@ -171,27 +164,14 @@ int ecryptfs_privileged_open(struct file **lower_file, printk(KERN_ERR "%s: We are in the middle of shutting down; " "aborting privileged request to open lower file\n", __func__); - goto 
out_free; + goto out; } - list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); + list_add_tail(&req.kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); - wait_event(req->wait, (req->flags != 0)); - mutex_lock(&req->mux); - BUG_ON(req->flags == 0); - if (req->flags & ECRYPTFS_REQ_DROPPED - || req->flags & ECRYPTFS_REQ_ZOMBIE) { - rc = -EIO; - printk(KERN_WARNING "%s: Privileged open request dropped\n", - __func__); - goto out_unlock; - } - if (IS_ERR(*req->lower_file)) - rc = PTR_ERR(*req->lower_file); -out_unlock: - mutex_unlock(&req->mux); -out_free: - kmem_cache_free(ecryptfs_open_req_cache, req); + wait_for_completion(&req.done); + if (IS_ERR(*lower_file)) + rc = PTR_ERR(*lower_file); out: return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 7edeb3d893c1..1c0b3b6b75c6 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -681,11 +681,6 @@ static struct ecryptfs_cache_info { .name = "ecryptfs_key_tfm_cache", .size = sizeof(struct ecryptfs_key_tfm), }, - { - .cache = &ecryptfs_open_req_cache, - .name = "ecryptfs_open_req_cache", - .size = sizeof(struct ecryptfs_open_req), - }, }; static void ecryptfs_free_kmem_caches(void) -- cgit v1.2.3 From 765927b2d508712d320c8934db963bbe14c3fcec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jun 2012 21:58:53 +0400 Subject: switch dentry_open() to struct path, make it grab references itself Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 18 ++--- fs/autofs4/dev-ioctl.c | 4 +- fs/cachefiles/rdwr.c | 8 +- fs/ecryptfs/kthread.c | 21 ++---- fs/exportfs/expfs.c | 13 ++-- fs/hppfs/hppfs.c | 20 ++--- fs/nfsd/vfs.c | 10 +-- fs/notify/fanotify/fanotify_user.c | 8 +- fs/open.c | 17 ++--- fs/xfs/xfs_ioctl.c | 7 +- include/linux/fs.h | 3 +- ipc/mqueue.c | 117 ++++++++++++------------------ security/selinux/hooks.c | 3 +- security/selinux/include/security.h | 2 +- security/selinux/selinuxfs.c 
| 6 +- 15 files changed, 106 insertions(+), 151 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1c9cac0cf895..d544d7816df3 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -317,7 +317,7 @@ out: return ret; } -static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) +static int spufs_context_open(struct path *path) { int ret; struct file *filp; @@ -326,11 +326,7 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) if (ret < 0) return ret; - /* - * get references for dget and mntget, will be released - * in error path of *_open(). - */ - filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred()); + filp = dentry_open(path, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); return PTR_ERR(filp); @@ -452,6 +448,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, int affinity; struct spu_gang *gang; struct spu_context *neighbor; + struct path path = {.mnt = mnt, .dentry = dentry}; ret = -EPERM; if ((flags & SPU_CREATE_NOSCHED) && @@ -494,7 +491,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, put_spu_context(neighbor); } - ret = spufs_context_open(dentry, mnt); + ret = spufs_context_open(&path); if (ret < 0) { WARN_ON(spufs_rmdir(inode, dentry)); if (affinity) @@ -551,7 +548,7 @@ out: return ret; } -static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) +static int spufs_gang_open(struct path *path) { int ret; struct file *filp; @@ -564,7 +561,7 @@ static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) * get references for dget and mntget, will be released * in error path of *_open(). 
*/ - filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred()); + filp = dentry_open(path, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); return PTR_ERR(filp); @@ -579,13 +576,14 @@ static int spufs_create_gang(struct inode *inode, struct dentry *dentry, struct vfsmount *mnt, umode_t mode) { + struct path path = {.mnt = mnt, .dentry = dentry}; int ret; ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); if (ret) goto out; - ret = spufs_gang_open(dentry, mnt); + ret = spufs_gang_open(&path); if (ret < 0) { int err = simple_rmdir(inode, dentry); WARN_ON(err); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index aa9103f8f01b..abf645c1703b 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -257,8 +257,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) * corresponding to the autofs fs we want to open. */ - filp = dentry_open(path.dentry, path.mnt, O_RDONLY, - current_cred()); + filp = dentry_open(&path, O_RDONLY, current_cred()); + path_put(&path); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 0e3c0924cc3a..c0353dfac51f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -891,6 +891,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) struct cachefiles_cache *cache; mm_segment_t old_fs; struct file *file; + struct path path; loff_t pos, eof; size_t len; void *data; @@ -916,10 +917,9 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) /* write the page to the backing filesystem and let it store it in its * own time */ - dget(object->backer); - mntget(cache->mnt); - file = dentry_open(object->backer, cache->mnt, O_RDWR, - cache->cache_cred); + path.mnt = cache->mnt; + path.dentry = object->backer; + file = dentry_open(&path, O_RDWR, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); } else { diff --git a/fs/ecryptfs/kthread.c 
b/fs/ecryptfs/kthread.c index c7d199dc7d24..809e67d05ca3 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -29,8 +29,7 @@ struct ecryptfs_open_req { struct file **lower_file; - struct dentry *lower_dentry; - struct vfsmount *lower_mnt; + struct path path; struct completion done; struct list_head kthread_ctl_list; }; @@ -74,10 +73,7 @@ static int ecryptfs_threadfn(void *ignored) struct ecryptfs_open_req, kthread_ctl_list); list_del(&req->kthread_ctl_list); - dget(req->lower_dentry); - mntget(req->lower_mnt); - (*req->lower_file) = dentry_open( - req->lower_dentry, req->lower_mnt, + *req->lower_file = dentry_open(&req->path, (O_RDWR | O_LARGEFILE), current_cred()); complete(&req->done); } @@ -140,23 +136,22 @@ int ecryptfs_privileged_open(struct file **lower_file, int flags = O_LARGEFILE; int rc = 0; + init_completion(&req.done); + req.lower_file = lower_file; + req.path.dentry = lower_dentry; + req.path.mnt = lower_mnt; + /* Corresponding dput() and mntput() are done when the * lower file is fput() when all eCryptfs files for the inode are * released. */ - dget(lower_dentry); - mntget(lower_mnt); flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; - (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); + (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) goto out; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } - init_completion(&req.done); - req.lower_file = lower_file; - req.lower_dentry = lower_dentry; - req.lower_mnt = lower_mnt; mutex_lock(&ecryptfs_kthread_ctl.mux); if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { rc = -EIO; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b42063cf1b2d..29ab099e3e08 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -19,19 +19,19 @@ #define dprintk(fmt, args...) 
do{}while(0) -static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, - struct dentry *child); +static int get_name(const struct path *path, char *name, struct dentry *child); static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, char *name, struct dentry *child) { const struct export_operations *nop = dir->d_sb->s_export_op; + struct path path = {.mnt = mnt, .dentry = dir}; if (nop->get_name) return nop->get_name(dir, name, child); else - return get_name(mnt, dir, name, child); + return get_name(&path, name, child); } /* @@ -249,11 +249,10 @@ static int filldir_one(void * __buf, const char * name, int len, * calls readdir on the parent until it finds an entry with * the same inode number as the child, and returns that. */ -static int get_name(struct vfsmount *mnt, struct dentry *dentry, - char *name, struct dentry *child) +static int get_name(const struct path *path, char *name, struct dentry *child) { const struct cred *cred = current_cred(); - struct inode *dir = dentry->d_inode; + struct inode *dir = path->dentry->d_inode; int error; struct file *file; struct getdents_callback buffer; @@ -267,7 +266,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, /* * Open the directory ... 
*/ - file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred); + file = dentry_open(path, O_RDONLY, cred); error = PTR_ERR(file); if (IS_ERR(file)) goto out; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index e5c06531dcc4..c1dffe47fde2 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -420,8 +420,7 @@ static int hppfs_open(struct inode *inode, struct file *file) { const struct cred *cred = file->f_cred; struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; + struct path path; char *host_file; int err, fd, type, filter; @@ -434,12 +433,11 @@ static int hppfs_open(struct inode *inode, struct file *file) if (host_file == NULL) goto out_free2; - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; + path.mnt = inode->i_sb->s_fs_info; + path.dentry = HPPFS_I(inode)->proc_dentry; /* XXX This isn't closed anywhere */ - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode), cred); + data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); err = PTR_ERR(data->proc_file); if (IS_ERR(data->proc_file)) goto out_free1; @@ -484,8 +482,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file) { const struct cred *cred = file->f_cred; struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; + struct path path; int err; err = -ENOMEM; @@ -493,10 +490,9 @@ static int hppfs_dir_open(struct inode *inode, struct file *file) if (data == NULL) goto out; - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode), cred); + path.mnt = inode->i_sb->s_fs_info; + path.dentry = HPPFS_I(inode)->proc_dentry; + data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); err = PTR_ERR(data->proc_file); if (IS_ERR(data->proc_file)) goto out_free; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 
05d9eee6be3a..4700a0a929d7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -745,7 +745,7 @@ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { - struct dentry *dentry; + struct path path; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; __be32 err; @@ -762,8 +762,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; /* Disallow write access to files with the append-only bit set * or any access when mandatory locking enabled @@ -792,8 +793,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, else flags = O_WRONLY|O_LARGEFILE; } - *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags, current_cred()); + *filp = dentry_open(&path, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); else { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3568c8a8b138..d43803669739 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -61,8 +61,6 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) { int client_fd; - struct dentry *dentry; - struct vfsmount *mnt; struct file *new_file; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -81,12 +79,10 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) * we need a new file handle for the userspace program so it can read even if it was * originally opened O_WRONLY. */ - dentry = dget(event->path.dentry); - mnt = mntget(event->path.mnt); /* it's possible this event was an overflow event. 
in that case dentry and mnt * are NULL; That's fine, just don't call dentry open */ - if (dentry && mnt) - new_file = dentry_open(dentry, mnt, + if (event->path.dentry && event->path.mnt) + new_file = dentry_open(&event->path, group->fanotify_data.f_flags | FMODE_NONOTIFY, current_cred()); else diff --git a/fs/open.c b/fs/open.c index 75bea868ef8a..1e914b397e12 100644 --- a/fs/open.c +++ b/fs/open.c @@ -766,11 +766,7 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); -/* - * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an - * error. - */ -struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, +struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { int error; @@ -779,19 +775,16 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, validate_creds(cred); /* We must always pass in a valid mount pointer. */ - BUG_ON(!mnt); + BUG_ON(!path->mnt); error = -ENFILE; f = get_empty_filp(); - if (f == NULL) { - dput(dentry); - mntput(mnt); + if (f == NULL) return ERR_PTR(error); - } f->f_flags = flags; - f->f_path.mnt = mnt; - f->f_path.dentry = dentry; + f->f_path = *path; + path_get(&f->f_path); error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3a05a41b5d76..1f1535d25a9b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -208,6 +208,7 @@ xfs_open_by_handle( struct inode *inode; struct dentry *dentry; fmode_t fmode; + struct path path; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -252,8 +253,10 @@ xfs_open_by_handle( goto out_dput; } - filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), - hreq->oflags, cred); + path.mnt = parfilp->f_path.mnt; + path.dentry = dentry; + filp = dentry_open(&path, hreq->oflags, cred); + dput(dentry); if (IS_ERR(filp)) { put_unused_fd(fd); return PTR_ERR(filp); diff --git 
a/include/linux/fs.h b/include/linux/fs.h index 34acf51273dd..8fabb037a48d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2060,8 +2060,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags, extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, - const struct cred *); +extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); enum { diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 2dee38d53c73..f8e54f5b9080 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -721,8 +721,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) /* * Invoked when creating a new queue via sys_mq_open */ -static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, - struct dentry *dentry, int oflag, umode_t mode, +static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, + struct path *path, int oflag, umode_t mode, struct mq_attr *attr) { const struct cred *cred = current_cred(); @@ -732,9 +732,9 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, if (attr) { ret = mq_attr_ok(ipc_ns, attr); if (ret) - goto out; + return ERR_PTR(ret); /* store for use during create */ - dentry->d_fsdata = attr; + path->dentry->d_fsdata = attr; } else { struct mq_attr def_attr; @@ -744,71 +744,51 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, ipc_ns->mq_msgsize_default); ret = mq_attr_ok(ipc_ns, &def_attr); if (ret) - goto out; + return ERR_PTR(ret); } mode &= ~current_umask(); - ret = mnt_want_write(ipc_ns->mq_mnt); + ret = mnt_want_write(path->mnt); if (ret) - goto out; - ret = vfs_create(dir->d_inode, dentry, mode, true); - dentry->d_fsdata = NULL; - if (ret) 
- goto out_drop_write; - - result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); + return ERR_PTR(ret); + ret = vfs_create(dir, path->dentry, mode, true); + path->dentry->d_fsdata = NULL; + if (!ret) + result = dentry_open(path, oflag, cred); + else + result = ERR_PTR(ret); /* * dentry_open() took a persistent mnt_want_write(), * so we can now drop this one. */ - mnt_drop_write(ipc_ns->mq_mnt); + mnt_drop_write(path->mnt); return result; - -out_drop_write: - mnt_drop_write(ipc_ns->mq_mnt); -out: - dput(dentry); - mntput(ipc_ns->mq_mnt); - return ERR_PTR(ret); } /* Opens existing queue */ -static struct file *do_open(struct ipc_namespace *ipc_ns, - struct dentry *dentry, int oflag) +static struct file *do_open(struct path *path, int oflag) { - int ret; - const struct cred *cred = current_cred(); - static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; - - if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) { - ret = -EINVAL; - goto err; - } - - if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { - ret = -EACCES; - goto err; - } - - return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); - -err: - dput(dentry); - mntput(ipc_ns->mq_mnt); - return ERR_PTR(ret); + int acc; + if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) + return ERR_PTR(-EINVAL); + acc = oflag2acc[oflag & O_ACCMODE]; + if (inode_permission(path->dentry->d_inode, acc)) + return ERR_PTR(-EACCES); + return dentry_open(path, oflag, current_cred()); } SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, struct mq_attr __user *, u_attr) { - struct dentry *dentry; + struct path path; struct file *filp; char *name; struct mq_attr attr; int fd, error; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + struct dentry *root = ipc_ns->mq_mnt->mnt_root; if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) return -EFAULT; @@ -822,52 +802,49 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, 
umode_t, mode, if (fd < 0) goto out_putname; - mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); - dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); + error = 0; + mutex_lock(&root->d_inode->i_mutex); + path.dentry = lookup_one_len(name, root, strlen(name)); + if (IS_ERR(path.dentry)) { + error = PTR_ERR(path.dentry); goto out_putfd; } - mntget(ipc_ns->mq_mnt); + path.mnt = mntget(ipc_ns->mq_mnt); if (oflag & O_CREAT) { - if (dentry->d_inode) { /* entry already exists */ - audit_inode(name, dentry); + if (path.dentry->d_inode) { /* entry already exists */ + audit_inode(name, path.dentry); if (oflag & O_EXCL) { error = -EEXIST; goto out; } - filp = do_open(ipc_ns, dentry, oflag); + filp = do_open(&path, oflag); } else { - filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root, - dentry, oflag, mode, + filp = do_create(ipc_ns, root->d_inode, + &path, oflag, mode, u_attr ? &attr : NULL); } } else { - if (!dentry->d_inode) { + if (!path.dentry->d_inode) { error = -ENOENT; goto out; } - audit_inode(name, dentry); - filp = do_open(ipc_ns, dentry, oflag); + audit_inode(name, path.dentry); + filp = do_open(&path, oflag); } - if (IS_ERR(filp)) { + if (!IS_ERR(filp)) + fd_install(fd, filp); + else error = PTR_ERR(filp); - goto out_putfd; - } - - fd_install(fd, filp); - goto out_upsem; - out: - dput(dentry); - mntput(ipc_ns->mq_mnt); + path_put(&path); out_putfd: - put_unused_fd(fd); - fd = error; -out_upsem: - mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); + if (error) { + put_unused_fd(fd); + fd = error; + } + mutex_unlock(&root->d_inode->i_mutex); out_putname: putname(name); return fd; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 372ec6502aa8..e423f5fe67fa 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2157,8 +2157,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, get_file(devnull); } else { devnull = dentry_open( 
- dget(selinux_null), - mntget(selinuxfs_mount), + &selinux_null, O_RDWR, cred); if (IS_ERR(devnull)) { devnull = NULL; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index dde2005407aa..6d3885165d14 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -221,7 +221,7 @@ extern void selinux_status_update_policyload(int seqno); extern void selinux_complete_init(void); extern int selinux_disable(void); extern void exit_sel_fs(void); -extern struct dentry *selinux_null; +extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 3ad290251288..298e695d6822 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1297,7 +1297,7 @@ out: #define NULL_FILE_NAME "null" -struct dentry *selinux_null; +struct path selinux_null; static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf, size_t count, loff_t *ppos) @@ -1838,7 +1838,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_add(dentry, inode); - selinux_null = dentry; + selinux_null.dentry = dentry; dentry = sel_make_dir(sb->s_root, "avc", &sel_last_ino); if (IS_ERR(dentry)) { @@ -1912,7 +1912,7 @@ static int __init init_sel_fs(void) return err; } - selinuxfs_mount = kern_mount(&sel_fs_type); + selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type); if (IS_ERR(selinuxfs_mount)) { printk(KERN_ERR "selinuxfs: could not mount!\n"); err = PTR_ERR(selinuxfs_mount); -- cgit v1.2.3 From 11e62a8fabd003352e852e74e1b64a437fd908c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jul 2012 11:17:49 +0400 Subject: btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file() Signed-off-by: Al Viro --- 
fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0e92e5763005..1e9f6c019ad0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3268,7 +3268,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) return ret; @@ -3338,7 +3338,7 @@ out_bargs: out: mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return ret; } -- cgit v1.2.3 From 8cae6f7158ec1fa44c8a04a43db7d8020ec60437 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jul 2012 11:19:07 +0400 Subject: ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file() Signed-off-by: Al Viro --- fs/ext4/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac3f366..23788b345975 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -390,7 +390,7 @@ group_add_out: if (err) return err; - err = mnt_want_write(filp->f_path.mnt); + err = mnt_want_write_file(filp); if (err) goto resizefs_out; @@ -402,7 +402,7 @@ group_add_out: } if (err == 0) err = err2; - mnt_drop_write(filp->f_path.mnt); + mnt_drop_write_file(filp); resizefs_out: ext4_resize_end(sb); return err; -- cgit v1.2.3 From 4bd809dbbf177ad0c450d702466b1da63e1b4b7e Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 22 Jul 2012 20:19:31 -0400 Subject: ext4: don't take the i_mutex lock when doing DIO overwrites Aligned and overwrite direct I/O can be parallelized. In ext4_file_dio_write, we first check whether these conditions are satisfied or not. If so, we take i_data_sem and release i_mutex lock directly. Meanwhile iocb->private is set to indicate that this is a dio overwrite, and it will be handled in ext4_ext_direct_IO. 
[ Added fix from Dan Carpenter to fix locking bug on the error path. ] CC: Tao Ma CC: Eric Sandeen CC: Robin Dong Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Signed-off-by: Dan Carpenter --- fs/ext4/file.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- fs/ext4/inode.c | 24 ++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a10dc7742aec..1c81509f5bd9 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -93,9 +93,13 @@ static ssize_t ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct blk_plug plug; int unaligned_aio = 0; ssize_t ret; + int overwrite = 0; + size_t length = iov_length(iov, nr_segs); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb)) @@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, ext4_aiodio_wait(inode); } - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + BUG_ON(iocb->ki_pos != pos); + + mutex_lock(&inode->i_mutex); + blk_start_plug(&plug); + + iocb->private = &overwrite; + + /* check whether we do a DIO overwrite or not */ + if (ext4_should_dioread_nolock(inode) && !unaligned_aio && + !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { + struct ext4_map_blocks map; + unsigned int blkbits = inode->i_blkbits; + int err, len; + + map.m_lblk = pos >> blkbits; + map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) + - map.m_lblk; + len = map.m_len; + + err = ext4_map_blocks(NULL, inode, &map, 0); + /* + * 'err==len' means that all of blocks has been preallocated no + * matter they are initialized or not. For excluding + * uninitialized extents, we need to check m_flags. There are + * two conditions that indicate for initialized extents. 
+ * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned; + * 2) If we do a real lookup, non-flags are returned. + * So we should check these two conditions. + */ + if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) + overwrite = 1; + } + + ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + + if (ret > 0 || ret == -EIOCBQUEUED) { + ssize_t err; + + err = generic_write_sync(file, pos, ret); + if (err < 0 && ret > 0) + ret = err; + } + blk_finish_plug(&plug); if (unaligned_aio) mutex_unlock(ext4_aio_mutex(inode)); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 76cb3b1ad78a..bed574dd4c22 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE && final_size <= inode->i_size) { int overwrite = 0; + BUG_ON(iocb->private == NULL); + + /* If we do a overwrite dio, i_mutex locking can be released */ + overwrite = *((int *)iocb->private); + + if (overwrite) { + down_read(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&inode->i_mutex); + } + /* * We could direct write to holes and fallocate. 
* @@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (!is_sync_kiocb(iocb)) { ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) - return -ENOMEM; + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } io_end->flag |= EXT4_IO_END_DIRECT; iocb->private = io_end; /* @@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ret = err; ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } + + retake_lock: + /* take i_mutex locking again if we do a ovewrite dio */ + if (overwrite) { + up_read(&EXT4_I(inode)->i_data_sem); + mutex_lock(&inode->i_mutex); + } + return ret; } -- cgit v1.2.3 From 7c319d328505b7781b65238ae9f53293b5ee0ca8 Mon Sep 17 00:00:00 2001 From: Aditya Kali Date: Sun, 22 Jul 2012 20:21:31 -0400 Subject: ext4: make quota as first class supported feature This patch adds support for quotas as a first class feature in ext4; which is to say, the quota files are stored in hidden inodes as file system metadata, instead of as separate files visible in the file system directory hierarchy. It is based on the proposal at: https://ext4.wiki.kernel.org/index.php/Design_For_1st_Class_Quota_in_Ext4 This patch introduces a new feature - EXT4_FEATURE_RO_COMPAT_QUOTA which, when turned on, enables quota accounting at mount time iteself. Also, the quota inodes are stored in two additional superblock fields. Some changes introduced by this patch that should be pointed out are: 1) Two new ext4-superblock fields - s_usr_quota_inum and s_grp_quota_inum for storing the quota inodes in use. 2) Default quota inodes are: inode#3 for tracking userquota and inode#4 for tracking group quota. The superblock fields can be set to use other inodes as well. 3) If the QUOTA feature and corresponding quota inodes are set in superblock, the quota usage tracking is turned on at mount time. On 'quotaon' ioctl, the quota limits enforcement is turned on. 
'quotaoff' ioctl turns off only the limits enforcement in this case. 4) When QUOTA feature is in use, the quota mount options 'quota', 'usrquota', 'grpquota' are ignored by the kernel. 5) mke2fs or tune2fs can be used to set the QUOTA feature and initialize quota inodes. The default reserved inodes will not be visible to user as regular files. 6) The quota-tools will need to be modified to support hidden quota files on ext4. E2fsprogs will also include support for creating and fixing quota files. 7) Support is only for the new V2 quota file format. Tested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Johann Lombardi Signed-off-by: Aditya Kali Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 5 +- fs/ext4/ext4_jbd2.h | 18 ++++--- fs/ext4/super.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 150 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4a49f8225d0b..1610e808ebe3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1315,6 +1315,8 @@ static inline struct timespec ext4_current_time(struct inode *inode) static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || + ino == EXT4_USR_QUOTA_INO || + ino == EXT4_GRP_QUOTA_INO || ino == EXT4_JOURNAL_INO || ino == EXT4_RESIZE_INO || (ino >= EXT4_FIRST_INO(sb) && @@ -1497,7 +1499,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ + EXT4_FEATURE_RO_COMPAT_QUOTA) /* * Default values for user and/or group using reserved blocks diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index f440e8f1841f..1393c8304116 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -87,14 +87,20 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure 
was * allocated so we need to update only data block */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) +#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) - -#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) +#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ + +3+DQUOT_INIT_REWRITE) : 0) + +#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ + +3+DQUOT_DEL_REWRITE) : 0) #else #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 78b7ede2efa0..bebf8e5bf087 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1137,12 +1137,18 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, struct path *path); +static int ext4_quota_on_sysfile(struct super_block *sb, int type, + int format_id); static int ext4_quota_off(struct super_block *sb, int type); +static int ext4_quota_off_sysfile(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext4_quota_write(struct super_block 
*sb, int type, const char *data, size_t len, loff_t off); +static int ext4_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags); +static int ext4_enable_quotas(struct super_block *sb); static const struct dquot_operations ext4_quota_operations = { .get_reserved_space = ext4_get_reserved_space, @@ -1164,6 +1170,16 @@ static const struct quotactl_ops ext4_qctl_operations = { .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk }; + +static const struct quotactl_ops ext4_qctl_sysfile_operations = { + .quota_on_meta = ext4_quota_on_sysfile, + .quota_off = ext4_quota_off_sysfile, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk +}; #endif static const struct super_operations ext4_sops = { @@ -2661,6 +2677,16 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) "extents feature\n"); return 0; } + +#ifndef CONFIG_QUOTA + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + !readonly) { + ext4_msg(sb, KERN_ERR, + "Filesystem with quota feature cannot be mounted RDWR " + "without CONFIG_QUOTA"); + return 0; + } +#endif /* CONFIG_QUOTA */ return 1; } @@ -3748,6 +3774,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->s_qcop = &ext4_qctl_operations; sb->dq_op = &ext4_quota_operations; + + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + /* Use qctl operations for hidden quota files. */ + sb->s_qcop = &ext4_qctl_sysfile_operations; + } #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); @@ -3960,6 +3991,16 @@ no_journal: } else descr = "out journal"; +#ifdef CONFIG_QUOTA + /* Enable quota usage during mount. 
*/ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + !(sb->s_flags & MS_RDONLY)) { + ret = ext4_enable_quotas(sb); + if (ret) + goto failed_mount7; + } +#endif /* CONFIG_QUOTA */ + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); @@ -4682,16 +4723,26 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal == NULL) ext4_commit_super(sb, 1); + unlock_super(sb); #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) if (old_opts.s_qf_names[i] && old_opts.s_qf_names[i] != sbi->s_qf_names[i]) kfree(old_opts.s_qf_names[i]); + if (enable_quota) { + if (sb_any_quota_suspended(sb)) + dquot_resume(sb, -1); + else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_QUOTA)) { + err = ext4_enable_quotas(sb); + if (err) { + lock_super(sb); + goto restore_opts; + } + } + } #endif - unlock_super(sb); - if (enable_quota) - dquot_resume(sb, -1); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); @@ -4904,6 +4955,74 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, return dquot_quota_on(sb, type, format_id, path); } +static int ext4_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags) +{ + int err; + struct inode *qf_inode; + unsigned long qf_inums[MAXQUOTAS] = { + le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), + le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) + }; + + BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); + + if (!qf_inums[type]) + return -EPERM; + + qf_inode = ext4_iget(sb, qf_inums[type]); + if (IS_ERR(qf_inode)) { + ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); + return PTR_ERR(qf_inode); + } + + err = dquot_enable(qf_inode, type, format_id, flags); + iput(qf_inode); + + return err; +} + +/* Enable usage tracking for all quota types. 
*/ +static int ext4_enable_quotas(struct super_block *sb) +{ + int type, err = 0; + unsigned long qf_inums[MAXQUOTAS] = { + le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), + le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) + }; + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + for (type = 0; type < MAXQUOTAS; type++) { + if (qf_inums[type]) { + err = ext4_quota_enable(sb, type, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED); + if (err) { + ext4_warning(sb, + "Failed to enable quota (type=%d) " + "tracking. Please run e2fsck to fix.", + type); + return err; + } + } + } + return 0; +} + +/* + * quota_on function that is used when QUOTA feature is set. + */ +static int ext4_quota_on_sysfile(struct super_block *sb, int type, + int format_id) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + return -EINVAL; + + /* + * USAGE was enabled at mount time. Only need to enable LIMITS now. + */ + return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED); +} + static int ext4_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; @@ -4930,6 +5049,18 @@ out: return dquot_quota_off(sb, type); } +/* + * quota_off function that is used when QUOTA feature is set. + */ +static int ext4_quota_off_sysfile(struct super_block *sb, int type) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + return -EINVAL; + + /* Disable only the limits. */ + return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); +} + /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and no one else should touch the files) -- cgit v1.2.3 From 8a9918497bcf5aaa8d45eb61c373605bc4e8c81f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Jul 2012 20:23:31 -0400 Subject: ext4: remove unused variable in ext4_update_super() Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 17d38de4068c..50530bdbc02a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1197,7 +1197,7 @@ static void ext4_update_super(struct super_block *sb, struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int i, ret; + int i; BUG_ON(flex_gd->count == 0 || group_data == NULL); /* -- cgit v1.2.3 From 3108b54bcedde5d952c90460df5bc21efc1e134f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Jul 2012 20:25:31 -0400 Subject: ext4: remove dynamic array size in ext4_chksum() The ext4_checksum() inline function was using a dynamic array size, which is not legal C. (It is a gcc extension). Remove it. Cc: "Darrick J. 
Wong" Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1610e808ebe3..e8e8afa402f1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1667,10 +1667,12 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, { struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(sbi->s_chksum_driver)]; + char ctx[4]; } desc; int err; + BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); + desc.shash.tfm = sbi->s_chksum_driver; desc.shash.flags = 0; *(u32 *)desc.ctx = crc; -- cgit v1.2.3 From 254706056be7e4e161ae9675ead6cd4e269be966 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Jul 2012 20:27:31 -0400 Subject: ext4: fix ext4 mismerge back in January Duplicate caused, AFAICS, by mismerge in ff9cb1c4eead5e4c292e75cd3170a82d66944101> Signed-off-by: Al Viro Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/ioctl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac3f366..6ec6f9ee2fec 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -268,7 +268,6 @@ group_extend_out: err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); - mnt_drop_write(filp->f_path.mnt); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) -- cgit v1.2.3 From 97a7406880f61d7f89d613cf72e87682420e66b0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 22 Jul 2012 20:29:31 -0400 Subject: ext4: remove useless marking of superblock dirty Commit a0375156 properly notes that superblock doesn't need to be marked as dirty when only number of free inodes / blocks / number of directories changes since that is recomputed on each mount anyway. However that comment leaves some unnecessary markings as dirty in place. Remove these. 
Artem: tested using xfstests for both journalled and non-journalled ext4. Signed-off-by: Jan Kara Signed-off-by: Artem Bityutskiy Signed-off-by: "Theodore Ts'o" Tested-by: Artem Bityutskiy --- fs/ext4/ialloc.c | 2 -- fs/ext4/mballoc.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 6866bc233e94..26154b81b836 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -315,7 +315,6 @@ out: err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!fatal) fatal = err; - ext4_mark_super_dirty(sb); } else ext4_error(sb, "bit already cleared for inode %lu", ino); @@ -830,7 +829,6 @@ got: percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); - ext4_mark_super_dirty(sb); if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ca376e7d716a..8eae94771c45 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2831,7 +2831,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); out_err: - ext4_mark_super_dirty(sb); brelse(bitmap_bh); return err; } @@ -4700,7 +4699,6 @@ do_more: put_bh(bitmap_bh); goto do_more; } - ext4_mark_super_dirty(sb); error_return: brelse(bitmap_bh); ext4_std_error(sb, err); -- cgit v1.2.3 From 044ce47fec90ec0f25605e87a5d72cca14568bc3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 22 Jul 2012 20:31:31 -0400 Subject: ext4: convert last user of ext4_mark_super_dirty() to ext4_handle_dirty_super() The last user of ext4_mark_super_dirty() in ext4_file_open() is so rare it can well be modifying the superblock properly by journalling the change. Change it and get rid of ext4_mark_super_dirty() as it's not needed anymore. Artem: small amendments. Artem: tested using xfstests for both journalled and non-journalled ext4. 
Signed-off-by: Jan Kara Signed-off-by: Artem Bityutskiy Signed-off-by: "Theodore Ts'o" Tested-by: Artem Bityutskiy --- fs/ext4/ext4.h | 9 --------- fs/ext4/file.c | 14 +++++++++++++- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e8e8afa402f1..c3411d4ce2da 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2328,15 +2328,6 @@ static inline void ext4_unlock_group(struct super_block *sb, spin_unlock(ext4_group_lock_ptr(sb, group)); } -static inline void ext4_mark_super_dirty(struct super_block *sb) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - - ext4_superblock_csum_set(sb, es); - if (EXT4_SB(sb)->s_journal == NULL) - sb->s_dirt =1; -} - /* * Block validity checking */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1c81509f5bd9..f77e795fed65 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -244,9 +244,21 @@ static int ext4_file_open(struct inode * inode, struct file * filp) path.dentry = mnt->mnt_root; cp = d_path(&path, buf, sizeof(buf)); if (!IS_ERR(cp)) { + handle_t *handle; + int err; + + handle = ext4_journal_start_sb(sb, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); + if (err) { + ext4_journal_stop(handle); + return err; + } strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); - ext4_mark_super_dirty(sb); + ext4_handle_dirty_super(handle, sb); + ext4_journal_stop(handle); } } /* -- cgit v1.2.3 From 58c5873a769987e36265d1523d2aa5bdc18f32bd Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 22 Jul 2012 20:33:31 -0400 Subject: ext4: remove unnecessary superblock dirtying This patch changes the 'ext4_handle_dirty_super()' function which submits the superblock for I/O in the following cases: 1. When creating the first large file on a file system without EXT4_FEATURE_RO_COMPAT_LARGE_FILE feature. 2. When re-sizing the file-system. 3. 
When creating an xattr on a file-system without the EXT4_FEATURE_COMPAT_EXT_ATTR feature. If the file-system has a journal enabled, the superblock is written via the journal. We do not modify this path. If the file-system has no journal, this function falls back to just marking the superblock as dirty using the 's_dirt' superblock flag. This means that it delays the actual superblock I/O submission by 5 seconds (default setting). Namely, the 'sync_supers()' kernel thread will call 'ext4_write_super()' later and will actually submit the superblock for I/O. And this is the behavior this patch modifies: we stop using 's_dirt' and just mark the superblock buffer as dirty right away. Indeed, all 3 cases above are extremely rare and it does not add any value to delay the I/O submission for them. Note: 'ext4_handle_dirty_super()' executes '__ext4_handle_dirty_super()' with 'now = 0'. This patch basically makes the 'now' argument unneeded and it will be deleted in one of the next patches. This patch also removes the 's_dirt' condition on the unmount path because we never set it anymore, so we should not test it. Tested using xfstests for both journalled and non-journalled ext4. 
Signed-off-by: Artem Bityutskiy Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/ext4_jbd2.c | 5 ++--- fs/ext4/super.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 90f7c2e84db1..c19ab6addb24 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -151,11 +151,10 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - } else if (now) { + } else { ext4_superblock_csum_set(sb, (struct ext4_super_block *)bh->b_data); mark_buffer_dirty(bh); - } else - sb->s_dirt = 1; + } return err; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bebf8e5bf087..662e93e8c25b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -896,7 +896,7 @@ static void ext4_put_super(struct super_block *sb) EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); } - if (sb->s_dirt || !(sb->s_flags & MS_RDONLY)) + if (!(sb->s_flags & MS_RDONLY)) ext4_commit_super(sb, 1); if (sbi->s_proc) { -- cgit v1.2.3 From 4d47603d9703e6fff8ff2618bc108d6280e2439d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 22 Jul 2012 20:35:31 -0400 Subject: ext4: weed out ext4_write_super We do not depend on VFS's '->write_super()' anymore and do not need the 's_dirt' flag anymore, so weed out 'ext4_write_super()' and 's_dirt'. 
Signed-off-by: Artem Bityutskiy Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/super.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 662e93e8c25b..a2a59796cde0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -74,7 +74,6 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); -static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); @@ -1210,7 +1209,6 @@ static const struct super_operations ext4_nojournal_sops = { .dirty_inode = ext4_dirty_inode, .drop_inode = ext4_drop_inode, .evict_inode = ext4_evict_inode, - .write_super = ext4_write_super, .put_super = ext4_put_super, .statfs = ext4_statfs, .remount_fs = ext4_remount, @@ -4364,7 +4362,6 @@ static int ext4_commit_super(struct super_block *sb, int sync) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); - sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb, es); mark_buffer_dirty(sbh); @@ -4471,13 +4468,6 @@ int ext4_force_commit(struct super_block *sb) return ret; } -static void ext4_write_super(struct super_block *sb) -{ - lock_super(sb); - ext4_commit_super(sb, 1); - unlock_super(sb); -} - static int ext4_sync_fs(struct super_block *sb, int wait) { int ret = 0; -- cgit v1.2.3 From b50924c2c606eccfe0caef39beb0929dfa9a1a81 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 22 Jul 2012 20:37:31 -0400 Subject: ext4: remove unnecessary argument from __ext4_handle_dirty_metadata() The '__ext4_handle_dirty_metadata()' does not need the 'now' argument anymore and we can kill 
it. Signed-off-by: Artem Bityutskiy Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/ext4_jbd2.c | 3 +-- fs/ext4/ext4_jbd2.h | 7 ++----- fs/ext4/inode.c | 2 +- fs/ext4/namei.c | 4 ++-- fs/ext4/resize.c | 2 +- 5 files changed, 7 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c19ab6addb24..bfa65b49d424 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -138,8 +138,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb, - int now) + handle_t *handle, struct super_block *sb) { struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 1393c8304116..56d258c18303 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -219,8 +219,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, struct buffer_head *bh); int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb, - int now); + handle_t *handle, struct super_block *sb); #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) @@ -232,10 +231,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) -#define ext4_handle_dirty_super_now(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1) #define ext4_handle_dirty_super(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0) + __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); diff --git 
a/fs/ext4/inode.c b/fs/ext4/inode.c index bed574dd4c22..a533a18de98e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4093,7 +4093,7 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); ext4_handle_sync(handle); - err = ext4_handle_dirty_super_now(handle, sb); + err = ext4_handle_dirty_super(handle, sb); } } raw_inode->i_generation = cpu_to_le32(inode->i_generation); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0edaf18d843e..37faf56e558d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2397,7 +2397,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_super_now(handle, sb); + err = ext4_handle_dirty_super(handle, sb); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2470,7 +2470,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_handle_dirty_super_now(handle, inode->i_sb); + err = ext4_handle_dirty_super(handle, inode->i_sb); } else { struct ext4_iloc iloc2; struct inode *i_prev = diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 50530bdbc02a..41f6ef68e2e1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -798,7 +798,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_kvfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - err = ext4_handle_dirty_super_now(handle, sb); + err = ext4_handle_dirty_super(handle, sb); if (err) ext4_std_error(sb, err); -- cgit v1.2.3 From 968dee77220768a5f52cf8b21d0bdb73486febef Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Sun, 22 Jul 2012 22:49:08 -0400 Subject: ext4: fix hole punch failure when depth is greater than 0 Whether to continue removing 
extents or not is decided by the return value of function ext4_ext_more_to_rm() which checks 2 conditions: a) if there are no more indexes to process. b) if the number of entries are decreased in the header of "depth -1". In case of hole punch, if the last block to be removed is not part of the last extent index than this index will not be deleted, hence the number of valid entries in the extent header of "depth - 1" will remain as it is and ext4_ext_more_to_rm will return 0 although the required blocks are not yet removed. This patch fixes the above mentioned problem as instead of removing the extents from the end of file, it starts removing the blocks from the particular extent from which removing blocks is actually required and continue backward until done. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Reviewed-by: Lukas Czerner Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 46b5c9fdc96a..cd0c7ed06772 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2569,10 +2569,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); - struct ext4_ext_path *path; + struct ext4_ext_path *path = NULL; ext4_fsblk_t partial_cluster = 0; handle_t *handle; - int i, err; + int i = 0, err; ext_debug("truncate since %u to %u\n", start, end); @@ -2605,8 +2605,12 @@ again: } depth = ext_depth(inode); ex = path[depth].p_ext; - if (!ex) + if (!ex) { + ext4_ext_drop_refs(path); + kfree(path); + path = NULL; goto cont; + } ee_block = le32_to_cpu(ex->ee_block); @@ -2636,8 +2640,6 @@ again: if (err < 0) goto out; } - ext4_ext_drop_refs(path); - kfree(path); } cont: @@ -2646,19 +2648,27 @@ cont: * after i_size and walking into the tree depth-wise. 
*/ depth = ext_depth(inode); - path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); - if (path == NULL) { - ext4_journal_stop(handle); - return -ENOMEM; - } - path[0].p_depth = depth; - path[0].p_hdr = ext_inode_hdr(inode); + if (path) { + int k = i = depth; + while (--k > 0) + path[k].p_block = + le16_to_cpu(path[k].p_hdr->eh_entries)+1; + } else { + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), + GFP_NOFS); + if (path == NULL) { + ext4_journal_stop(handle); + return -ENOMEM; + } + path[0].p_depth = depth; + path[0].p_hdr = ext_inode_hdr(inode); - if (ext4_ext_check(inode, path[0].p_hdr, depth)) { - err = -EIO; - goto out; + if (ext4_ext_check(inode, path[0].p_hdr, depth)) { + err = -EIO; + goto out; + } } - i = err = 0; + err = 0; while (i >= 0 && err == 0) { if (i == depth) { @@ -2772,8 +2782,10 @@ cont: out: ext4_ext_drop_refs(path); kfree(path); - if (err == -EAGAIN) + if (err == -EAGAIN) { + path = NULL; goto again; + } ext4_journal_stop(handle); return err; -- cgit v1.2.3 From ddb24bbac3681b87ae0638aacb702d9273e3c025 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 09:18:14 +0530 Subject: logfs: create a pagecache page if it is not present While writing the partial journal entries we assumed that the page associated with the journal would always in locatable. This incorrect assumption resulted in the following BUG kernel BUG at /home/benixon/WD_SMR/kernels/linux-3.3.7-logfs/fs/logfs/journal.c:569! EIP is at logfs_write_area+0xb6/0x109 [logfs] EAX: 00000000 EBX: 00000000 ECX: ef6efea4 EDX: 00000000 ESI: 001b9000 EDI: f009e000 EBP: c3c13f14 ESP: c3c13ef0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process sync (pid: 1799, ti=c3c12000 task=f07825b0 task.ti=c3c12000) Stack: 01001000 c3c13f26 781b9000 00000000 f009e000 f7286000 f1f83400 f8445071 f1f83400 c3c13f30 f8445ae9 c3c13f20 0000100a 000ee000 f009e000 00000001 c3c13f5c f8445d17 c05eb0ee 00000000 f1f83400 ef718000 f009e25c ea9c3d80 Call Trace: [] ? 
account_shadow+0x16d/0x16d [logfs] [] logfs_write_je+0x2a/0x44 [logfs] [] logfs_write_anchor+0x114/0x228 [logfs] [] ? empty+0x5/0x5 [] logfs_sync_fs+0x1e/0x31 [logfs] [] __sync_filesystem+0x5d/0x6f [] sync_one_sb+0x15/0x17 [] iterate_supers+0x59/0x9a [] ? __sync_filesystem+0x6f/0x6f [] sys_sync+0x29/0x4f [] sysenter_do_call+0x12/0x28 EIP: [] logfs_write_area+0xb6/0x109 [logfs] SS:ESP 0068:c3c13ef0 ---[ end trace ef6e9ef52601a945 ]--- The fix is to create the pagecache page if it is not locatable. Reported-and-tested-by: Benixon Dhas Signed-off-by: Prasad Joshi --- fs/logfs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 1e1c369df22b..2a09b8d73989 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area, index = ofs >> PAGE_SHIFT; page_ofs = ofs & (PAGE_SIZE - 1); - page = find_lock_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); BUG_ON(!page); memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); unlock_page(page); -- cgit v1.2.3 From 97795d2a5b8d3c8dc4365d4bd3404191840453ba Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 22 Jul 2012 23:59:40 -0400 Subject: ext4: don't let i_reserved_meta_blocks go negative If we hit a condition where we have allocated metadata blocks that were not appropriately reserved, we risk underflow of ei->i_reserved_meta_blocks. In turn, this can throw sbi->s_dirtyclusters_counter significantly out of whack and undermine the nondelalloc fallback logic in ext4_nonda_switch(). Warn if this occurs and set i_allocated_meta_blocks to avoid this problem. 
This condition is reproduced by xfstests 270 against ext2 with delalloc enabled: Mar 28 08:58:02 localhost kernel: [ 171.526344] EXT4-fs (loop1): delayed block allocation failed for inode 14 at logical offset 64486 with max blocks 64 with error -28 Mar 28 08:58:02 localhost kernel: [ 171.526346] EXT4-fs (loop1): This should not happen!! Data will be lost 270 ultimately fails with an inconsistent filesystem and requires an fsck to repair. The cause of the error is an underflow in ext4_da_update_reserve_space() due to an unreserved meta block allocation. Signed-off-by: Brian Foster Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a533a18de98e..25f809dc45a3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -346,6 +346,15 @@ void ext4_da_update_reserve_space(struct inode *inode, used = ei->i_reserved_data_blocks; } + if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " + "with only %d reserved metadata blocks\n", __func__, + inode->i_ino, ei->i_allocated_meta_blocks, + ei->i_reserved_meta_blocks); + WARN_ON(1); + ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; + } + /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; -- cgit v1.2.3 From 03179fe92318e7934c180d96f12eff2cb36ef7b6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Jul 2012 00:00:20 -0400 Subject: ext4: undo ext4_calc_metadata_amount if we fail to claim space The function ext4_calc_metadata_amount() has side effects, although it's not obvious from its function name. So if we fail to claim space, regardless of whether we retry to claim the space again, or return an error, we need to undo these side effects. 
Otherwise we can end up incorrectly calculating the number of metadata blocks needed for the operation, which was responsible for an xfstests failure for test #271 when using an ext2 file system with delalloc enabled. Reported-by: Brian Foster Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 25f809dc45a3..89b59cb7f9b8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1182,6 +1182,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; int ret; + ext4_lblk_t save_last_lblock; + int save_len; + + /* + * We will charge metadata quota at writeout time; this saves + * us from metadata over-estimation, though we may go over by + * a small amount in the end. Here we just reserve for data. + */ + ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); + if (ret) + return ret; /* * recalculate the amount of metadata blocks to reserve @@ -1190,32 +1201,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) */ repeat: spin_lock(&ei->i_block_reservation_lock); + /* + * ext4_calc_metadata_amount() has side effects, which we have + * to be prepared undo if we fail to claim space. + */ + save_len = ei->i_da_metadata_calc_len; + save_last_lblock = ei->i_da_metadata_calc_last_lblock; md_needed = EXT4_NUM_B2C(sbi, ext4_calc_metadata_amount(inode, lblock)); trace_ext4_da_reserve_space(inode, md_needed); - spin_unlock(&ei->i_block_reservation_lock); - /* - * We will charge metadata quota at writeout time; this saves - * us from metadata over-estimation, though we may go over by - * a small amount in the end. Here we just reserve for data. 
- */ - ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); - if (ret) - return ret; /* * We do still charge estimated metadata to the sb though; * we cannot afford to run out of free blocks. */ if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { - dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); + ei->i_da_metadata_calc_len = save_len; + ei->i_da_metadata_calc_last_lblock = save_last_lblock; + spin_unlock(&ei->i_block_reservation_lock); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; } + dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } - spin_lock(&ei->i_block_reservation_lock); ei->i_reserved_data_blocks++; ei->i_reserved_meta_blocks += md_needed; spin_unlock(&ei->i_block_reservation_lock); -- cgit v1.2.3 From 41b93bc1ee7e7276db698dd66afa7b740cda517a Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 09:35:52 +0530 Subject: logfs: maintain the ordering of meta-inode destruction LogFS does not use a specialized area to maintain the inodes. The inodes information is kept in a specialized file called inode file. Similarly, the segment information is kept in a segment file. Since the segment file also has an inode which is kept in the inode file, the inode for segment file must be evicted before the inode for inode file. The change fixes the following BUG during unmount Pid: 2057, comm: umount Not tainted 3.5.0-rc6+ #25 Bochs Bochs RIP: 0010:[] [] move_page_to_btree+0x32/0x1f0 [logfs] Process umount (pid: 2057, threadinfo ...) Call Trace: [] ? find_get_pages+0x2a/0x180 [] logfs_invalidatepage+0x85/0x90 [logfs] [] truncate_inode_page+0xb1/0xd0 [] truncate_inode_pages_range+0x15f/0x490 [] ? printk+0x78/0x7a [] truncate_inode_pages+0x15/0x20 [] logfs_evict_inode+0x6c/0x190 [logfs] [] ? 
_raw_spin_unlock+0x2b/0x40 [] evict+0xa7/0x1b0 [] dispose_list+0x3e/0x60 [] evict_inodes+0xf4/0x110 [] generic_shutdown_super+0x53/0xf0 [] logfs_kill_sb+0x52/0xf0 [logfs] [] deactivate_locked_super+0x45/0x80 [] deactivate_super+0x4a/0x70 [] mntput_no_expire+0xde/0x140 [] sys_umount+0x6f/0x3a0 [] system_call_fastpath+0x16/0x1b ---[ end trace 45f7752082cefafd ]--- Signed-off-by: Prasad Joshi --- fs/logfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index df093d9e4da1..6984562738d3 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -389,8 +389,8 @@ static void logfs_put_super(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); /* kill the meta-inodes */ - iput(super->s_master_inode); iput(super->s_segfile_inode); + iput(super->s_master_inode); iput(super->s_mapping_inode); } -- cgit v1.2.3 From 9f0bbd8ca7905fcc0602c038013b095322fec939 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 10:32:11 +0530 Subject: logfs: query block device for number of pages to send with bio The block device driver puts a limit on maximum number of pages that can be sent with the bio. Not all block devices can handle BIO_MAX_PAGES number of pages in bio. Specifically the virtio-blk driver limits it to 126. When the LogFS file system was exercised in KVM, the following bug from do_virtblk_request() was observed static void do_virtblk_request(struct request_queue *q) { .... .... while ((req = blk_peek_request(q)) != NULL) { BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); .... .... } .... } The patch fixes the problem by querying the maximum number of pages in bio allowed from block device driver and then using those many pages during submit_bio. 
Signed-off-by: Prasad Joshi --- fs/logfs/dev_bdev.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index ea29df36893d..e784a217b500 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -96,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, struct address_space *mapping = super->s_mapping_inode->i_mapping; struct bio *bio; struct page *page; - struct request_queue *q = bdev_get_queue(sb->s_bdev); - unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + unsigned int max_pages; int i; - if (max_pages > BIO_MAX_PAGES) - max_pages = BIO_MAX_PAGES; + max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); + bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); @@ -191,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, { struct logfs_super *super = logfs_super(sb); struct bio *bio; - struct request_queue *q = bdev_get_queue(sb->s_bdev); - unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + unsigned int max_pages; int i; - if (max_pages > BIO_MAX_PAGES) - max_pages = BIO_MAX_PAGES; + max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); + bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); -- cgit v1.2.3 From 906adea1536fe476c1068d86df01f8b130cde105 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 23 Jun 2012 11:24:48 +0800 Subject: jbd2: remove the second argument of kmap_atomic Signed-off-by: Cong Wang --- fs/jbd2/commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 216f4299f65e..af5280fb579b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -349,12 +349,12 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, return; sequence = cpu_to_be32(sequence); - addr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page); csum = 
jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, sizeof(sequence)); csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), bh->b_size); - kunmap_atomic(addr, KM_USER0); + kunmap_atomic(addr); tag->t_checksum = cpu_to_be32(csum); } -- cgit v1.2.3 From e39e64ac0cdeca3798a6bf186f873be20e2f57b4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 23 Jul 2012 15:23:45 -0400 Subject: Btrfs: don't wait around for new log writers on an SSD Waiting on spindles improves performance, but ssds want all the IO as quickly as we can push it down. Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b72b068183ec..8c35847d0fee 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1330,7 +1330,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->trans_lock); } - if (now < cur_trans->start_time || now - cur_trans->start_time < 1) + if (!btrfs_test_opt(root, SSD) && + (now < cur_trans->start_time || now - cur_trans->start_time < 1)) should_grow = 1; do { -- cgit v1.2.3 From cbea5ac1ee03197354bd38caad3fcb798f185181 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 23 Jul 2012 15:25:05 -0400 Subject: Btrfs: reduce calls to wake_up on uncontended locks The btrfs locks were unconditionally calling wake_up as the locks were released. This lead to extra thrashing on the waitqueue, especially for locks that were dominated by readers. 
Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 272f911203ff..a44eff074805 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -78,13 +78,15 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) write_lock(&eb->lock); WARN_ON(atomic_read(&eb->spinning_writers)); atomic_inc(&eb->spinning_writers); - if (atomic_dec_and_test(&eb->blocking_writers)) + if (atomic_dec_and_test(&eb->blocking_writers) && + waitqueue_active(&eb->write_lock_wq)) wake_up(&eb->write_lock_wq); } else if (rw == BTRFS_READ_LOCK_BLOCKING) { BUG_ON(atomic_read(&eb->blocking_readers) == 0); read_lock(&eb->lock); atomic_inc(&eb->spinning_readers); - if (atomic_dec_and_test(&eb->blocking_readers)) + if (atomic_dec_and_test(&eb->blocking_readers) && + waitqueue_active(&eb->read_lock_wq)) wake_up(&eb->read_lock_wq); } return; @@ -199,7 +201,8 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) } btrfs_assert_tree_read_locked(eb); WARN_ON(atomic_read(&eb->blocking_readers) == 0); - if (atomic_dec_and_test(&eb->blocking_readers)) + if (atomic_dec_and_test(&eb->blocking_readers) && + waitqueue_active(&eb->read_lock_wq)) wake_up(&eb->read_lock_wq); atomic_dec(&eb->read_locks); } @@ -247,8 +250,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb) if (blockers) { WARN_ON(atomic_read(&eb->spinning_writers)); atomic_dec(&eb->blocking_writers); - smp_wmb(); - wake_up(&eb->write_lock_wq); + smp_mb(); + if (waitqueue_active(&eb->write_lock_wq)) + wake_up(&eb->write_lock_wq); } else { WARN_ON(atomic_read(&eb->spinning_writers) != 1); atomic_dec(&eb->spinning_writers); -- cgit v1.2.3 From c5c3c5f31e6af2d12b154251a7f23b7f4add6b1d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 5 Apr 2012 14:42:44 -0400 Subject: Btrfs: remove ->dirty_inode We do all of our inode updating when we change it, and now that we do ->update_time we 
don't need ->dirty_inode for atime updates anymore, so just remove it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/super.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e23991574fdf..ddc2efdda1ab 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1477,16 +1477,6 @@ static int btrfs_unfreeze(struct super_block *sb) return 0; } -static void btrfs_fs_dirty_inode(struct inode *inode, int flags) -{ - int ret; - - ret = btrfs_dirty_inode(inode); - if (ret) - printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu " - "error %d\n", btrfs_ino(inode), ret); -} - static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); @@ -1526,7 +1516,6 @@ static const struct super_operations btrfs_super_ops = { .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, .write_inode = btrfs_write_inode, - .dirty_inode = btrfs_fs_dirty_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, -- cgit v1.2.3 From 063849eafda03edf6872a3728b4a98dcc86290c7 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Mon, 16 Apr 2012 15:27:51 +0200 Subject: Btrfs: allow mount -o remount,compress=no Btrfs allows to turn on compression on a mounted and used filesystem by issuing mount -o remount,compress=lzo. This patch allows to turn compression off again while the filesystem is mounted. As suggested by David Sterba if the compress-force option was set, it is implicitly cleared if compression is turned off. 
Tested-by: David Sterba Signed-off-by: Arnd Hannemann --- fs/btrfs/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ddc2efdda1ab..88a2d2bb2d75 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -396,15 +396,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) strcmp(args[0].from, "zlib") == 0) { compress_type = "zlib"; info->compress_type = BTRFS_COMPRESS_ZLIB; + btrfs_set_opt(info->mount_opt, COMPRESS); } else if (strcmp(args[0].from, "lzo") == 0) { compress_type = "lzo"; info->compress_type = BTRFS_COMPRESS_LZO; + btrfs_set_opt(info->mount_opt, COMPRESS); + } else if (strncmp(args[0].from, "no", 2) == 0) { + compress_type = "no"; + info->compress_type = BTRFS_COMPRESS_NONE; + btrfs_clear_opt(info->mount_opt, COMPRESS); + btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); + compress_force = false; } else { ret = -EINVAL; goto out; } - btrfs_set_opt(info->mount_opt, COMPRESS); if (compress_force) { btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); pr_info("btrfs: force %s compression\n", -- cgit v1.2.3 From 2bc5565286121d2a77ccd728eb3484dff2035b58 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Fri, 15 Jun 2012 09:49:33 +0200 Subject: Btrfs: don't update atime on RO subvolumes Before the update_time inode operation was introduced, it was not possible to prevent updates of atime on RO subvolumes. VFS was only able to check for RO on the mount, but did not know anything about btrfs subvolumes. btrfs_update_time now checks if the root is RO and skips updating the times. 
Signed-off-by: Alexander Block --- fs/btrfs/inode.c | 5 +++++ fs/inode.c | 2 ++ 2 files changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a7d1921ac76b..fcc65802f367 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4518,6 +4518,11 @@ int btrfs_dirty_inode(struct inode *inode) static int btrfs_update_time(struct inode *inode, struct timespec *now, int flags) { + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (btrfs_root_readonly(root)) + return -EROFS; + if (flags & S_VERSION) inode_inc_iversion(inode); if (flags & S_CTIME) diff --git a/fs/inode.c b/fs/inode.c index c99163b1b310..033529ecd242 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1551,6 +1551,8 @@ void touch_atime(struct path *path) * Btrfs), but since we touch atime while walking down the path we * really don't care if we failed to update the atime of the file, * so just ignore the return value. + * We may also fail on filesystems that have the ability to make parts + * of the fs read only, e.g. subvolumes in Btrfs. */ update_time(inode, &now, S_ATIME); mnt_drop_write(mnt); -- cgit v1.2.3 From e4b50e14c8f72bcbae53809815d5df70d5aec174 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Jun 2012 13:30:11 +0300 Subject: Btrfs: small naming cleanup in join_transaction() "root->fs_info" and "fs_info" are the same, but "fs_info" is preferred because it is shorter and that's what is used in the rest of the function. 
Signed-off-by: Dan Carpenter --- fs/btrfs/transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8c35847d0fee..cb2dfe293947 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -100,8 +100,8 @@ loop: kmem_cache_free(btrfs_transaction_cachep, cur_trans); cur_trans = fs_info->running_transaction; goto loop; - } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - spin_unlock(&root->fs_info->trans_lock); + } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + spin_unlock(&fs_info->trans_lock); kmem_cache_free(btrfs_transaction_cachep, cur_trans); return -EROFS; } -- cgit v1.2.3 From a43a21113365e5a9b59efc411da715d910cca87c Mon Sep 17 00:00:00 2001 From: Andrew Mahone Date: Tue, 19 Jun 2012 21:08:32 -0400 Subject: btrfs: ignore unfragmented file checks in defrag when compression enabled - rebased Rebased on btrfs-next and retested. Inform should_defrag_range if BTRFS_DEFRAG_RANGE_COMPRESS is set. If so, skip checks for adjacent extents and extent size when deciding whether to defrag, as these can prevent an uncompressed and unfragmented file from being compressed as requested. 
Signed-off-by: Andrew Mahone --- fs/btrfs/ioctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0e92e5763005..9ec23b93e019 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -832,7 +832,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) } static int should_defrag_range(struct inode *inode, u64 start, int thresh, - u64 *last_len, u64 *skip, u64 *defrag_end) + u64 *last_len, u64 *skip, u64 *defrag_end, + int compress) { struct extent_map *em; int ret = 1; @@ -863,7 +864,7 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, * we hit a real extent, if it is big or the next extent is not a * real extent, don't bother defragging it */ - if ((*last_len == 0 || *last_len >= thresh) && + if (!compress && (*last_len == 0 || *last_len >= thresh) && (em->len >= thresh || !next_mergeable)) ret = 0; out: @@ -1145,7 +1146,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, extent_thresh, &last_len, &skip, - &defrag_end)) { + &defrag_end, range->flags & + BTRFS_DEFRAG_RANGE_COMPRESS)) { unsigned long next; /* * the should_defrag function tells us how much to skip -- cgit v1.2.3 From b27f7c0c150f74564b5d4c6c24a03c5226bf6327 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 22 Jun 2012 06:30:39 -0600 Subject: btrfs: join DEV_STATS ioctls to one Commit c11d2c236cc260b36 (Btrfs: add ioctl to get and reset the device stats) introduced two ioctls doing almost the same thing distinguished by just the ioctl number which encodes "do reset after read". I have suggested http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg16604.html to implement it via the ioctl args. This hasn't happen, and I think we should use a more clean way to pass flags and should not waste ioctl numbers. 
CC: Stefan Behrens Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 16 ++++++++-------- fs/btrfs/ioctl.h | 6 ++++-- fs/btrfs/volumes.c | 5 ++--- fs/btrfs/volumes.h | 3 +-- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9ec23b93e019..3a3f916d7c02 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3065,19 +3065,21 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, } static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, - void __user *arg, int reset_after_read) + void __user *arg) { struct btrfs_ioctl_get_dev_stats *sa; int ret; - if (reset_after_read && !capable(CAP_SYS_ADMIN)) - return -EPERM; - sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) return PTR_ERR(sa); - ret = btrfs_get_dev_stats(root, sa, reset_after_read); + if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { + kfree(sa); + return -EPERM; + } + + ret = btrfs_get_dev_stats(root, sa); if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; @@ -3475,9 +3477,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_BALANCE_PROGRESS: return btrfs_ioctl_balance_progress(root, argp); case BTRFS_IOC_GET_DEV_STATS: - return btrfs_ioctl_get_dev_stats(root, argp, 0); - case BTRFS_IOC_GET_AND_RESET_DEV_STATS: - return btrfs_ioctl_get_dev_stats(root, argp, 1); + return btrfs_ioctl_get_dev_stats(root, argp); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index e440aa653c30..021c55ed8aed 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -285,9 +285,13 @@ enum btrfs_dev_stat_values { BTRFS_DEV_STAT_VALUES_MAX }; +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + struct btrfs_ioctl_get_dev_stats { __u64 devid; /* in */ __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ /* out values: */ __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; @@ -361,7 +365,5 @@ struct btrfs_ioctl_get_dev_stats { struct 
btrfs_ioctl_ino_path_args) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) -#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ - struct btrfs_ioctl_get_dev_stats) #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ecaad40e7ef4..957bf393ab46 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4890,8 +4890,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) } int btrfs_get_dev_stats(struct btrfs_root *root, - struct btrfs_ioctl_get_dev_stats *stats, - int reset_after_read) + struct btrfs_ioctl_get_dev_stats *stats) { struct btrfs_device *dev; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; @@ -4909,7 +4908,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, printk(KERN_WARNING "btrfs: get dev_stats failed, not yet valid\n"); return -ENODEV; - } else if (reset_after_read) { + } else if (stats->flags & BTRFS_DEV_STATS_RESET) { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { if (stats->nr_items > i) stats->values[i] = diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 95f6637614db..e404414a95a9 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -293,8 +293,7 @@ struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, void btrfs_dev_stat_print_on_error(struct btrfs_device *device); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_root *root, - struct btrfs_ioctl_get_dev_stats *stats, - int reset_after_read); + struct btrfs_ioctl_get_dev_stats *stats); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -- cgit v1.2.3 From 96c3f4331a8c1cd0a58307e4ac7e73e09d7dab23 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 21 Jun 2012 14:05:49 -0400 Subject: Btrfs: flush delayed inodes if we're short on space Those crazy gentoo guys have been complaining 
about ENOSPC errors on their portage volumes. This is because doing things like untar tends to create lots of new files which will soak up all the reservation space in the delayed inodes. Usually this gets papered over by the fact that we will try and commit the transaction, however if this happens in the wrong spot or we choose not to commit the transaction you will be screwed. So add the ability to explicitly flush delayed inodes to free up space. Please test this out guys to make sure it works since as usual I cannot reproduce. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/delayed-inode.c | 22 +++++++++-- fs/btrfs/delayed-inode.h | 2 + fs/btrfs/extent-tree.c | 97 +++++++++++++++++++++++++++++++----------------- 3 files changed, 83 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 2399f4086915..21d91a8073ee 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1113,8 +1113,8 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, * Returns < 0 on error and returns with an aborted transaction with any * outstanding delayed items cleaned up. 
*/ -int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int nr) { struct btrfs_root *curr_root = root; struct btrfs_delayed_root *delayed_root; @@ -1122,6 +1122,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_block_rsv *block_rsv; int ret = 0; + bool count = (nr > 0); if (trans->aborted) return -EIO; @@ -1137,7 +1138,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, delayed_root = btrfs_get_delayed_root(root); curr_node = btrfs_first_delayed_node(delayed_root); - while (curr_node) { + while (curr_node && (!count || (count && nr--))) { curr_root = curr_node->root; ret = btrfs_insert_delayed_items(trans, path, curr_root, curr_node); @@ -1149,6 +1150,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, path, curr_node); if (ret) { btrfs_release_delayed_node(curr_node); + curr_node = NULL; btrfs_abort_transaction(trans, root, ret); break; } @@ -1158,12 +1160,26 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, btrfs_release_delayed_node(prev_node); } + if (curr_node) + btrfs_release_delayed_node(curr_node); btrfs_free_path(path); trans->block_rsv = block_rsv; return ret; } +int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return __btrfs_run_delayed_items(trans, root, -1); +} + +int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int nr) +{ + return __btrfs_run_delayed_items(trans, root, nr); +} + static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_delayed_node *node) { diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f5aa4023d3e1..4f808e1baeed 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -107,6 +107,8 @@ int btrfs_inode_delayed_dir_index_count(struct inode 
*inode); int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int nr); void btrfs_balance_delayed_items(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1d36702ff7..3cde907a25a5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3728,6 +3728,60 @@ commit: return btrfs_commit_transaction(trans, root); } +enum flush_state { + FLUSH_DELALLOC = 1, + FLUSH_DELALLOC_WAIT = 2, + FLUSH_DELAYED_ITEMS_NR = 3, + FLUSH_DELAYED_ITEMS = 4, + COMMIT_TRANS = 5, +}; + +static int flush_space(struct btrfs_root *root, + struct btrfs_space_info *space_info, u64 num_bytes, + u64 orig_bytes, int state) +{ + struct btrfs_trans_handle *trans; + int nr; + int ret; + + switch (state) { + case FLUSH_DELALLOC: + case FLUSH_DELALLOC_WAIT: + ret = shrink_delalloc(root, num_bytes, + state == FLUSH_DELALLOC_WAIT); + if (ret > 0) + ret = 0; + break; + case FLUSH_DELAYED_ITEMS_NR: + case FLUSH_DELAYED_ITEMS: + if (state == FLUSH_DELAYED_ITEMS_NR) { + u64 bytes = btrfs_calc_trans_metadata_size(root, 1); + + nr = (int)div64_u64(num_bytes, bytes); + if (!nr) + nr = 1; + nr *= 2; + } else { + nr = -1; + } + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + ret = btrfs_run_delayed_items_nr(trans, root, nr); + btrfs_end_transaction(trans, root); + break; + case COMMIT_TRANS: + ret = may_commit_transaction(root, space_info, orig_bytes, 0); + break; + default: + ret = -ENOSPC; + break; + } + + return ret; +} /** * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * @root - the root we're allocating for @@ -3749,11 +3803,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_space_info *space_info = block_rsv->space_info; u64 used; u64 num_bytes = orig_bytes; - int retries = 0; + int flush_state = FLUSH_DELALLOC; int ret = 
0; - bool committed = false; bool flushing = false; - bool wait_ordered = false; + bool committed = false; again: ret = 0; @@ -3812,9 +3865,8 @@ again: * amount plus the amount of bytes that we need for this * reservation. */ - wait_ordered = true; num_bytes = used - space_info->total_bytes + - (orig_bytes * (retries + 1)); + (orig_bytes * 2); } if (ret) { @@ -3867,8 +3919,6 @@ again: trace_btrfs_space_reservation(root->fs_info, "space_info", space_info->flags, orig_bytes, 1); ret = 0; - } else { - wait_ordered = true; } } @@ -3887,36 +3937,13 @@ again: if (!ret || !flush) goto out; - /* - * We do synchronous shrinking since we don't actually unreserve - * metadata until after the IO is completed. - */ - ret = shrink_delalloc(root, num_bytes, wait_ordered); - if (ret < 0) - goto out; - - ret = 0; - - /* - * So if we were overcommitted it's possible that somebody else flushed - * out enough space and we simply didn't have enough space to reclaim, - * so go back around and try again. - */ - if (retries < 2) { - wait_ordered = true; - retries++; + ret = flush_space(root, space_info, num_bytes, orig_bytes, + flush_state); + flush_state++; + if (!ret) goto again; - } - - ret = -ENOSPC; - if (committed) - goto out; - - ret = may_commit_transaction(root, space_info, orig_bytes, 0); - if (!ret) { - committed = true; + else if (flush_state <= COMMIT_TRANS) goto again; - } out: if (flushing) { -- cgit v1.2.3 From 0ec4f431eb56d633da3a55da67d5c4b88886ccc7 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Mon, 23 Jul 2012 15:17:17 -0400 Subject: locks: fix checking of fcntl_setlease argument The only checks of the long argument passed to fcntl(fd,F_SETLEASE,.) are done after converting the long to an int. Thus some illegal values may be let through and cause problems in later code. [ They actually *don't* cause problems in mainline, as of Dave Jones's commit 8d657eb3b438 "Remove easily user-triggerable BUG from generic_setlease", but we should fix this anyway. 
And this patch will be necessary to fix real bugs on earlier kernels. ] Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- fs/locks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index fce6238d52c1..82c353304f9e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock, return 0; } -static int assign_type(struct file_lock *fl, int type) +static int assign_type(struct file_lock *fl, long type) { switch (type) { case F_RDLCK: @@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, int type, struct file_lock *fl) +static int lease_init(struct file *filp, long type, struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, int type) +static struct file_lock *lease_alloc(struct file *filp, long type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; -- cgit v1.2.3 From 02db0844beffc1c4e99d750be58ffb3ed95d6d62 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 21 Jun 2012 16:03:58 -0400 Subject: Btrfs: add DEVICE_READY ioctl This will be used in conjunction with btrfs device ready . This is needed for initrd's to have a nice and lightweight way to tell if all of the devices needed for a file system are in the cache currently. This keeps them from having to do mount+sleep loops waiting for devices to show up. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.h | 3 ++- fs/btrfs/super.c | 7 +++++++ fs/btrfs/volumes.c | 9 ++++++++- fs/btrfs/volumes.h | 1 + 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 021c55ed8aed..4e3e5d342a2b 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -363,7 +363,8 @@ struct btrfs_ioctl_get_dev_stats { struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) - #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 88a2d2bb2d75..26da344231ac 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1462,6 +1462,13 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, ret = btrfs_scan_one_device(vol->name, FMODE_READ, &btrfs_fs_type, &fs_devices); break; + case BTRFS_IOC_DEVICES_READY: + ret = btrfs_scan_one_device(vol->name, FMODE_READ, + &btrfs_fs_type, &fs_devices); + if (ret) + break; + ret = !(fs_devices->num_devices == fs_devices->total_devices); + break; } kfree(vol); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 957bf393ab46..39a0d04759f8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -429,6 +429,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; + fs_devices->total_devices = orig->total_devices; memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. 
*/ @@ -739,6 +740,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, int ret; u64 devid; u64 transid; + u64 total_devices; flags |= FMODE_EXCL; bdev = blkdev_get_by_path(path, flags, holder); @@ -760,6 +762,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); transid = btrfs_super_generation(disk_super); + total_devices = btrfs_super_num_devices(disk_super); if (disk_super->label[0]) printk(KERN_INFO "device label %s ", disk_super->label); else @@ -767,7 +770,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk(KERN_CONT "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); - + if (!ret && fs_devices_ret) + (*fs_devices_ret)->total_devices = total_devices; brelse(bh); error_close: mutex_unlock(&uuid_mutex); @@ -1433,6 +1437,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) list_del_rcu(&device->dev_list); device->fs_devices->num_devices--; + device->fs_devices->total_devices--; if (device->missing) root->fs_info->fs_devices->missing_devices--; @@ -1550,6 +1555,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; + fs_devices->total_devices = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -1749,6 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; root->fs_info->fs_devices->rw_devices++; + root->fs_info->fs_devices->total_devices++; if (device->can_discard) root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h 
index e404414a95a9..5479325987b3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -126,6 +126,7 @@ struct btrfs_fs_devices { u64 missing_devices; u64 total_rw_bytes; u64 num_can_discard; + u64 total_devices; struct block_device *latest_bdev; /* all of the devices in the FS, protected by a mutex -- cgit v1.2.3 From fed425c742cb1262ce90a41f2d3d211bac099533 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 22 Jun 2012 12:13:01 -0600 Subject: Btrfs: do not return EINVAL instead of ENOMEM from open_ctree() When bailing from open_ctree() err is returned, not ret. Signed-off-by: Ilya Dryomov --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2936ca49b3b4..fd216d9369fa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2244,7 +2244,7 @@ int open_ctree(struct super_block *sb, ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); if (ret) { - ret = -ENOMEM; + err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.3 From 44c44af2f4a6dc1595f1711cf307bd01062fd129 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 22 Jun 2012 12:14:13 -0600 Subject: Btrfs: do not ignore errors from btrfs_cleanup_fs_roots() when mounting There used to be a BUG_ON(ret) there before EH patch (79787eaa) went in. Bail out with EINVAL. 
Cc: David Sterba Signed-off-by: Ilya Dryomov --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fd216d9369fa..dd6676b446f6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2466,8 +2466,8 @@ retry_root_backup: if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_cleanup_fs_roots(fs_info); - if (ret) { - } + if (ret) + goto fail_trans_kthread; ret = btrfs_recover_relocation(tree_root); if (ret < 0) { -- cgit v1.2.3 From 23291a044c31f9dfdeaf633b631059fb75e5c2c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 Jun 2012 05:15:23 -0600 Subject: Btrfs: fix error handling in __add_reloc_root() We dereferenced "node" in the error message after freeing it. Also btrfs_panic() can return so we should return an error code instead of continuing. Signed-off-by: Dan Carpenter --- fs/btrfs/relocation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 646ee21bb035..c5dbd9149679 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1239,10 +1239,11 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); if (rb_node) { - kfree(node); btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " "for start=%llu while inserting into relocation " "tree\n"); + kfree(node); + return -EEXIST; } list_add_tail(&root->root_list, &rc->reloc_roots); -- cgit v1.2.3 From b9959295151625c17723103afd79077e80b24ddd Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 25 Jun 2012 21:25:22 -0600 Subject: Btrfs: return error of btrfs_update_inode() to caller We didn't check error of btrfs_update_inode(), but that error looks easy to bubble back up. 
Reviewed-by: David Sterba Signed-off-by: Tsutomu Itoh Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 2 +- fs/btrfs/tree-log.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fcc65802f367..f93a98e65d6c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2833,7 +2833,7 @@ err: inode_inc_iversion(inode); inode_inc_iversion(dir); inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; - btrfs_update_inode(trans, root, dir); + ret = btrfs_update_inode(trans, root, dir); out: return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8abeae4224f9..c86670f4f285 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -637,7 +637,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } inode_set_bytes(inode, saved_nbytes); - btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); out: if (inode) iput(inode); @@ -1133,7 +1133,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { btrfs_inc_nlink(inode); - btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; } else { -- cgit v1.2.3 From 0e721106923be82f651dd0ee504742a8a3eb089f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Jun 2012 16:13:18 -0400 Subject: Btrfs: change how we indicate we're adding csums There is weird logic I had to put in place to make sure that when we were adding csums that we'd used the delalloc block rsv instead of the global block rsv. Part of this meant that we had to free up our transaction reservation before we ran the delayed refs since csum deletion happens during the delayed ref work. The problem with this is that when we release a reservation we will add it to the global reserve if it is not full in order to keep us going along longer before we have to force a transaction commit. 
By releasing our reservation before we run delayed refs we don't get the opportunity to drain down the global reserve for the work we did, so we won't refill it as often. This isn't a problem per-se, it just results in us possibly committing transactions more and more often, and in rare cases could cause those WARN_ON()'s to pop in use_block_rsv because we ran out of space in our block rsv. This also helps us by holding onto space while the delayed refs run so we don't end up with as many people trying to do things at the same time, which again will help us not force commits or hit the use_block_rsv warnings. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 8 +++++++- fs/btrfs/file-item.c | 2 ++ fs/btrfs/transaction.c | 22 ++++++++-------------- fs/btrfs/transaction.h | 1 + 4 files changed, 18 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3cde907a25a5..ec0328bb86db 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3961,7 +3961,10 @@ static struct btrfs_block_rsv *get_block_rsv( { struct btrfs_block_rsv *block_rsv = NULL; - if (root->ref_cows || root == root->fs_info->csum_root) + if (root->ref_cows) + block_rsv = trans->block_rsv; + + if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; if (!block_rsv) @@ -4313,6 +4316,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + if (!trans->block_rsv) + return; + if (!trans->bytes_reserved) return; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 5d158d320233..863c34d111b5 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -690,6 +690,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, return -ENOMEM; sector_sum = sums->sums; + trans->adding_csums = 1; again: next_offset = (u64)-1; found_next = 0; @@ -853,6 +854,7 @@ 
next_sector: goto again; } out: + trans->adding_csums = 0; btrfs_free_path(path); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cb2dfe293947..328b95f67660 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -351,6 +351,7 @@ again: h->bytes_reserved = 0; h->delayed_ref_updates = 0; h->use_count = 1; + h->adding_csums = 0; h->block_rsv = NULL; h->orig_rsv = NULL; h->aborted = 0; @@ -473,7 +474,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans = trans->transaction; - struct btrfs_block_rsv *rsv = trans->block_rsv; int updates; int err; @@ -481,12 +481,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, if (cur_trans->blocked || cur_trans->delayed_refs.flushing) return 1; - /* - * We need to do this in case we're deleting csums so the global block - * rsv get's used instead of the csum block rsv. - */ - trans->block_rsv = NULL; - updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (updates) { @@ -495,8 +489,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, return err; } - trans->block_rsv = rsv; - return should_end_transaction(trans, root); } @@ -513,8 +505,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } - btrfs_trans_release_metadata(trans, root); - trans->block_rsv = NULL; while (count < 2) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; @@ -527,6 +517,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, } count++; } + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root)) { @@ -1269,9 +1261,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_run_ordered_operations(root, 0); - btrfs_trans_release_metadata(trans, root); - trans->block_rsv = NULL; - 
if (cur_trans->aborted) goto cleanup_transaction; @@ -1282,6 +1271,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; + cur_trans = trans->transaction; /* @@ -1533,6 +1525,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; cleanup_transaction: + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); // WARN_ON(1); if (current->journal_info == trans) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fe27379e368b..d314a74b4968 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -57,6 +57,7 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; int aborted; + int adding_csums; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From 287082b0bd10060e9c6b32ed9605174ddf2f672a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 28 Jun 2012 04:02:24 -0600 Subject: Btrfs: fix typo in cow_file_range_async and async_cow_submit It should be 10 * 1024 * 1024. 
Signed-off-by: Liu Bo Signed-off-by: Jiri Kosina --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f93a98e65d6c..18f1b44d1610 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1010,7 +1010,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); if (atomic_read(&root->fs_info->async_delalloc_pages) < - 5 * 1042 * 1024 && + 5 * 1024 * 1024 && waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); @@ -1035,7 +1035,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long nr_pages; u64 cur_end; - int limit = 10 * 1024 * 1042; + int limit = 10 * 1024 * 1024; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); -- cgit v1.2.3 From a874a63e1389c1daabd5abe4e4faaf9d63daf474 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 29 Jun 2012 03:58:46 -0600 Subject: Btrfs: check write access to mount earlier while creating snapshots Move check of write access to mount into upper functions so that we can use mnt_want_write_file instead, which is faster than mnt_want_write. 
Signed-off-by: Liu Bo --- fs/btrfs/ioctl.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3a3f916d7c02..c1f4975648ef 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -652,13 +652,9 @@ static noinline int btrfs_mksubvol(struct path *parent, if (dentry->d_inode) goto out_dput; - error = mnt_want_write(parent->mnt); - if (error) - goto out_dput; - error = btrfs_may_create(dir, dentry); if (error) - goto out_drop_write; + goto out_dput; down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); @@ -676,8 +672,6 @@ static noinline int btrfs_mksubvol(struct path *parent, fsnotify_mkdir(dir, dentry); out_up_read: up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); -out_drop_write: - mnt_drop_write(parent->mnt); out_dput: dput(dentry); out_unlock: @@ -1395,16 +1389,20 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + goto out; + namelen = strlen(name); if (strchr(name, '/')) { ret = -EINVAL; - goto out; + goto out_drop_write; } if (name[0] == '.' && (namelen == 1 || (name[1] == '.' 
&& namelen == 2))) { ret = -EEXIST; - goto out; + goto out_drop_write; } if (subvol) { @@ -1415,7 +1413,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, src_file = fget(fd); if (!src_file) { ret = -EINVAL; - goto out; + goto out_drop_write; } src_inode = src_file->f_path.dentry->d_inode; @@ -1424,13 +1422,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, "another FS\n"); ret = -EINVAL; fput(src_file); - goto out; + goto out_drop_write; } ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, transid, readonly); fput(src_file); } +out_drop_write: + mnt_drop_write_file(file); out: return ret; } -- cgit v1.2.3 From 768e9dfe820abdcfb6683e05c60b8634f1a4ffce Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 29 Jun 2012 03:58:47 -0600 Subject: Btrfs: remove redundant r/o check for superblock mnt_want_write() and mnt_want_write_file() will check sb->s_flags with MS_RDONLY, and we don't need to do it ourselves. Signed-off-by: Liu Bo --- fs/btrfs/ioctl.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c1f4975648ef..b8034dc62e38 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1381,14 +1381,10 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, u64 *transid, bool readonly) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct file *src_file; int namelen; int ret = 0; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - ret = mnt_want_write_file(file); if (ret) goto out; @@ -3269,9 +3265,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - ret = mnt_want_write(file->f_path.mnt); if (ret) return ret; -- cgit v1.2.3 From e54bfa31044d602a57d4e190f6d1c3763ea76bfe Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 29 Jun 2012 03:58:48 -0600 Subject: Btrfs: 
use mnt_want_write_file instead of mnt_want_write mnt_want_write_file is faster when file has been opened for write. Signed-off-by: Liu Bo --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b8034dc62e38..13ed1c9534cc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3265,7 +3265,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) return ret; @@ -3335,7 +3335,7 @@ out_bargs: out: mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return ret; } -- cgit v1.2.3 From b9ca0664dc806ba70587f6f3202b60dc736cd6e5 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 29 Jun 2012 03:58:49 -0600 Subject: Btrfs: do not set subvolume flags in readonly mode $ mkfs.btrfs /dev/sdb7 $ btrfstune -S1 /dev/sdb7 $ mount /dev/sdb7 /mnt/btrfs mount: block device /dev/sdb7 is write-protected, mounting read-only $ btrfs dev add /dev/sdb8 /mnt/btrfs/ Now we get a btrfs in which mnt flags has readonly but sb flags does not. So for those ioctls that only check sb flags with MS_RDONLY, it is going to be a problem. Setting subvolume flags is such an ioctl, we should use mnt_want_write_file() to check RO flags. 
Signed-off-by: Liu Bo --- fs/btrfs/ioctl.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 13ed1c9534cc..17facea6a51c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1521,29 +1521,40 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, u64 flags; int ret = 0; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + goto out; - if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) - return -EINVAL; + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { + ret = -EINVAL; + goto out_drop_write; + } - if (copy_from_user(&flags, arg, sizeof(flags))) - return -EFAULT; + if (copy_from_user(&flags, arg, sizeof(flags))) { + ret = -EFAULT; + goto out_drop_write; + } - if (flags & BTRFS_SUBVOL_CREATE_ASYNC) - return -EINVAL; + if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { + ret = -EINVAL; + goto out_drop_write; + } - if (flags & ~BTRFS_SUBVOL_RDONLY) - return -EOPNOTSUPP; + if (flags & ~BTRFS_SUBVOL_RDONLY) { + ret = -EOPNOTSUPP; + goto out_drop_write; + } - if (!inode_owner_or_capable(inode)) - return -EACCES; + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out_drop_write; + } down_write(&root->fs_info->subvol_sem); /* nothing to do */ if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) - goto out; + goto out_drop_sem; root_flags = btrfs_root_flags(&root->root_item); if (flags & BTRFS_SUBVOL_RDONLY) @@ -1566,8 +1577,11 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, out_reset: if (ret) btrfs_set_root_flags(&root->root_item, root_flags); -out: +out_drop_sem: up_write(&root->fs_info->subvol_sem); +out_drop_write: + mnt_drop_write_file(file); +out: return ret; } -- cgit v1.2.3 From f4c738c2e7bc6d696b0d60155df7ea01684962b6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 2 Jul 2012 17:10:51 -0400 Subject: Btrfs: rework 
shrink_delalloc So shrink_delalloc has grown all sorts of cruft over the years thanks to many reworkings of how we track enospc. What happens now as we fill up the disk is we will loop for freaking ever hoping to reclaim an arbitrary amount of space of metadata; this was from when everybody flushed at the same time. Now we only have people flushing one at a time. So instead of trying to reclaim a huge amount of space, just try to flush a decent chunk of space, and stop looping as soon as we have enough free space to satisfy our reservation. This makes xfstests 224 go much faster. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 81 +++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ec0328bb86db..5e552f9cc5be 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3586,89 +3586,58 @@ out: /* * shrink metadata reservation for delalloc */ -static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, - bool wait_ordered) +static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, + bool wait_ordered) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; - u64 reserved; + u64 delalloc_bytes; u64 max_reclaim; - u64 reclaimed = 0; long time_left; unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; - unsigned long progress; trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; smp_mb(); - reserved = space_info->bytes_may_use; - progress = space_info->reservation_progress; - - if (reserved == 0) - return 0; - - smp_mb(); - if (root->fs_info->delalloc_bytes == 0) { + delalloc_bytes = root->fs_info->delalloc_bytes; + if (delalloc_bytes == 0) { if (trans) - return 0; + return; btrfs_wait_ordered_extents(root, 0, 0); - return
0; + return; } - max_reclaim = min(reserved, to_reclaim); - nr_pages = max_t(unsigned long, nr_pages, - max_reclaim >> PAGE_CACHE_SHIFT); - while (loops < 1024) { - /* have the flusher threads jump in and do some IO */ - smp_mb(); - nr_pages = min_t(unsigned long, nr_pages, - root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); + while (delalloc_bytes && loops < 3) { + max_reclaim = min(delalloc_bytes, to_reclaim); + nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, - WB_REASON_FS_FREE_SPACE); + WB_REASON_FS_FREE_SPACE); spin_lock(&space_info->lock); - if (reserved > space_info->bytes_may_use) - reclaimed += reserved - space_info->bytes_may_use; - reserved = space_info->bytes_may_use; + if (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use + orig <= + space_info->total_bytes) { + spin_unlock(&space_info->lock); + break; + } spin_unlock(&space_info->lock); loops++; - - if (reserved == 0 || reclaimed >= max_reclaim) - break; - - if (trans && trans->transaction->blocked) - return -EAGAIN; - if (wait_ordered && !trans) { btrfs_wait_ordered_extents(root, 0, 0); } else { - time_left = schedule_timeout_interruptible(1); - - /* We were interrupted, exit */ + time_left = schedule_timeout_killable(1); if (time_left) break; } - - /* we've kicked the IO a few times, if anything has been freed, - * exit. 
There is no sense in looping here for a long time - * when we really need to commit the transaction, or there are - * just too many writers without enough free space - */ - - if (loops > 3) { - smp_mb(); - if (progress != space_info->reservation_progress) - break; - } - + smp_mb(); + delalloc_bytes = root->fs_info->delalloc_bytes; } - - return reclaimed >= to_reclaim; } /** @@ -3742,15 +3711,13 @@ static int flush_space(struct btrfs_root *root, { struct btrfs_trans_handle *trans; int nr; - int ret; + int ret = 0; switch (state) { case FLUSH_DELALLOC: case FLUSH_DELALLOC_WAIT: - ret = shrink_delalloc(root, num_bytes, - state == FLUSH_DELALLOC_WAIT); - if (ret > 0) - ret = 0; + shrink_delalloc(root, num_bytes, orig_bytes, + state == FLUSH_DELALLOC_WAIT); break; case FLUSH_DELAYED_ITEMS_NR: case FLUSH_DELAYED_ITEMS: -- cgit v1.2.3 From c0901581ad077004145c9ee80e843fba71c100b8 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 10 Jul 2012 07:30:17 -0600 Subject: Btrfs: avoid I/O repair BUG() from btree_read_extent_buffer_pages() From btree_read_extent_buffer_pages(), currently repair_io_failure() can be called with mirror_num being zero when submit_one_bio() returned an error before. This used to cause a BUG_ON(!mirror_num) in repair_io_failure() and indeed this is not a case that needs the I/O repair code to rewrite disk blocks. This commit prevents calling repair_io_failure() in this case and thus avoids the BUG_ON() and malfunction. 
Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dd6676b446f6..1a4a2a975926 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -407,7 +407,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, break; } - if (failed && !ret) + if (failed && !ret && failed_mirror) repair_eb_io_failure(root, eb, failed_mirror); return ret; -- cgit v1.2.3 From 51a8cf9d2d97017d334f33f1b39067bd2f03bc49 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 10 Jul 2012 05:28:38 -0600 Subject: Btrfs: fix btrfs_is_free_space_inode to recognize btree inode For btree inode, its root is also 'tree root', so btree inode can be mistaken for a free space inode. We should add one more check for btree inode. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 12394a90d60f..b168238bcb13 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -194,8 +194,10 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, struct inode *inode) { - if (root == root->fs_info->tree_root || - BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + if (root == root->fs_info->tree_root && + btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID) + return true; + if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) return true; return false; } -- cgit v1.2.3 From 83eea1f1bacd5dc7b44dcf84f5fdca54fdea5453 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 10 Jul 2012 05:28:39 -0600 Subject: Btrfs: kill root from btrfs_is_free_space_inode Since root can be fetched via the BTRFS_I macro directly, we can drop an argument from btrfs_is_free_space_inode().
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 5 +++-- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file-item.c | 2 +- fs/btrfs/inode.c | 22 +++++++++++----------- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b168238bcb13..21b8cfe08e95 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -191,9 +191,10 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) BTRFS_I(inode)->disk_i_size = size; } -static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, - struct inode *inode) +static inline bool btrfs_is_free_space_inode(struct inode *inode) { + struct btrfs_root *root = BTRFS_I(inode)->root; + if (root == root->fs_info->tree_root && btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID) return true; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e552f9cc5be..d1ebd2a06116 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4444,7 +4444,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) int ret; /* Need to be holding the i_mutex here if we aren't free space cache */ - if (btrfs_is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(inode)) flush = 0; if (flush && btrfs_transaction_in_commit(root->fs_info)) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 863c34d111b5..b45b9de0c21d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -183,7 +183,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, * read from the commit root and sidestep a nasty deadlock * between reading the free space cache and updating the csum tree. 
*/ - if (btrfs_is_free_space_inode(root, inode)) { + if (btrfs_is_free_space_inode(inode)) { path->search_commit_root = 1; path->skip_locking = 1; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 18f1b44d1610..321c415dea7f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -825,7 +825,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(btrfs_is_free_space_inode(root, inode)); + BUG_ON(btrfs_is_free_space_inode(inode)); trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, @@ -1153,7 +1153,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, return -ENOMEM; } - nolock = btrfs_is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(inode); if (nolock) trans = btrfs_join_transaction_nolock(root); @@ -1466,7 +1466,7 @@ static void btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !btrfs_is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(inode); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1501,7 +1501,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !btrfs_is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(inode); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1612,7 +1612,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (btrfs_is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(inode)) metadata = 2; if (!(rw & REQ_WRITE)) { @@ 
-1869,7 +1869,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) int ret; bool nolock; - nolock = btrfs_is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(inode); if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { ret = -EIO; @@ -2007,7 +2007,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, ordered_extent->work.func = finish_ordered_fn; ordered_extent->work.flags = 0; - if (btrfs_is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(inode)) workers = &root->fs_info->endio_freespace_worker; else workers = &root->fs_info->endio_write_workers; @@ -2732,7 +2732,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * The data relocation inode should also be directly updated * without delay */ - if (!btrfs_is_free_space_inode(root, inode) + if (!btrfs_is_free_space_inode(inode) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) @@ -3743,7 +3743,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - btrfs_is_free_space_inode(root, inode))) + btrfs_is_free_space_inode(inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -4457,7 +4457,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) return 0; - if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { @@ -7051,7 +7051,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - !btrfs_is_free_space_inode(root, inode)) + !btrfs_is_free_space_inode(inode)) return 1; else return generic_drop_inode(inode); 
-- cgit v1.2.3 From 067893842341e7b7487062367ecfaa46c97505e0 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 6 Jul 2012 03:31:33 -0600 Subject: Btrfs: do not abort transaction in prealloc case During disk balance, we prealloc new file extent for file data relocation, but we may fail in 'no available space' case, and it leads to flipping btrfs into readonly. It is not necessary to bail out and abort transaction since we do have several ways to rescue ourselves from ENOSPC case. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d1ebd2a06116..67bd12a52369 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5748,7 +5748,11 @@ loop: ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_LIMITED); - if (ret < 0) { + /* + * Do not bail out on ENOSPC since we + * can do more things. + */ + if (ret < 0 && ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); goto out; -- cgit v1.2.3 From cf7c1ef6e1fe05864369f59dd516e816b11de7d0 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 6 Jul 2012 03:31:34 -0600 Subject: Btrfs: fix a bug of writting free space cache during balance Here is the whole story: 1) A free space cache consists of two parts: o free space cache inode, which is special because it's stored in root tree. o free space info, which is stored as the above inode's file data. But we only build up another new inode and do not flush its free space info onto disk when we _clear and setup_ free space cache, and this ends up with that the block group cache's cache_state remains DC_SETUP instead of DC_WRITTEN. And holding DC_SETUP means that we will not truncate this free space cache inode, which means the disk offset of its file extent will remain _unchanged_ at least until next transaction finishes committing itself.
2) We can set a block group readonly when we relocate the block group. However, if the readonly block group covers the disk offset where our free space cache inode is going to write, it will force the free space cache inode into cow_file_range() and it'll end up hitting a BUG_ON. 3) Due to the above analysis, we fix this bug by adding the missing dirty flag. 4) However, it's not over, there is still another case, nospace_cache. With nospace_cache, we do not want to set dirty flag, instead we just truncate free space cache inode and bail out with setting cache state DC_WRITTEN. We can benifit from it since it saves us another 'pre-allocation' part which usually costs a lot. Signed-off-by: Liu Bo Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 67bd12a52369..3ca26d84cce5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2903,8 +2903,13 @@ again: } spin_lock(&block_group->lock); - if (block_group->cached != BTRFS_CACHE_FINISHED) { - /* We're not cached, don't bother trying to write stuff out */ + if (block_group->cached != BTRFS_CACHE_FINISHED || + !btrfs_test_opt(root, SPACE_CACHE)) { + /* + * don't bother trying to write stuff out _if_ + * a) we're not cached, + * b) we're with nospace_cache mount option. + */ dcs = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); goto out_put; @@ -7614,8 +7619,21 @@ int btrfs_read_block_groups(struct btrfs_root *root) INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - if (need_clear) + if (need_clear) { + /* + * When we mount with old space cache, we need to + * set BTRFS_DC_CLEAR and set dirty flag. + * + * a) Setting 'BTRFS_DC_CLEAR' makes sure that we + * truncate the old free space cache inode and + * setup a new one. 
+ * b) Setting 'dirty flag' makes sure that we flush + * the new space cache info onto disk. + */ cache->disk_cache_state = BTRFS_DC_CLEAR; + if (btrfs_test_opt(root, SPACE_CACHE)) + cache->dirty = 1; + } read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), -- cgit v1.2.3 From 799ffc3c31de57d10a4b9abcfbfeea8771acc976 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 6 Jul 2012 03:31:35 -0600 Subject: Btrfs: add ro notification to dump_space_info Block group has ro attributes, make dump_space_info show it. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3ca26d84cce5..7843542484c9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5825,13 +5825,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); - printk(KERN_INFO "block group %llu has %llu bytes, %llu used " - "%llu pinned %llu reserved\n", + printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", (unsigned long long)cache->key.objectid, (unsigned long long)cache->key.offset, (unsigned long long)btrfs_block_group_used(&cache->item), (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved); + (unsigned long long)cache->reserved, + cache->ro ? "[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } -- cgit v1.2.3 From f6175efab1e024554a104cca1f86134ef7ce06bc Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 6 Jul 2012 03:31:36 -0600 Subject: Btrfs: do not count in readonly bytes If a block group is ro, do not count its entries in when we dump space info. 
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6c4e2baa9290..6b10acfc2f5c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1968,7 +1968,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { info = rb_entry(n, struct btrfs_free_space, offset_index); - if (info->bytes >= bytes) + if (info->bytes >= bytes && !block_group->ro) count++; printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", (unsigned long long)info->offset, -- cgit v1.2.3 From e6466e354a5c23717325adecf387f93be4b9c830 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 4 Jul 2012 08:15:02 -0600 Subject: Btrfs: fix buffer leak in btrfs_next_old_leaf When calling btrfs_next_old_leaf, we were leaking an extent buffer in the rare case of using the deadlock avoidance code needed for the tree mod log. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8206b3900587..67fe46fdee6f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5127,6 +5127,7 @@ again: * locked. To solve this situation, we give up * on our lock and cycle. 
*/ + free_extent_buffer(next); btrfs_release_path(path); cond_resched(); goto again; -- cgit v1.2.3 From d5b025d510664382f9a197f7e7fb9fc60fe209bc Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 2 Jul 2012 22:05:21 -0600 Subject: btrfs read error corrected message floods the console during recovery Changing printk_in_rcu to printk_ratelimited_in_rcu will suffice Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 01c21b6c6d43..f08206fcfb22 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1918,7 +1918,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, return -EIO; } - printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " + printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " "(dev %s sector %llu)\n", page->mapping->host->i_ino, start, rcu_str_deref(dev->name), sector); -- cgit v1.2.3 From b4d7c3c9456a311a45bc1ef8944b5ba5b176244f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 9 Jul 2012 20:21:07 -0600 Subject: Btrfs: kill free_space pointer from inode structure Inodes always allocate free space with BTRFS_BLOCK_GROUP_DATA type, which means every inode has the same BTRFS_I(inode)->free_space pointer. This shrinks struct btrfs_inode by 4 bytes (or 8 bytes on 64 bits). 
Signed-off-by: Li Zefan --- fs/btrfs/btrfs_inode.h | 3 --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 20 ++++++++------------ fs/btrfs/inode.c | 3 --- 4 files changed, 10 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 21b8cfe08e95..5b2ad6bc4fe7 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -87,9 +87,6 @@ struct btrfs_inode { /* node for the red-black tree that links inodes in subvolume root */ struct rb_node rb_node; - /* the space_info for where this inode's data allocations are done */ - struct btrfs_space_info *space_info; - unsigned long runtime_flags; /* full 64 bit generation number, struct vfs_inode doesn't have a big diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa5c45b39075..6761490b91cd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1240,6 +1240,8 @@ struct btrfs_fs_info { */ struct list_head space_info; + struct btrfs_space_info *data_sinfo; + struct reloc_control *reloc_ctl; spinlock_t delalloc_lock; @@ -2607,7 +2609,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); -void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_data_free_space(struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7843542484c9..6621ed72f3c3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3139,6 +3139,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, init_waitqueue_head(&found->wait); *space_info = found; list_add_rcu(&found->list, &info->space_info); + if (flags & BTRFS_BLOCK_GROUP_DATA) + info->data_sinfo = found; 
return 0; } @@ -3268,12 +3270,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) return get_alloc_profile(root, flags); } -void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) -{ - BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, - BTRFS_BLOCK_GROUP_DATA); -} - /* * This will check the space that the inode allocates from to make sure we have * enough space for bytes. @@ -3282,6 +3278,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = root->fs_info; u64 used; int ret = 0, committed = 0, alloc_chunk = 1; @@ -3294,7 +3291,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) committed = 1; } - data_sinfo = BTRFS_I(inode)->space_info; + data_sinfo = fs_info->data_sinfo; if (!data_sinfo) goto alloc; @@ -3335,10 +3332,9 @@ alloc: goto commit_trans; } - if (!data_sinfo) { - btrfs_set_inode_space_info(root, inode); - data_sinfo = BTRFS_I(inode)->space_info; - } + if (!data_sinfo) + data_sinfo = fs_info->data_sinfo; + goto again; } @@ -3385,7 +3381,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - data_sinfo = BTRFS_I(inode)->space_info; + data_sinfo = root->fs_info->data_sinfo; spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 321c415dea7f..ee45ebf42192 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4082,7 +4082,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; - btrfs_set_inode_space_info(args->root, inode); return 0; } @@ -4667,7 +4666,6 @@ static struct inode 
*btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; inode->i_generation = BTRFS_I(inode)->generation; - btrfs_set_inode_space_info(root, inode); if (S_ISDIR(mode)) owner = 0; @@ -6944,7 +6942,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return NULL; ei->root = NULL; - ei->space_info = NULL; ei->generation = 0; ei->last_trans = 0; ei->last_sub_trans = 0; -- cgit v1.2.3 From 293f7e07405a63975cee4e95a2cfa0c17b34b3aa Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 10 Jul 2012 00:58:58 -0600 Subject: Btrfs: zero unused bytes in inode item The otime field is not zeroed, so users will see random otime in an old filesystem with a new kernel which has otime support in the future. The reserved bytes are also not zeroed, and we'll have compatibility issue if we make use of those bytes. Signed-off-by: Li Zefan --- fs/btrfs/delayed-inode.c | 1 + fs/btrfs/inode.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 21d91a8073ee..335605c8ceab 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -62,6 +62,7 @@ static inline void btrfs_init_delayed_node( INIT_LIST_HEAD(&delayed_node->n_list); INIT_LIST_HEAD(&delayed_node->p_list); delayed_node->bytes_reserved = 0; + memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item)); } static inline int btrfs_is_continuous_delayed_item( diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ee45ebf42192..144f4642b2a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4693,6 +4693,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); + memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, + sizeof(*inode_item)); fill_inode_item(trans, path->nodes[0], 
inode_item, inode); ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, -- cgit v1.2.3 From 18077bb413687f96bd168efcfb2b8778529e3b74 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 9 Jul 2012 20:22:35 -0600 Subject: Btrfs: rewrite BTRFS_SETGET_FUNCS BTRFS_SETGET_FUNCS macro is used to generate btrfs_set_foo() and btrfs_foo() functions, which read and write specific fields in the extent buffer. The total number of set/get functions is ~200, but in fact we only need 8 functions: 2 for u8 field, 2 for u16, 2 for u32 and 2 for u64. It results in redunction of ~37K bytes. text data bss dec hex filename 629661 12489 216 642366 9cd3e fs/btrfs/btrfs.o.orig 592637 12489 216 605342 93c9e fs/btrfs/btrfs.o Signed-off-by: Li Zefan --- fs/btrfs/ctree.h | 53 +++++++++++-- fs/btrfs/struct-funcs.c | 196 ++++++++++++++++++++++++------------------------ 2 files changed, 146 insertions(+), 103 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6761490b91cd..a0ee2f8e0566 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1623,13 +1623,54 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token) offsetof(type, member), \ sizeof(((type *)0)->member))) -#ifndef BTRFS_SETGET_FUNCS +#define DECLARE_BTRFS_SETGET_BITS(bits) \ +u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ + unsigned long off, \ + struct btrfs_map_token *token); \ +void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \ + unsigned long off, u##bits val, \ + struct btrfs_map_token *token); \ +static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \ + unsigned long off) \ +{ \ + return btrfs_get_token_##bits(eb, ptr, off, NULL); \ +} \ +static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \ + unsigned long off, u##bits val) \ +{ \ + btrfs_set_token_##bits(eb, ptr, off, val, NULL); \ +} + +DECLARE_BTRFS_SETGET_BITS(8) +DECLARE_BTRFS_SETGET_BITS(16) +DECLARE_BTRFS_SETGET_BITS(32) 
+DECLARE_BTRFS_SETGET_BITS(64) + #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ -u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ -u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \ -void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\ -void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); -#endif +static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \ +{ \ + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + return btrfs_get_##bits(eb, s, offsetof(type, member)); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \ + u##bits val) \ +{ \ + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + btrfs_set_##bits(eb, s, offsetof(type, member), val); \ +} \ +static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \ + struct btrfs_map_token *token) \ +{ \ + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \ +} \ +static inline void btrfs_set_token_##name(struct extent_buffer *eb, \ + type *s, u##bits val, \ + struct btrfs_map_token *token) \ +{ \ + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \ +} #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c6ffa5812419..b976597b0721 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -17,15 +17,27 @@ */ #include +#include -/* this is some deeply nasty code. 
ctree.h has a different - * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef +#include "ctree.h" + +static inline u8 get_unaligned_le8(const void *p) +{ + return *(u8 *)p; +} + +static inline void put_unaligned_le8(u8 val, void *p) +{ + *(u8 *)p = val; +} + +/* + * this is some deeply nasty code. * * The end result is that anyone who #includes ctree.h gets a - * declaration for the btrfs_set_foo functions and btrfs_foo functions - * - * This file declares the macros and then #includes ctree.h, which results - * in cpp creating the function here based on the template below. + * declaration for the btrfs_set_foo functions and btrfs_foo functions, + * which are wappers of btrfs_set_token_#bits functions and + * btrfs_get_token_#bits functions, which are defined in this file. * * These setget functions do all the extent_buffer related mapping * required to efficiently read and write specific fields in the extent @@ -33,103 +45,93 @@ * an unsigned long offset into the extent buffer which has been * cast to a specific type. This gives us all the gcc type checking. * - * The extent buffer api is used to do all the kmapping and page - * spanning work required to get extent buffers in highmem and have - * a metadata blocksize different from the page size. - * - * The macro starts with a simple function prototype declaration so that - * sparse won't complain about it being static. + * The extent buffer api is used to do the page spanning work required to + * have a metadata blocksize different from the page size. 
*/ -#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ -u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ -void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \ -void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \ -u##bits btrfs_token_##name(struct extent_buffer *eb, \ - type *s, struct btrfs_map_token *token) \ +#define DEFINE_BTRFS_SETGET_BITS(bits) \ +u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ + unsigned long off, \ + struct btrfs_map_token *token) \ { \ - unsigned long part_offset = (unsigned long)s; \ - unsigned long offset = part_offset + offsetof(type, member); \ - type *p; \ - int err; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ - unsigned long mem_len = sizeof(((type *)0)->member); \ - u##bits res; \ - if (token && token->kaddr && token->offset <= offset && \ - token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ - kaddr = token->kaddr; \ - p = (type *)(kaddr + part_offset - token->offset); \ - res = le##bits##_to_cpu(p->member); \ - return res; \ - } \ - err = map_private_extent_buffer(eb, offset, \ - mem_len, \ - &kaddr, &map_start, &map_len); \ - if (err) { \ - __le##bits leres; \ - read_eb_member(eb, s, type, member, &leres); \ - return le##bits##_to_cpu(leres); \ - } \ - p = (type *)(kaddr + part_offset - map_start); \ - res = le##bits##_to_cpu(p->member); \ - if (token) { \ - token->kaddr = kaddr; \ - token->offset = map_start; \ - token->eb = eb; \ - } \ - return res; \ + unsigned long part_offset = (unsigned long)ptr; \ + unsigned long offset = part_offset + off; \ + void *p; \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + int size = sizeof(u##bits); \ + u##bits res; \ + \ + if (token && token->kaddr && token->offset <= offset && \ + token->eb == eb && \ + (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ + kaddr = 
token->kaddr; \ + p = kaddr + part_offset - token->offset; \ + res = get_unaligned_le##bits(p + off); \ + return res; \ + } \ + err = map_private_extent_buffer(eb, offset, size, \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits leres; \ + \ + read_extent_buffer(eb, &leres, offset, size); \ + return le##bits##_to_cpu(leres); \ + } \ + p = kaddr + part_offset - map_start; \ + res = get_unaligned_le##bits(p + off); \ + if (token) { \ + token->kaddr = kaddr; \ + token->offset = map_start; \ + token->eb = eb; \ + } \ + return res; \ } \ -void btrfs_set_token_##name(struct extent_buffer *eb, \ - type *s, u##bits val, struct btrfs_map_token *token) \ +void btrfs_set_token_##bits(struct extent_buffer *eb, \ + void *ptr, unsigned long off, u##bits val, \ + struct btrfs_map_token *token) \ { \ - unsigned long part_offset = (unsigned long)s; \ - unsigned long offset = part_offset + offsetof(type, member); \ - type *p; \ - int err; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ - unsigned long mem_len = sizeof(((type *)0)->member); \ - if (token && token->kaddr && token->offset <= offset && \ - token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ - kaddr = token->kaddr; \ - p = (type *)(kaddr + part_offset - token->offset); \ - p->member = cpu_to_le##bits(val); \ - return; \ - } \ - err = map_private_extent_buffer(eb, offset, \ - mem_len, \ - &kaddr, &map_start, &map_len); \ - if (err) { \ - __le##bits val2; \ - val2 = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val2); \ - return; \ - } \ - p = (type *)(kaddr + part_offset - map_start); \ - p->member = cpu_to_le##bits(val); \ - if (token) { \ - token->kaddr = kaddr; \ - token->offset = map_start; \ - token->eb = eb; \ - } \ -} \ -void btrfs_set_##name(struct extent_buffer *eb, \ - type *s, u##bits val) \ -{ \ - btrfs_set_token_##name(eb, s, val, NULL); \ -} \ -u##bits btrfs_##name(struct extent_buffer *eb, \ - type *s) \ -{ \ - return 
btrfs_token_##name(eb, s, NULL); \ -} \ + unsigned long part_offset = (unsigned long)ptr; \ + unsigned long offset = part_offset + off; \ + void *p; \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + int size = sizeof(u##bits); \ + \ + if (token && token->kaddr && token->offset <= offset && \ + token->eb == eb && \ + (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ + kaddr = token->kaddr; \ + p = kaddr + part_offset - token->offset; \ + put_unaligned_le##bits(val, p + off); \ + return; \ + } \ + err = map_private_extent_buffer(eb, offset, size, \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits val2; \ + \ + val2 = cpu_to_le##bits(val); \ + write_extent_buffer(eb, &val2, offset, size); \ + return; \ + } \ + p = kaddr + part_offset - map_start; \ + put_unaligned_le##bits(val, p + off); \ + if (token) { \ + token->kaddr = kaddr; \ + token->offset = map_start; \ + token->eb = eb; \ + } \ +} -#include "ctree.h" +DEFINE_BTRFS_SETGET_BITS(8) +DEFINE_BTRFS_SETGET_BITS(16) +DEFINE_BTRFS_SETGET_BITS(32) +DEFINE_BTRFS_SETGET_BITS(64) void btrfs_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) -- cgit v1.2.3 From 5021976d8dd6d94248026631bfa4578aacd7b563 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 17 Jul 2012 09:02:10 -0600 Subject: Btrfs: remove unwanted printk() for btrfs device I/O stats People complained about the annoying kernel log message "btrfs: no dev_stats entry found ... (OK on first mount after mkfs)" everytime a filesystem is mounted for the first time after running mkfs. Since the distribution of the btrfs-progs is not synchronized to the kernel version, mkfs like it is now will be used also in the future. Then this message is not useful to find errors, it is just annoying. This commit removes the printk(). 
Signed-off-by: Stefan Behrens --- fs/btrfs/volumes.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 39a0d04759f8..14436074350f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4743,9 +4743,6 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) key.offset = device->devid; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { - printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", - rcu_str_deref(device->name), - (unsigned long long)device->devid); __btrfs_reset_dev_stats(device); device->dev_stats_valid = 1; btrfs_release_path(path); -- cgit v1.2.3 From a98cdb85b990765dbe80a215367ae007320bfeea Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 17 Jul 2012 09:02:11 -0600 Subject: Btrfs: suppress printk() if all device I/O stats are zero Code is added to suppress the I/O stats printing at mount time if all statistic values are zero. 
Signed-off-by: Stefan Behrens --- fs/btrfs/volumes.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 14436074350f..b8708f994e67 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4884,6 +4884,14 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) { + int i; + + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) + if (btrfs_dev_stat_read(dev, i) != 0) + break; + if (i == BTRFS_DEV_STAT_VALUES_MAX) + return; /* all values == 0, suppress message */ + printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), -- cgit v1.2.3 From e64860aa05048fa7a8483ca698b17c2caf5625cf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 20 Jul 2012 16:05:36 -0400 Subject: Btrfs: don't return true in releasepage unless we actually freed the eb I noticed while looking at an extent_buffer race that we will unconditionally return 1 if we get down to release_extent_buffer after clearing the tree ref. However we can easily race in here and get a ref on the eb and not actually free the eb. So make release_extent_buffer return 1 if it free'd the eb and 0 if not so we can be a little kinder to the vm. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f08206fcfb22..e6243f787434 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4300,7 +4300,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) } /* Expects to have eb->eb_lock already held */ -static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) +static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) { WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { @@ -4321,9 +4321,11 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) btrfs_release_extent_buffer_page(eb, 0); call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); - return; + return 1; } spin_unlock(&eb->refs_lock); + + return 0; } void free_extent_buffer(struct extent_buffer *eb) @@ -4962,7 +4964,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask) spin_unlock(&eb->refs_lock); return 0; } - release_extent_buffer(eb, mask); - return 1; + return release_extent_buffer(eb, mask); } -- cgit v1.2.3 From 594831c4b232b094d645503ecedec2e35dcebdf3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 20 Jul 2012 16:11:08 -0400 Subject: Btrfs: fix potential race in extent buffer freeing This sounds sort of impossible but it is the only thing I can think of and at the very least it is theoretically possible so here it goes. If we are in try_release_extent_buffer we will check that the ref count on the extent buffer is 1 and not under IO, and then go down and clear the tree ref. If between this check and clearing the tree ref somebody else comes in and grabs a ref on the eb and then marks it dirty before try_release_extent_buffer() does its tree ref clear we can end up with a dirty eb that will be freed while it is still dirty which will result in a panic.
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e6243f787434..e1939a6c7478 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4123,11 +4123,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) * So bump the ref count first, then set the bit. If someone * beat us to it, drop the ref we added. */ - if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { + spin_lock(&eb->refs_lock); + if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) atomic_inc(&eb->refs); - if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_dec(&eb->refs); - } + spin_unlock(&eb->refs_lock); } static void mark_extent_buffer_accessed(struct extent_buffer *eb) @@ -4239,9 +4238,7 @@ again: goto free_eb; } /* add one reference for the tree */ - spin_lock(&eb->refs_lock); check_buffer_tree_ref(eb); - spin_unlock(&eb->refs_lock); spin_unlock(&tree->buffer_lock); radix_tree_preload_end(); -- cgit v1.2.3 From 51561ffec9614618f3da362f9d1b03a95b717484 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 20 Jul 2012 16:25:24 -0400 Subject: Btrfs: lock the transition from dirty to writeback for an eb There is a small window where an eb can have no IO bits set on it, which could potentially result in extent_buffer_under_io() returning false when we want it to return true, which could result in not fun things happening. So in order to protect this case we need to hold the refs_lock when we make this transition to make sure we get reliable results out of extent_buffer_under_io().
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e1939a6c7478..97efc2f22597 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3077,8 +3077,15 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, } } + /* + * We need to do this to prevent races in people who check if the eb is + * under IO since we can end up having no IO bits set for a short period + * of time. + */ + spin_lock(&eb->refs_lock); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); + spin_unlock(&eb->refs_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); spin_lock(&fs_info->delalloc_lock); if (fs_info->dirty_metadata_bytes >= eb->len) @@ -3087,6 +3094,8 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, WARN_ON(1); spin_unlock(&fs_info->delalloc_lock); ret = 1; + } else { + spin_unlock(&eb->refs_lock); } btrfs_tree_unlock(eb); -- cgit v1.2.3 From df57dbe6bf73cc44305d81c24982a11da49b1f79 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 23 Jul 2012 05:50:03 -0600 Subject: Btrfs: make btrfs's allocation smoothly with preallocation For backref walking, we've introduced delayed ref's sequence. However, it changes our preallocation behavior. The story is that when we preallocate an extent and then mark it written piece by piece, the ideal case should be that we don't need to COW the extent, which is why we use 'preallocate'. But we may not make use of preallocation, since when we check for cross refs on the extent, we may have two ref entries which have the same content except the sequence value, and we recognize them as cross refs and do COW to allocate another extent. So we end up with several pieces of space instead of a whole extent.
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6621ed72f3c3..71b2d1c7da69 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2581,8 +2581,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, node = rb_prev(node); if (node) { + int seq = ref->seq; + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr == bytenr) + if (ref->bytenr == bytenr && ref->seq == seq) goto out_unlock; } -- cgit v1.2.3 From 67c9684f48ea9cbc5e9b8a1feb3151800e9dcc22 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 20 Jul 2012 21:43:09 -0600 Subject: Btrfs: improve multi-thread buffer read While testing with my buffer read fio jobs[1], I find that btrfs does not perform well enough. Here is a scenario in fio jobs: We have 4 threads, "t1 t2 t3 t4", starting to buffer read a same file, and all of them will race on add_to_page_cache_lru(), and if one thread successfully puts its page into the page cache, it takes the responsibility to read the page's data. And what's more, reading a page needs a period of time to finish, in which other threads can slide in and process rest pages: t1 t2 t3 t4 add Page1 read Page1 add Page2 | read Page2 add Page3 | | read Page3 add Page4 | | | read Page4 -----|------------|-----------|-----------|-------- v v v v bio bio bio bio Now we have four bios, each of which holds only one page since we need to maintain consecutive pages in bio. Thus, we can end up with far more bios than we need. Here we're going to a) delay the real read-page section and b) try to put more pages into page cache. With that said, we can make each bio hold more pages and reduce the number of bios we need. 
Here are some numbers taken from fio results: w/o patch w patch ------------- -------- --------------- READ: 745MB/s +25% 934MB/s [1]: [global] group_reporting thread numjobs=4 bs=32k rw=read ioengine=sync directory=/mnt/btrfs/ [READ] filename=foobar size=2000M invalidate=1 Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 97efc2f22597..3e7c9ed6505b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3566,19 +3566,38 @@ int extent_readpages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned page_idx; unsigned long bio_flags = 0; + struct page *pagepool[16]; + struct page *page; + int i = 0; + int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); + page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, + if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { - __extent_read_full_page(tree, page, get_extent, - &bio, 0, &bio_flags); + page_cache_release(page); + continue; } - page_cache_release(page); + + pagepool[nr++] = page; + if (nr < ARRAY_SIZE(pagepool)) + continue; + for (i = 0; i < nr; i++) { + __extent_read_full_page(tree, pagepool[i], get_extent, + &bio, 0, &bio_flags); + page_cache_release(pagepool[i]); + } + nr = 0; } + for (i = 0; i < nr; i++) { + __extent_read_full_page(tree, pagepool[i], get_extent, + &bio, 0, &bio_flags); + page_cache_release(pagepool[i]); + } + BUG_ON(!list_empty(pages)); if (bio) return submit_one_bio(READ, bio, 0, bio_flags); -- cgit v1.2.3 From 00401ff780c58b9dabffef668386c206efc71c7c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:14:28 -0400 Subject: cifs: after upcalling for krb5 creds, invalidate key rather than revoking it
Calling key_revoke here isn't ideal as further requests for the key will end up returning -EKEYREVOKED until it gets purged from the cache. What we really intend here is to force a new upcall on the next request_key. Cc: David Howells Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 551d0c2b9736..b4219789049a 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -938,7 +938,7 @@ ssetup_ntlmssp_authenticate: ssetup_exit: if (spnego_key) { - key_revoke(spnego_key); + key_invalidate(spnego_key); key_put(spnego_key); } kfree(str_area); -- cgit v1.2.3 From ac3aa2f8ae29c186c4742d15e39712af417c6d68 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:14:28 -0400 Subject: cifs: remove extraneous newlines from cERROR and cFYI calls Those macros add a newline on their own, so there's not any need to embed one in the message itself. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cache.c | 2 +- fs/cifs/cifsacl.c | 6 +++--- fs/cifs/cifsencrypt.c | 48 ++++++++++++++++++++++++------------------------ fs/cifs/cifssmb.c | 10 +++++----- fs/cifs/connect.c | 14 +++++++------- fs/cifs/file.c | 2 +- fs/cifs/link.c | 14 +++++++------- fs/cifs/smbencrypt.c | 14 +++++++------- 8 files changed, 55 insertions(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 545509c3313b..282d6de7e410 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -152,7 +152,7 @@ static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer, sharename = extract_sharename(tcon->treeName); if (IS_ERR(sharename)) { - cFYI(1, "%s: couldn't extract sharename\n", __func__); + cFYI(1, "%s: couldn't extract sharename", __func__); sharename = NULL; return 0; } diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 3cc1b251ca08..1885da4fca82 100644 --- a/fs/cifs/cifsacl.c +++ 
b/fs/cifs/cifsacl.c @@ -525,7 +525,7 @@ init_cifs_idmap(void) struct key *keyring; int ret; - cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name); + cFYI(1, "Registering the %s key type", cifs_idmap_key_type.name); /* create an override credential set with a special thread keyring in * which requests are cached @@ -572,7 +572,7 @@ init_cifs_idmap(void) sidgidtree = RB_ROOT; register_shrinker(&cifs_shrinker); - cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring)); + cFYI(1, "cifs idmap keyring: %d", key_serial(keyring)); return 0; failed_put_key: @@ -589,7 +589,7 @@ exit_cifs_idmap(void) unregister_key_type(&cifs_idmap_key_type); put_cred(root_cred); unregister_shrinker(&cifs_shrinker); - cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name); + cFYI(1, "Unregistered %s key type", cifs_idmap_key_type.name); } void diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 63c460e503b6..db768cb67e38 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -47,20 +47,20 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, return -EINVAL; if (!server->secmech.sdescmd5) { - cERROR(1, "%s: Can't generate signature\n", __func__); + cERROR(1, "%s: Can't generate signature", __func__); return -1; } rc = crypto_shash_init(&server->secmech.sdescmd5->shash); if (rc) { - cERROR(1, "%s: Could not init md5\n", __func__); + cERROR(1, "%s: Could not init md5", __func__); return rc; } rc = crypto_shash_update(&server->secmech.sdescmd5->shash, server->session_key.response, server->session_key.len); if (rc) { - cERROR(1, "%s: Could not update with response\n", __func__); + cERROR(1, "%s: Could not update with response", __func__); return rc; } @@ -85,7 +85,7 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, iov[i].iov_base, iov[i].iov_len); } if (rc) { - cERROR(1, "%s: Could not update with payload\n", + cERROR(1, "%s: Could not update with payload", __func__); return rc; } @@ -93,7 +93,7 @@ static int 
cifs_calc_signature(const struct kvec *iov, int n_vec, rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); if (rc) - cERROR(1, "%s: Could not generate md5 hash\n", __func__); + cERROR(1, "%s: Could not generate md5 hash", __func__); return rc; } @@ -399,7 +399,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, wchar_t *server; if (!ses->server->secmech.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); + cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash"); return -1; } @@ -415,7 +415,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { - cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n"); + cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5"); return rc; } @@ -423,7 +423,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { - cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); + cERROR(1, "calc_ntlmv2_hash: user mem alloc failure"); rc = -ENOMEM; return rc; } @@ -439,7 +439,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, (char *)user, 2 * len); kfree(user); if (rc) { - cERROR(1, "%s: Could not update with user\n", __func__); + cERROR(1, "%s: Could not update with user", __func__); return rc; } @@ -460,7 +460,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, (char *)domain, 2 * len); kfree(domain); if (rc) { - cERROR(1, "%s: Could not update with domain\n", + cERROR(1, "%s: Could not update with domain", __func__); return rc; } @@ -480,7 +480,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, (char *)server, 2 * len); kfree(server); if (rc) { - cERROR(1, "%s: Could not update with server\n", + cERROR(1, "%s: Could not update with server", __func__); return rc; } @@ -489,7 +489,7 @@ 
static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ntlmv2_hash); if (rc) - cERROR(1, "%s: Could not generate md5 hash\n", __func__); + cERROR(1, "%s: Could not generate md5 hash", __func__); return rc; } @@ -501,7 +501,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) unsigned int offset = CIFS_SESS_KEY_SIZE + 8; if (!ses->server->secmech.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); + cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash"); return -1; } @@ -527,14 +527,14 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + offset, ses->auth_key.len - offset); if (rc) { - cERROR(1, "%s: Could not update with response\n", __func__); + cERROR(1, "%s: Could not update with response", __func__); return rc; } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + CIFS_SESS_KEY_SIZE); if (rc) - cERROR(1, "%s: Could not generate md5 hash\n", __func__); + cERROR(1, "%s: Could not generate md5 hash", __func__); return rc; } @@ -613,7 +613,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { - cERROR(1, "%s: Could not init hmacmd5\n", __func__); + cERROR(1, "%s: Could not init hmacmd5", __func__); goto setup_ntlmv2_rsp_ret; } @@ -621,14 +621,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response + CIFS_SESS_KEY_SIZE, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cERROR(1, "%s: Could not update with response\n", __func__); + cERROR(1, "%s: Could not update with response", __func__); goto setup_ntlmv2_rsp_ret; } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response); if (rc) - cERROR(1, "%s: Could not generate md5 hash\n", 
__func__); + cERROR(1, "%s: Could not generate md5 hash", __func__); setup_ntlmv2_rsp_ret: kfree(tiblob); @@ -650,7 +650,7 @@ calc_seckey(struct cifs_ses *ses) tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_arc4)) { rc = PTR_ERR(tfm_arc4); - cERROR(1, "could not allocate crypto API arc4\n"); + cERROR(1, "could not allocate crypto API arc4"); return rc; } @@ -668,7 +668,7 @@ calc_seckey(struct cifs_ses *ses) rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE); if (rc) { - cERROR(1, "could not encrypt session key rc: %d\n", rc); + cERROR(1, "could not encrypt session key rc: %d", rc); crypto_free_blkcipher(tfm_arc4); return rc; } @@ -705,13 +705,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); if (IS_ERR(server->secmech.hmacmd5)) { - cERROR(1, "could not allocate crypto hmacmd5\n"); + cERROR(1, "could not allocate crypto hmacmd5"); return PTR_ERR(server->secmech.hmacmd5); } server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(server->secmech.md5)) { - cERROR(1, "could not allocate crypto md5\n"); + cERROR(1, "could not allocate crypto md5"); rc = PTR_ERR(server->secmech.md5); goto crypto_allocate_md5_fail; } @@ -720,7 +720,7 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) crypto_shash_descsize(server->secmech.hmacmd5); server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); if (!server->secmech.sdeschmacmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n"); + cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5"); rc = -ENOMEM; goto crypto_allocate_hmacmd5_sdesc_fail; } @@ -732,7 +732,7 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) crypto_shash_descsize(server->secmech.md5); server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); if (!server->secmech.sdescmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n"); + cERROR(1, "cifs_crypto_shash_allocate: 
can't alloc md5"); rc = -ENOMEM; goto crypto_allocate_md5_sdesc_fail; } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4ee522b3f66f..684a0723021f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4137,7 +4137,7 @@ UnixQFileInfoRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) { - cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" + cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response. " "Unix Extensions can be disabled on mount " "by specifying the nosfu mount option."); rc = -EIO; /* bad smb */ @@ -4223,7 +4223,7 @@ UnixQPathInfoRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) { - cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" + cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response. " "Unix Extensions can be disabled on mount " "by specifying the nosfu mount option."); rc = -EIO; /* bad smb */ @@ -4675,7 +4675,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, if (*num_of_nodes < 1) { cERROR(1, "num_referrals: must be at least > 0," - "but we get num_referrals = %d\n", *num_of_nodes); + "but we get num_referrals = %d", *num_of_nodes); rc = -EINVAL; goto parse_DFS_referrals_exit; } @@ -4692,14 +4692,14 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, data_end = (char *)(&(pSMBr->PathConsumed)) + le16_to_cpu(pSMBr->t2.DataCount); - cFYI(1, "num_referrals: %d dfs flags: 0x%x ...\n", + cFYI(1, "num_referrals: %d dfs flags: 0x%x ...", *num_of_nodes, le32_to_cpu(pSMBr->DFSFlags)); *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * *num_of_nodes, GFP_KERNEL); if (*target_nodes == NULL) { - cERROR(1, "Failed to allocate buffer for target_nodes\n"); + cERROR(1, "Failed to allocate buffer for target_nodes"); rc = -ENOMEM; goto parse_DFS_referrals_exit; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 94b7788c3189..e8c3e6b2af8a 100644 --- a/fs/cifs/connect.c +++ 
b/fs/cifs/connect.c @@ -2412,7 +2412,7 @@ cifs_put_smb_ses(struct cifs_ses *ses) int xid; struct TCP_Server_Info *server = ses->server; - cFYI(1, "%s: ses_count=%d\n", __func__, ses->ses_count); + cFYI(1, "%s: ses_count=%d", __func__, ses->ses_count); spin_lock(&cifs_tcp_ses_lock); if (--ses->ses_count > 0) { spin_unlock(&cifs_tcp_ses_lock); @@ -2700,7 +2700,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) int xid; struct cifs_ses *ses = tcon->ses; - cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count); + cFYI(1, "%s: tc_count=%d", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); if (--tcon->tc_count > 0) { spin_unlock(&cifs_tcp_ses_lock); @@ -3009,11 +3009,11 @@ bind_socket(struct TCP_Server_Info *server) saddr6 = (struct sockaddr_in6 *)&server->srcaddr; if (saddr6->sin6_family == AF_INET6) cERROR(1, "cifs: " - "Failed to bind to: %pI6c, error: %d\n", + "Failed to bind to: %pI6c, error: %d", &saddr6->sin6_addr, rc); else cERROR(1, "cifs: " - "Failed to bind to: %pI4, error: %d\n", + "Failed to bind to: %pI4, error: %d", &saddr4->sin_addr.s_addr, rc); } } @@ -3304,9 +3304,9 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, cFYI(1, "resetting capabilities failed"); } else cERROR(1, "Negotiating Unix capabilities " - "with the server failed. Consider " - "mounting with the Unix Extensions\n" - "disabled, if problems are found, " + "with the server failed. 
Consider " + "mounting with the Unix Extensions " + "disabled if problems are found " "by specifying the nounix mount " "option."); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 513adbc211d7..dd28caa0a5ce 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3169,7 +3169,7 @@ static int cifs_readpage(struct file *file, struct page *page) return rc; } - cFYI(1, "readpage %p at offset %d 0x%x\n", + cFYI(1, "readpage %p at offset %d 0x%x", page, (int)offset, (int)offset); rc = cifs_readpage_worker(file, page, &offset); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 6b0e06434391..90d8add2a2a9 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -56,14 +56,14 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) md5 = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(md5)) { rc = PTR_ERR(md5); - cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc); + cERROR(1, "%s: Crypto md5 allocation error %d", __func__, rc); return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); sdescmd5 = kmalloc(size, GFP_KERNEL); if (!sdescmd5) { rc = -ENOMEM; - cERROR(1, "%s: Memory allocation failure\n", __func__); + cERROR(1, "%s: Memory allocation failure", __func__); goto symlink_hash_err; } sdescmd5->shash.tfm = md5; @@ -71,17 +71,17 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) rc = crypto_shash_init(&sdescmd5->shash); if (rc) { - cERROR(1, "%s: Could not init md5 shash\n", __func__); + cERROR(1, "%s: Could not init md5 shash", __func__); goto symlink_hash_err; } rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); if (rc) { - cERROR(1, "%s: Could not update iwth link_str\n", __func__); + cERROR(1, "%s: Could not update iwth link_str", __func__); goto symlink_hash_err; } rc = crypto_shash_final(&sdescmd5->shash, md5_hash); if (rc) - cERROR(1, "%s: Could not generate md5 hash\n", __func__); + cERROR(1, "%s: Could not generate md5 hash", __func__); symlink_hash_err: crypto_free_shash(md5); @@ -115,7 
+115,7 @@ CIFSParseMFSymlink(const u8 *buf, rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { - cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); + cFYI(1, "%s: MD5 hash failure: %d", __func__, rc); return rc; } @@ -154,7 +154,7 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { - cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); + cFYI(1, "%s: MD5 hash failure: %d", __func__, rc); return rc; } diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index d5cd9aa7eacc..a0a58fbe2c10 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -78,7 +78,7 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_des)) { rc = PTR_ERR(tfm_des); - cERROR(1, "could not allocate des crypto API\n"); + cERROR(1, "could not allocate des crypto API"); goto smbhash_err; } @@ -91,7 +91,7 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); if (rc) - cERROR(1, "could not encrypt crypt key rc: %d\n", rc); + cERROR(1, "could not encrypt crypt key rc: %d", rc); crypto_free_blkcipher(tfm_des); smbhash_err: @@ -139,14 +139,14 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) md4 = crypto_alloc_shash("md4", 0, 0); if (IS_ERR(md4)) { rc = PTR_ERR(md4); - cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc); + cERROR(1, "%s: Crypto md4 allocation error %d", __func__, rc); return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md4); sdescmd4 = kmalloc(size, GFP_KERNEL); if (!sdescmd4) { rc = -ENOMEM; - cERROR(1, "%s: Memory allocation failure\n", __func__); + cERROR(1, "%s: Memory allocation failure", __func__); goto mdfour_err; } sdescmd4->shash.tfm = md4; @@ -154,17 +154,17 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) rc = 
crypto_shash_init(&sdescmd4->shash); if (rc) { - cERROR(1, "%s: Could not init md4 shash\n", __func__); + cERROR(1, "%s: Could not init md4 shash", __func__); goto mdfour_err; } rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len); if (rc) { - cERROR(1, "%s: Could not update with link_str\n", __func__); + cERROR(1, "%s: Could not update with link_str", __func__); goto mdfour_err; } rc = crypto_shash_final(&sdescmd4->shash, md4_hash); if (rc) - cERROR(1, "%s: Could not genereate md4 hash\n", __func__); + cERROR(1, "%s: Could not genereate md4 hash", __func__); mdfour_err: crypto_free_shash(md4); -- cgit v1.2.3 From c5fd363d771393a7b42bbbe051f30f97d4867a40 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:28:37 -0400 Subject: cifs: move file_lock off stack in cifs_push_posix_locks struct file_lock is pretty large, so we really don't want that on the stack in a potentially long call chain. Reorganize the arguments to CIFSSMBPosixLock to eliminate the need for that. Eliminate the get_flag and simply use a non-NULL pLockInfo to indicate that this is a "get" operation. In order to do that, need to add a new loff_t argument for the start_offset. 
Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 5 +++-- fs/cifs/cifssmb.c | 16 +++++++--------- fs/cifs/file.c | 8 +++----- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 0a6cbfe2761e..baa1b6dc838e 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -386,8 +386,9 @@ extern int CIFSSMBLock(const int xid, struct cifs_tcon *tcon, const bool waitFlag, const __u8 oplock_level); extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, const __u16 smb_file_id, const __u32 netpid, - const int get_flag, const __u64 len, struct file_lock *, - const __u16 lock_type, const bool waitFlag); + const loff_t start_offset, const __u64 len, + struct file_lock *, const __u16 lock_type, + const bool waitFlag); extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon); extern int CIFSSMBEcho(struct TCP_Server_Info *server); extern int CIFSSMBLogoff(const int xid, struct cifs_ses *ses); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 684a0723021f..5659850f780a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2356,9 +2356,10 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, - const __u16 smb_file_id, const __u32 netpid, const int get_flag, - const __u64 len, struct file_lock *pLockData, - const __u16 lock_type, const bool waitFlag) + const __u16 smb_file_id, const __u32 netpid, + const loff_t start_offset, const __u64 len, + struct file_lock *pLockData, const __u16 lock_type, + const bool waitFlag) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; @@ -2372,9 +2373,6 @@ CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, cFYI(1, "Posix Lock"); - if (pLockData == NULL) - return -EINVAL; - rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); if (rc) @@ -2395,7 +2393,7 @@ 
CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB from sess */ pSMB->SetupCount = 1; pSMB->Reserved3 = 0; - if (get_flag) + if (pLockData) pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FILE_INFORMATION); else pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); @@ -2417,7 +2415,7 @@ CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, pSMB->Timeout = 0; parm_data->pid = cpu_to_le32(netpid); - parm_data->start = cpu_to_le64(pLockData->fl_start); + parm_data->start = cpu_to_le64(start_offset); parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ pSMB->DataOffset = cpu_to_le16(offset); @@ -2441,7 +2439,7 @@ CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, if (rc) { cFYI(1, "Send error in Posix Lock = %d", rc); - } else if (get_flag) { + } else if (pLockData) { /* lock structure can be returned on get */ __u16 data_offset; __u16 data_count; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index dd28caa0a5ce..e9a8ac0047c7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1039,12 +1039,10 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) unlock_flocks(); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { - struct file_lock tmp_lock; int stored_rc; - tmp_lock.fl_start = lck->offset; stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, - 0, lck->length, &tmp_lock, + lck->offset, lck->length, NULL, lck->type, 0); if (stored_rc) rc = stored_rc; @@ -1159,7 +1157,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, else posix_lock_type = CIFS_WRLCK; rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, - 1 /* get */, length, flock, + flock->fl_start, length, flock, posix_lock_type, wait_flag); return rc; } @@ -1353,7 +1351,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_UNLCK; rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, - 0 /* set */, length, flock, + flock->fl_start, length, 
NULL, posix_lock_type, wait_flag); goto out; } -- cgit v1.2.3 From d971e0656bf6e57925292dae755d36fb3c5b093d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:28:37 -0400 Subject: cifs: remove bogus reset of smb_buf_length in smb_send routines There's a comment here about how we don't want to modify this length, but nothing in this function actually does. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index f25d4ea14be4..fdda15a6a803 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -126,7 +126,6 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) int rc = 0; int i = 0; struct msghdr smb_msg; - __be32 *buf_len = (__be32 *)(iov[0].iov_base); unsigned int len = iov[0].iov_len; unsigned int total_len; int first_vec = 0; @@ -235,9 +234,6 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) else rc = 0; - /* Don't want to modify the buffer as a side effect of this call. */ - *buf_len = cpu_to_be32(smb_buf_length); - return rc; } -- cgit v1.2.3 From 762a4206a3f415db0fcf2c7aed9a5e91deff221d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:28:38 -0400 Subject: cifs: rename cifs_sign_smb2 to cifs_sign_smbv "smb2" makes me think of the SMB2.x protocol, which isn't at all what this function is for... 
Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 4 ++-- fs/cifs/cifsproto.h | 2 +- fs/cifs/transport.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index db768cb67e38..6a0d741159f0 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -99,7 +99,7 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, } /* must be called with server->srv_mutex held */ -int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, +int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { int rc = 0; @@ -143,7 +143,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, iov.iov_base = cifs_pdu; iov.iov_len = be32_to_cpu(cifs_pdu->smb_buf_length) + 4; - return cifs_sign_smb2(&iov, 1, server, + return cifs_sign_smbv(&iov, 1, server, pexpected_response_sequence_number); } diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index baa1b6dc838e..613320c9a780 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -399,7 +399,7 @@ extern struct cifs_tcon *tconInfoAlloc(void); extern void tconInfoFree(struct cifs_tcon *); extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); -extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, +extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, struct TCP_Server_Info *server, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index fdda15a6a803..3e6ffe355384 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -361,7 +361,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, if (mid == NULL) return -ENOMEM; - rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); + rc = 
cifs_sign_smbv(iov, nvec, server, &mid->sequence_number); if (rc) { DeleteMidQEntry(mid); return rc; @@ -524,7 +524,7 @@ cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, rc = allocate_mid(ses, hdr, &mid); if (rc) return rc; - rc = cifs_sign_smb2(iov, nvec, ses->server, &mid->sequence_number); + rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number); if (rc) delete_mid(mid); *ret_mid = mid; -- cgit v1.2.3 From 7659624ffb550d69c87f9af9ae63e717daa874bd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 20:34:17 -0400 Subject: cifs: reinstate sec=ntlmv2 mount option sec=ntlmv2 as a mount option got dropped in the mount option overhaul. Cc: Sachin Prabhu Cc: # 3.4+ Reported-by: Günter Kukkukk Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e8c3e6b2af8a..a675b7f47d63 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -238,8 +238,8 @@ static const match_table_t cifs_mount_option_tokens = { enum { Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, Opt_sec_ntlmsspi, Opt_sec_ntlmssp, - Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2i, - Opt_sec_nontlm, Opt_sec_lanman, + Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2, + Opt_sec_ntlmv2i, Opt_sec_lanman, Opt_sec_none, Opt_sec_err @@ -253,8 +253,9 @@ static const match_table_t cifs_secflavor_tokens = { { Opt_sec_ntlmssp, "ntlmssp" }, { Opt_ntlm, "ntlm" }, { Opt_sec_ntlmi, "ntlmi" }, + { Opt_sec_ntlmv2, "nontlm" }, + { Opt_sec_ntlmv2, "ntlmv2" }, { Opt_sec_ntlmv2i, "ntlmv2i" }, - { Opt_sec_nontlm, "nontlm" }, { Opt_sec_lanman, "lanman" }, { Opt_sec_none, "none" }, @@ -1167,7 +1168,7 @@ static int cifs_parse_security_flavors(char *value, case Opt_sec_ntlmi: vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN; break; - case Opt_sec_nontlm: + case Opt_sec_ntlmv2: vol->secFlg |= CIFSSEC_MAY_NTLMV2; break; case Opt_sec_ntlmv2i: -- cgit v1.2.3 From 
316cf94a910f6f93d43cc574359d163ccae098a3 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 23 May 2012 14:31:03 +0400 Subject: CIFS: Move trans2 processing to ops struct Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 + fs/cifs/connect.c | 161 +------------------------------------------------- fs/cifs/smb1ops.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 159 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6df0cbe1cbc9..2aac4e5fb334 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -187,6 +187,9 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); + /* process transaction2 response */ + bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, + char *, int); }; struct smb_version_values { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a675b7f47d63..6d846e7624d0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -395,143 +395,6 @@ cifs_reconnect(struct TCP_Server_Info *server) return rc; } -/* - return codes: - 0 not a transact2, or all data present - >0 transact2 with that much data missing - -EINVAL = invalid transact2 - - */ -static int check2ndT2(char *buf) -{ - struct smb_hdr *pSMB = (struct smb_hdr *)buf; - struct smb_t2_rsp *pSMBt; - int remaining; - __u16 total_data_size, data_in_this_rsp; - - if (pSMB->Command != SMB_COM_TRANSACTION2) - return 0; - - /* check for plausible wct, bcc and t2 data and parm sizes */ - /* check for parm and data offset going beyond end of smb */ - if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ - cFYI(1, "invalid transact2 word count"); - return -EINVAL; - } - - pSMBt = (struct smb_t2_rsp *)pSMB; - - total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); - data_in_this_rsp = 
get_unaligned_le16(&pSMBt->t2_rsp.DataCount); - - if (total_data_size == data_in_this_rsp) - return 0; - else if (total_data_size < data_in_this_rsp) { - cFYI(1, "total data %d smaller than data in frame %d", - total_data_size, data_in_this_rsp); - return -EINVAL; - } - - remaining = total_data_size - data_in_this_rsp; - - cFYI(1, "missing %d bytes from transact2, check next response", - remaining); - if (total_data_size > CIFSMaxBufSize) { - cERROR(1, "TotalDataSize %d is over maximum buffer %d", - total_data_size, CIFSMaxBufSize); - return -EINVAL; - } - return remaining; -} - -static int coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) -{ - struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)second_buf; - struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)target_hdr; - char *data_area_of_tgt; - char *data_area_of_src; - int remaining; - unsigned int byte_count, total_in_tgt; - __u16 tgt_total_cnt, src_total_cnt, total_in_src; - - src_total_cnt = get_unaligned_le16(&pSMBs->t2_rsp.TotalDataCount); - tgt_total_cnt = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); - - if (tgt_total_cnt != src_total_cnt) - cFYI(1, "total data count of primary and secondary t2 differ " - "source=%hu target=%hu", src_total_cnt, tgt_total_cnt); - - total_in_tgt = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); - - remaining = tgt_total_cnt - total_in_tgt; - - if (remaining < 0) { - cFYI(1, "Server sent too much data. 
tgt_total_cnt=%hu " - "total_in_tgt=%hu", tgt_total_cnt, total_in_tgt); - return -EPROTO; - } - - if (remaining == 0) { - /* nothing to do, ignore */ - cFYI(1, "no more data remains"); - return 0; - } - - total_in_src = get_unaligned_le16(&pSMBs->t2_rsp.DataCount); - if (remaining < total_in_src) - cFYI(1, "transact2 2nd response contains too much data"); - - /* find end of first SMB data area */ - data_area_of_tgt = (char *)&pSMBt->hdr.Protocol + - get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); - - /* validate target area */ - data_area_of_src = (char *)&pSMBs->hdr.Protocol + - get_unaligned_le16(&pSMBs->t2_rsp.DataOffset); - - data_area_of_tgt += total_in_tgt; - - total_in_tgt += total_in_src; - /* is the result too big for the field? */ - if (total_in_tgt > USHRT_MAX) { - cFYI(1, "coalesced DataCount too large (%u)", total_in_tgt); - return -EPROTO; - } - put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount); - - /* fix up the BCC */ - byte_count = get_bcc(target_hdr); - byte_count += total_in_src; - /* is the result too big for the field? 
*/ - if (byte_count > USHRT_MAX) { - cFYI(1, "coalesced BCC too large (%u)", byte_count); - return -EPROTO; - } - put_bcc(byte_count, target_hdr); - - byte_count = be32_to_cpu(target_hdr->smb_buf_length); - byte_count += total_in_src; - /* don't allow buffer to overflow */ - if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count); - return -ENOBUFS; - } - target_hdr->smb_buf_length = cpu_to_be32(byte_count); - - /* copy second buffer into end of first buffer */ - memcpy(data_area_of_tgt, data_area_of_src, total_in_src); - - if (remaining != total_in_src) { - /* more responses to go */ - cFYI(1, "waiting for more secondary responses"); - return 1; - } - - /* we are done */ - cFYI(1, "found the last secondary response"); - return 0; -} - static void cifs_echo_request(struct work_struct *work) { @@ -804,29 +667,9 @@ static void handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server, char *buf, int malformed) { - if (malformed == 0 && check2ndT2(buf) > 0) { - mid->multiRsp = true; - if (mid->resp_buf) { - /* merge response - fix up 1st*/ - malformed = coalesce_t2(buf, mid->resp_buf); - if (malformed > 0) - return; - - /* All parts received or packet is malformed. */ - mid->multiEnd = true; - return dequeue_mid(mid, malformed); - } - if (!server->large_buf) { - /*FIXME: switch to already allocated largebuf?*/ - cERROR(1, "1st trans2 resp needs bigbuf"); - } else { - /* Have first buffer */ - mid->resp_buf = buf; - mid->large_buf = true; - server->bigbuf = NULL; - } + if (server->ops->check_trans2 && + server->ops->check_trans2(mid, server, buf, malformed)) return; - } mid->resp_buf = buf; mid->large_buf = server->large_buf; /* Was previous buf put in mpx struct for multi-rsp? 
*/ diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 6dec38f5522d..28359e789fff 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -213,6 +213,175 @@ cifs_get_next_mid(struct TCP_Server_Info *server) return mid; } +/* + return codes: + 0 not a transact2, or all data present + >0 transact2 with that much data missing + -EINVAL invalid transact2 + */ +static int +check2ndT2(char *buf) +{ + struct smb_hdr *pSMB = (struct smb_hdr *)buf; + struct smb_t2_rsp *pSMBt; + int remaining; + __u16 total_data_size, data_in_this_rsp; + + if (pSMB->Command != SMB_COM_TRANSACTION2) + return 0; + + /* check for plausible wct, bcc and t2 data and parm sizes */ + /* check for parm and data offset going beyond end of smb */ + if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ + cFYI(1, "invalid transact2 word count"); + return -EINVAL; + } + + pSMBt = (struct smb_t2_rsp *)pSMB; + + total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); + data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); + + if (total_data_size == data_in_this_rsp) + return 0; + else if (total_data_size < data_in_this_rsp) { + cFYI(1, "total data %d smaller than data in frame %d", + total_data_size, data_in_this_rsp); + return -EINVAL; + } + + remaining = total_data_size - data_in_this_rsp; + + cFYI(1, "missing %d bytes from transact2, check next response", + remaining); + if (total_data_size > CIFSMaxBufSize) { + cERROR(1, "TotalDataSize %d is over maximum buffer %d", + total_data_size, CIFSMaxBufSize); + return -EINVAL; + } + return remaining; +} + +static int +coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) +{ + struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)second_buf; + struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)target_hdr; + char *data_area_of_tgt; + char *data_area_of_src; + int remaining; + unsigned int byte_count, total_in_tgt; + __u16 tgt_total_cnt, src_total_cnt, total_in_src; + + src_total_cnt = 
get_unaligned_le16(&pSMBs->t2_rsp.TotalDataCount); + tgt_total_cnt = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); + + if (tgt_total_cnt != src_total_cnt) + cFYI(1, "total data count of primary and secondary t2 differ " + "source=%hu target=%hu", src_total_cnt, tgt_total_cnt); + + total_in_tgt = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); + + remaining = tgt_total_cnt - total_in_tgt; + + if (remaining < 0) { + cFYI(1, "Server sent too much data. tgt_total_cnt=%hu " + "total_in_tgt=%hu", tgt_total_cnt, total_in_tgt); + return -EPROTO; + } + + if (remaining == 0) { + /* nothing to do, ignore */ + cFYI(1, "no more data remains"); + return 0; + } + + total_in_src = get_unaligned_le16(&pSMBs->t2_rsp.DataCount); + if (remaining < total_in_src) + cFYI(1, "transact2 2nd response contains too much data"); + + /* find end of first SMB data area */ + data_area_of_tgt = (char *)&pSMBt->hdr.Protocol + + get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); + + /* validate target area */ + data_area_of_src = (char *)&pSMBs->hdr.Protocol + + get_unaligned_le16(&pSMBs->t2_rsp.DataOffset); + + data_area_of_tgt += total_in_tgt; + + total_in_tgt += total_in_src; + /* is the result too big for the field? */ + if (total_in_tgt > USHRT_MAX) { + cFYI(1, "coalesced DataCount too large (%u)", total_in_tgt); + return -EPROTO; + } + put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount); + + /* fix up the BCC */ + byte_count = get_bcc(target_hdr); + byte_count += total_in_src; + /* is the result too big for the field? 
*/ + if (byte_count > USHRT_MAX) { + cFYI(1, "coalesced BCC too large (%u)", byte_count); + return -EPROTO; + } + put_bcc(byte_count, target_hdr); + + byte_count = be32_to_cpu(target_hdr->smb_buf_length); + byte_count += total_in_src; + /* don't allow buffer to overflow */ + if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count); + return -ENOBUFS; + } + target_hdr->smb_buf_length = cpu_to_be32(byte_count); + + /* copy second buffer into end of first buffer */ + memcpy(data_area_of_tgt, data_area_of_src, total_in_src); + + if (remaining != total_in_src) { + /* more responses to go */ + cFYI(1, "waiting for more secondary responses"); + return 1; + } + + /* we are done */ + cFYI(1, "found the last secondary response"); + return 0; +} + +static bool +cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, + char *buf, int malformed) +{ + if (malformed) + return false; + if (check2ndT2(buf) <= 0) + return false; + mid->multiRsp = true; + if (mid->resp_buf) { + /* merge response - fix up 1st*/ + malformed = coalesce_t2(buf, mid->resp_buf); + if (malformed > 0) + return true; + /* All parts received or packet is malformed. 
*/ + mid->multiEnd = true; + dequeue_mid(mid, malformed); + return true; + } + if (!server->large_buf) { + /*FIXME: switch to already allocated largebuf?*/ + cERROR(1, "1st trans2 resp needs bigbuf"); + } else { + /* Have first buffer */ + mid->resp_buf = buf; + mid->large_buf = true; + server->bigbuf = NULL; + } + return true; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -229,6 +398,7 @@ struct smb_version_operations smb1_operations = { .check_message = checkSMB, .dump_detail = cifs_dump_detail, .is_oplock_break = is_valid_oplock_break, + .check_trans2 = cifs_check_trans2, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From a891f0f895f4a760fdb99636fab05e60597b8224 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 23 May 2012 16:14:34 +0400 Subject: CIFS: Extend credit mechanism to process request type Split all requests to echos, oplocks and others - each group uses its own credit slot. This is indicated by new flags CIFS_ECHO_OP and CIFS_OBREAK_OP that are not used now for CIFS. This change is required to support SMB2 protocol because of different processing of these commands. 
Acked-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 16 +++++++++--- fs/cifs/cifsproto.h | 4 +-- fs/cifs/cifssmb.c | 21 ++++++++-------- fs/cifs/smb1ops.c | 12 +++++++-- fs/cifs/transport.c | 70 +++++++++++++++++++++++++++++------------------------ 5 files changed, 74 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2aac4e5fb334..844b77c2bc9c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -171,9 +171,11 @@ struct smb_version_operations { /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); - void (*add_credits)(struct TCP_Server_Info *, const unsigned int); + void (*add_credits)(struct TCP_Server_Info *, const unsigned int, + const int); void (*set_credits)(struct TCP_Server_Info *, const int); - int * (*get_credits_field)(struct TCP_Server_Info *); + int * (*get_credits_field)(struct TCP_Server_Info *, const int); + unsigned int (*get_credits)(struct mid_q_entry *); __u64 (*get_next_mid)(struct TCP_Server_Info *); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); @@ -392,9 +394,10 @@ has_credits(struct TCP_Server_Info *server, int *credits) } static inline void -add_credits(struct TCP_Server_Info *server, const unsigned int add) +add_credits(struct TCP_Server_Info *server, const unsigned int add, + const int optype) { - server->ops->add_credits(server, add); + server->ops->add_credits(server, add, optype); } static inline void @@ -957,6 +960,11 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */ #define CIFS_NO_RESP 0x040 /* no response buffer required */ +/* Type of request operation */ +#define CIFS_ECHO_OP 0x080 /* echo request */ +#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ +#define CIFS_OP_MASK 0x0180 /* mask request type */ + /* 
Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLM 0x00002 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 613320c9a780..b37399491fa3 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -71,11 +71,11 @@ extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, unsigned int nvec, mid_receive_t *receive, mid_callback_t *callback, void *cbdata, - bool ignore_pend); + const int flags); extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , - int * /* bytes returned */ , const int long_op); + int * /* bytes returned */ , const int); extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, char *in_buf, int flags); extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5659850f780a..92bbd8487ead 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -744,7 +744,7 @@ cifs_echo_callback(struct mid_q_entry *mid) struct TCP_Server_Info *server = mid->callback_data; DeleteMidQEntry(mid); - add_credits(server, 1); + add_credits(server, 1, CIFS_ECHO_OP); } int @@ -771,7 +771,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, - server, true); + server, CIFS_ASYNC_OP | CIFS_ECHO_OP); if (rc) cFYI(1, "Echo request failed: %d", rc); @@ -1589,7 +1589,7 @@ cifs_readv_callback(struct mid_q_entry *mid) queue_work(cifsiod_wq, &rdata->work); DeleteMidQEntry(mid); - add_credits(server, 1); + add_credits(server, 1, 0); } /* cifs_async_readv - send an async write, and set up mid to handle result */ @@ -1645,7 +1645,7 @@ cifs_async_readv(struct cifs_readdata *rdata) kref_get(&rdata->refcount); rc = cifs_call_async(tcon->ses->server, 
rdata->iov, 1, cifs_readv_receive, cifs_readv_callback, - rdata, false); + rdata, 0); if (rc == 0) cifs_stats_inc(&tcon->num_reads); @@ -2036,7 +2036,7 @@ cifs_writev_callback(struct mid_q_entry *mid) queue_work(cifsiod_wq, &wdata->work); DeleteMidQEntry(mid); - add_credits(tcon->ses->server, 1); + add_credits(tcon->ses->server, 1, 0); } /* cifs_async_writev - send an async write, and set up mid to handle result */ @@ -2118,7 +2118,7 @@ cifs_async_writev(struct cifs_writedata *wdata) kref_get(&wdata->refcount); rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, - NULL, cifs_writev_callback, wdata, false); + NULL, cifs_writev_callback, wdata, 0); if (rc == 0) cifs_stats_inc(&tcon->num_writes); @@ -2296,7 +2296,7 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, LOCK_REQ *pSMB = NULL; /* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */ int bytes_returned; - int timeout = 0; + int flags = 0; __u16 count; cFYI(1, "CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock); @@ -2306,10 +2306,11 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, return rc; if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { - timeout = CIFS_ASYNC_OP; /* no response expected */ + /* no response expected */ + flags = CIFS_ASYNC_OP | CIFS_OBREAK_OP; pSMB->Timeout = 0; } else if (waitFlag) { - timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ + flags = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */ } else { pSMB->Timeout = 0; @@ -2342,7 +2343,7 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, (struct smb_hdr *) pSMB, &bytes_returned); cifs_small_buf_release(pSMB); } else { - rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, timeout); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, flags); /* SMB buffer freed by function above */ } cifs_stats_inc(&tcon->num_locks); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 28359e789fff..f4f839459e90 
100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -101,7 +101,8 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) } static void -cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add) +cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add, + const int optype) { spin_lock(&server->req_lock); server->credits += add; @@ -120,11 +121,17 @@ cifs_set_credits(struct TCP_Server_Info *server, const int val) } static int * -cifs_get_credits_field(struct TCP_Server_Info *server) +cifs_get_credits_field(struct TCP_Server_Info *server, const int optype) { return &server->credits; } +static unsigned int +cifs_get_credits(struct mid_q_entry *mid) +{ + return 1; +} + /* * Find a free multiplex id (SMB mid). Otherwise there could be * mid collisions which might cause problems, demultiplexing the @@ -390,6 +397,7 @@ struct smb_version_operations smb1_operations = { .add_credits = cifs_add_credits, .set_credits = cifs_set_credits, .get_credits_field = cifs_get_credits_field, + .get_credits = cifs_get_credits, .get_next_mid = cifs_get_next_mid, .read_data_offset = cifs_read_data_offset, .read_data_length = cifs_read_data_length, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3e6ffe355384..904702db2526 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -250,13 +250,13 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, } static int -wait_for_free_credits(struct TCP_Server_Info *server, const int optype, +wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, int *credits) { int rc; spin_lock(&server->req_lock); - if (optype == CIFS_ASYNC_OP) { + if (timeout == CIFS_ASYNC_OP) { /* oplock breaks must not be held up */ server->in_flight++; *credits -= 1; @@ -286,7 +286,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int optype, */ /* update # of requests on the wire to server */ - if (optype != CIFS_BLOCKING_OP) { + if (timeout != CIFS_BLOCKING_OP) { 
*credits -= 1; server->in_flight++; } @@ -298,10 +298,11 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int optype, } static int -wait_for_free_request(struct TCP_Server_Info *server, const int optype) +wait_for_free_request(struct TCP_Server_Info *server, const int timeout, + const int optype) { - return wait_for_free_credits(server, optype, - server->ops->get_credits_field(server)); + return wait_for_free_credits(server, timeout, + server->ops->get_credits_field(server, optype)); } static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, @@ -378,12 +379,15 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, unsigned int nvec, mid_receive_t *receive, - mid_callback_t *callback, void *cbdata, bool ignore_pend) + mid_callback_t *callback, void *cbdata, const int flags) { - int rc; + int rc, timeout, optype; struct mid_q_entry *mid; - rc = wait_for_free_request(server, ignore_pend ? 
CIFS_ASYNC_OP : 0); + timeout = flags & CIFS_TIMEOUT_MASK; + optype = flags & CIFS_OP_MASK; + + rc = wait_for_free_request(server, timeout, optype); if (rc) return rc; @@ -391,7 +395,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, rc = cifs_setup_async_request(server, iov, nvec, &mid); if (rc) { mutex_unlock(&server->srv_mutex); - add_credits(server, 1); + add_credits(server, 1, optype); wake_up(&server->request_q); return rc; } @@ -417,7 +421,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, return 0; delete_mid(mid); - add_credits(server, 1); + add_credits(server, 1, optype); wake_up(&server->request_q); return rc; } @@ -533,17 +537,19 @@ cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, int SendReceive2(const unsigned int xid, struct cifs_ses *ses, - struct kvec *iov, int n_vec, int *pRespBufType /* ret */, + struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, const int flags) { int rc = 0; - int long_op; + int timeout, optype; struct mid_q_entry *midQ; char *buf = iov[0].iov_base; + unsigned int credits = 1; - long_op = flags & CIFS_TIMEOUT_MASK; + timeout = flags & CIFS_TIMEOUT_MASK; + optype = flags & CIFS_OP_MASK; - *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ + *resp_buf_type = CIFS_NO_BUFFER; /* no response buf yet */ if ((ses == NULL) || (ses->server == NULL)) { cifs_small_buf_release(buf); @@ -562,7 +568,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, * use ses->maxReq. 
*/ - rc = wait_for_free_request(ses->server, long_op); + rc = wait_for_free_request(ses->server, timeout, optype); if (rc) { cifs_small_buf_release(buf); return rc; @@ -581,7 +587,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, mutex_unlock(&ses->server->srv_mutex); cifs_small_buf_release(buf); /* Update # of requests on wire to server */ - add_credits(ses->server, 1); + add_credits(ses->server, 1, optype); return rc; } @@ -598,7 +604,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, goto out; } - if (long_op == CIFS_ASYNC_OP) { + if (timeout == CIFS_ASYNC_OP) { cifs_small_buf_release(buf); goto out; } @@ -611,7 +617,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); cifs_small_buf_release(buf); - add_credits(ses->server, 1); + add_credits(ses->server, 1, optype); return rc; } spin_unlock(&GlobalMid_Lock); @@ -621,7 +627,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, rc = cifs_sync_mid_result(midQ, ses->server); if (rc != 0) { - add_credits(ses->server, 1); + add_credits(ses->server, 1, optype); return rc; } @@ -635,9 +641,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, iov[0].iov_base = buf; iov[0].iov_len = get_rfc1002_length(buf) + 4; if (midQ->large_buf) - *pRespBufType = CIFS_LARGE_BUFFER; + *resp_buf_type = CIFS_LARGE_BUFFER; else - *pRespBufType = CIFS_SMALL_BUFFER; + *resp_buf_type = CIFS_SMALL_BUFFER; + + credits = ses->server->ops->get_credits(midQ); rc = ses->server->ops->check_receive(midQ, ses->server, flags & CIFS_LOG_ERROR); @@ -647,7 +655,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, midQ->resp_buf = NULL; out: delete_mid(midQ); - add_credits(ses->server, 1); + add_credits(ses->server, credits, optype); return rc; } @@ -655,7 +663,7 @@ out: int SendReceive(const unsigned int xid, struct cifs_ses *ses, struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned, const int long_op) + 
int *pbytes_returned, const int timeout) { int rc = 0; struct mid_q_entry *midQ; @@ -683,7 +691,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, return -EIO; } - rc = wait_for_free_request(ses->server, long_op); + rc = wait_for_free_request(ses->server, timeout, 0); if (rc) return rc; @@ -697,7 +705,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (rc) { mutex_unlock(&ses->server->srv_mutex); /* Update # of requests on wire to server */ - add_credits(ses->server, 1); + add_credits(ses->server, 1, 0); return rc; } @@ -718,7 +726,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (rc < 0) goto out; - if (long_op == CIFS_ASYNC_OP) + if (timeout == CIFS_ASYNC_OP) goto out; rc = wait_for_response(ses->server, midQ); @@ -729,7 +737,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, /* no longer considered to be "in-flight" */ midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); - add_credits(ses->server, 1); + add_credits(ses->server, 1, 0); return rc; } spin_unlock(&GlobalMid_Lock); @@ -737,7 +745,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = cifs_sync_mid_result(midQ, ses->server); if (rc != 0) { - add_credits(ses->server, 1); + add_credits(ses->server, 1, 0); return rc; } @@ -753,7 +761,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = cifs_check_receive(midQ, ses->server, 0); out: delete_mid(midQ); - add_credits(ses->server, 1); + add_credits(ses->server, 1, 0); return rc; } @@ -818,7 +826,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, return -EIO; } - rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP); + rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 0); if (rc) return rc; -- cgit v1.2.3 From 286170aa241819f39d9d1d5d9f2434cfb8519506 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 25 May 2012 10:43:58 +0400 Subject: CIFS: Move protocol specific negotiate code to ops struct Reviewed-by: Jeff Layton 
Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 8 ++++++-- fs/cifs/cifsproto.h | 6 +++--- fs/cifs/cifssmb.c | 4 ++-- fs/cifs/connect.c | 24 +++++++++++------------- fs/cifs/sess.c | 2 +- fs/cifs/smb1ops.c | 23 +++++++++++++++++++++++ 6 files changed, 46 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 844b77c2bc9c..8a4150573cf8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -192,6 +192,10 @@ struct smb_version_operations { /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); + /* check if we need to negotiate */ + bool (*need_neg)(struct TCP_Server_Info *); + /* negotiate to the server */ + int (*negotiate)(const unsigned int, struct cifs_ses *); }; struct smb_version_values { @@ -324,7 +328,7 @@ struct TCP_Server_Info { struct mutex srv_mutex; struct task_struct *tsk; char server_GUID[16]; - char sec_mode; + __u16 sec_mode; bool session_estab; /* mark when very first sess is established */ u16 dialect; /* dialect index that server chose */ enum securityEnum secType; @@ -459,7 +463,7 @@ struct cifs_ses { char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ - int Suid; /* remote smb uid */ + __u64 Suid; /* remote smb uid */ uid_t linux_uid; /* overriding owner of files on the mount */ uid_t cred_uid; /* owner of credentials */ int capabilities; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b37399491fa3..723a3273c6bb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -178,11 +178,11 @@ extern void cifs_dfs_release_automount_timer(void); void cifs_proc_init(void); void cifs_proc_clean(void); -extern int cifs_negotiate_protocol(unsigned int xid, - struct cifs_ses *ses); +extern int cifs_negotiate_protocol(const unsigned int xid, + struct cifs_ses 
*ses); extern int cifs_setup_session(unsigned int xid, struct cifs_ses *ses, struct nls_table *nls_info); -extern int CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses); +extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); extern int CIFSTCon(unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 92bbd8487ead..ae59d6e4e4f5 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -396,7 +396,7 @@ static inline void inc_rfc1001_len(void *pSMB, int count) } int -CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) +CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) { NEGOTIATE_REQ *pSMB; NEGOTIATE_RSP *pSMBr; @@ -480,7 +480,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) rc = -EOPNOTSUPP; goto neg_err_exit; } - server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); + server->sec_mode = le16_to_cpu(rsp->SecurityMode); server->maxReq = min_t(unsigned int, le16_to_cpu(rsp->MaxMpxCount), cifs_max_pending); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6d846e7624d0..03389f59390f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -407,7 +407,7 @@ cifs_echo_request(struct work_struct *work) * done, which is indicated by maxBuf != 0. 
Also, no need to ping if * we got a response recently */ - if (server->maxBuf == 0 || + if (!server->ops->need_neg || server->ops->need_neg(server) || time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; @@ -2406,7 +2406,8 @@ static bool warned_on_ntlm; /* globals init to false automatically */ static struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) { - int rc = -ENOMEM, xid; + int rc = -ENOMEM; + unsigned int xid; struct cifs_ses *ses; struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; @@ -3960,24 +3961,22 @@ cifs_umount(struct cifs_sb_info *cifs_sb) kfree(cifs_sb); } -int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) +int +cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses) { int rc = 0; struct TCP_Server_Info *server = ses->server; + if (!server->ops->need_neg || !server->ops->negotiate) + return -ENOSYS; + /* only send once per connect */ - if (server->maxBuf != 0) + if (!server->ops->need_neg(server)) return 0; set_credits(server, 1); - rc = CIFSSMBNegotiate(xid, ses); - if (rc == -EAGAIN) { - /* retry only once on 1st time connection */ - set_credits(server, 1); - rc = CIFSSMBNegotiate(xid, ses); - if (rc == -EAGAIN) - rc = -EHOSTDOWN; - } + + rc = server->ops->negotiate(xid, ses); if (rc == 0) { spin_lock(&GlobalMid_Lock); if (server->tcpStatus == CifsNeedNegotiate) @@ -3985,7 +3984,6 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) else rc = -EHOSTDOWN; spin_unlock(&GlobalMid_Lock); - } return rc; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b4219789049a..3ba3f3cd2397 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -898,7 +898,7 @@ ssetup_ntlmssp_authenticate: if (action & GUEST_LOGIN) cFYI(1, "Guest login"); /* BB mark SesInfo struct? 
*/ ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ - cFYI(1, "UID = %d ", ses->Suid); + cFYI(1, "UID = %llu ", ses->Suid); /* response can have either 3 or 4 word count - Samba sends 3 */ /* and lanman response is 3 */ bytes_remaining = get_bcc(smb_buf); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index f4f839459e90..ea4fb8aaaafb 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -389,6 +389,27 @@ cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, return true; } +static bool +cifs_need_neg(struct TCP_Server_Info *server) +{ + return server->maxBuf == 0; +} + +static int +cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) +{ + int rc; + rc = CIFSSMBNegotiate(xid, ses); + if (rc == -EAGAIN) { + /* retry only once on 1st time connection */ + set_credits(ses->server, 1); + rc = CIFSSMBNegotiate(xid, ses); + if (rc == -EAGAIN) + rc = -EHOSTDOWN; + } + return rc; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -407,6 +428,8 @@ struct smb_version_operations smb1_operations = { .dump_detail = cifs_dump_detail, .is_oplock_break = is_valid_oplock_break, .check_trans2 = cifs_check_trans2, + .need_neg = cifs_need_neg, + .negotiate = cifs_negotiate, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 2164d3344693d2d4799fe91836d61f55516cbdf0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 23 Jun 2012 11:33:51 +0800 Subject: pipe: remove KM_USER0 from comments Signed-off-by: Cong Wang --- fs/pipe.c | 2 +- include/linux/pipe_fs_i.h | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 49c1065256fd..95cbd6b227e6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -224,7 +224,7 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, * and the caller has to be careful not to fault before calling * the unmap function. 
* - * Note that this function occupies KM_USER0 if @atomic != 0. + * Note that this function calls kmap_atomic() if @atomic != 0. */ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *buf, int atomic) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index e1ac1ce16fb0..e11d1c0fc60f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -86,11 +86,9 @@ struct pipe_buf_operations { * mapping or not. The atomic map is faster, however you can't take * page faults before calling ->unmap() again. So if you need to eg * access user data through copy_to/from_user(), then you must get - * a non-atomic map. ->map() uses the KM_USER0 atomic slot for - * atomic maps, so you can't map more than one pipe_buffer at once - * and you have to be careful if mapping another page as source - * or destination for a copy (IOW, it has to use something else - * than KM_USER0). + * a non-atomic map. ->map() uses the kmap_atomic slot for + * atomic maps, you have to be careful if mapping another page as + * source or destination for a copy. 
*/ void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); -- cgit v1.2.3 From 58c45c58a1cbc8d2e1d07839820bf745fb3e7f41 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 25 May 2012 10:54:49 +0400 Subject: CIFS: Move protocol specific session setup/logoff code to ops struct Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/cifsproto.h | 10 +++++----- fs/cifs/cifssmb.c | 2 +- fs/cifs/connect.c | 18 ++++++++++-------- fs/cifs/sess.c | 2 +- fs/cifs/smb1ops.c | 2 ++ 6 files changed, 24 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8a4150573cf8..a6eb9befdb2d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -196,6 +196,11 @@ struct smb_version_operations { bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ int (*negotiate)(const unsigned int, struct cifs_ses *); + /* setup smb sessionn */ + int (*sess_setup)(const unsigned int, struct cifs_ses *, + const struct nls_table *); + /* close smb session */ + int (*logoff)(const unsigned int, struct cifs_ses *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 723a3273c6bb..a17be2618473 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -112,8 +112,8 @@ extern void header_assemble(struct smb_hdr *, char /* command */ , extern int small_smb_init_no_tc(const int smb_cmd, const int wct, struct cifs_ses *ses, void **request_buf); -extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, - const struct nls_table *nls_cp); +extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_cp); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, @@ -180,8 +180,8 @@ void cifs_proc_clean(void); extern int 
cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses); -extern int cifs_setup_session(unsigned int xid, struct cifs_ses *ses, - struct nls_table *nls_info); +extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + struct nls_table *nls_info); extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); extern int CIFSTCon(unsigned int xid, struct cifs_ses *ses, @@ -391,7 +391,7 @@ extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, const bool waitFlag); extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon); extern int CIFSSMBEcho(struct TCP_Server_Info *server); -extern int CIFSSMBLogoff(const int xid, struct cifs_ses *ses); +extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses); extern struct cifs_ses *sesInfoAlloc(void); extern void sesInfoFree(struct cifs_ses *); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index ae59d6e4e4f5..915b8fc212e9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -781,7 +781,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) } int -CIFSSMBLogoff(const int xid, struct cifs_ses *ses) +CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) { LOGOFF_ANDX_REQ *pSMB; int rc = 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 03389f59390f..444243d9232b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2253,7 +2253,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) static void cifs_put_smb_ses(struct cifs_ses *ses) { - int xid; + unsigned int xid; struct TCP_Server_Info *server = ses->server; cFYI(1, "%s: ses_count=%d", __func__, ses->ses_count); @@ -2266,9 +2266,9 @@ cifs_put_smb_ses(struct cifs_ses *ses) list_del_init(&ses->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - if (ses->status == CifsGood) { + if (ses->status == CifsGood && server->ops->logoff) { xid = GetXid(); - CIFSSMBLogoff(xid, ses); + server->ops->logoff(xid, ses); _FreeXid(xid); } sesInfoFree(ses); @@ -3989,11 +3989,11 @@ 
cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses) return rc; } - -int cifs_setup_session(unsigned int xid, struct cifs_ses *ses, - struct nls_table *nls_info) +int +cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + struct nls_table *nls_info) { - int rc = 0; + int rc = -ENOSYS; struct TCP_Server_Info *server = ses->server; ses->flags = 0; @@ -4004,7 +4004,9 @@ int cifs_setup_session(unsigned int xid, struct cifs_ses *ses, cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", server->sec_mode, server->capabilities, server->timeAdj); - rc = CIFS_SessSetup(xid, ses, nls_info); + if (server->ops->sess_setup) + rc = server->ops->sess_setup(xid, ses, nls_info); + if (rc) { cERROR(1, "Send error in SessSetup = %d", rc); } else { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 3ba3f3cd2397..08efc3c8efef 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -556,7 +556,7 @@ setup_ntlmv2_ret: } int -CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, +CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) { int rc = 0; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index ea4fb8aaaafb..6b0a5d616338 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -430,6 +430,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, + .sess_setup = CIFS_SessSetup, + .logoff = CIFSSMBLogoff, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 2e6e02ab6ddbd539fd7e092973daf057adbd53dc Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 25 May 2012 11:11:39 +0400 Subject: CIFS: Move protocol specific tcon/tdis code to ops struct and rename variables around the code changes. 
Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 6 +++++ fs/cifs/cifsproto.h | 8 +++--- fs/cifs/cifssmb.c | 2 +- fs/cifs/connect.c | 73 +++++++++++++++++++++++++++++++---------------------- fs/cifs/smb1ops.c | 2 ++ 5 files changed, 56 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a6eb9befdb2d..6d18962c9903 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -160,6 +160,7 @@ struct mid_q_entry; struct TCP_Server_Info; struct cifsFileInfo; struct cifs_ses; +struct cifs_tcon; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -201,6 +202,11 @@ struct smb_version_operations { const struct nls_table *); /* close smb session */ int (*logoff)(const unsigned int, struct cifs_ses *); + /* connect to a server share */ + int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, + struct cifs_tcon *, const struct nls_table *); + /* close tree connecion */ + int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a17be2618473..5fbd6b9a64d9 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -184,9 +184,9 @@ extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, struct nls_table *nls_info); extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); -extern int CIFSTCon(unsigned int xid, struct cifs_ses *ses, - const char *tree, struct cifs_tcon *tcon, - const struct nls_table *); +extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, + const char *tree, struct cifs_tcon *tcon, + const struct nls_table *); extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon, const char *searchName, const struct nls_table *nls_codepage, @@ -389,7 +389,7 @@ extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, const loff_t start_offset, 
const __u64 len, struct file_lock *, const __u16 lock_type, const bool waitFlag); -extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon); +extern int CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon); extern int CIFSSMBEcho(struct TCP_Server_Info *server); extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 915b8fc212e9..98fc454827af 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -694,7 +694,7 @@ neg_err_exit: } int -CIFSSMBTDis(const int xid, struct cifs_tcon *tcon) +CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) { struct smb_hdr *smb_buffer; int rc = 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 444243d9232b..fcf20d1b58b9 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2542,7 +2542,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char *unc) static void cifs_put_tcon(struct cifs_tcon *tcon) { - int xid; + unsigned int xid; struct cifs_ses *ses = tcon->ses; cFYI(1, "%s: tc_count=%d", __func__, tcon->tc_count); @@ -2556,7 +2556,8 @@ cifs_put_tcon(struct cifs_tcon *tcon) spin_unlock(&cifs_tcp_ses_lock); xid = GetXid(); - CIFSSMBTDis(xid, tcon); + if (ses->server->ops->tree_disconnect) + ses->server->ops->tree_disconnect(xid, tcon); _FreeXid(xid); cifs_fscache_release_super_cookie(tcon); @@ -2581,6 +2582,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) return tcon; } + if (!ses->server->ops->tree_connect) { + rc = -ENOSYS; + goto out_fail; + } + tcon = tconInfoAlloc(); if (tcon == NULL) { rc = -ENOMEM; @@ -2603,13 +2609,15 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) goto out_fail; } - /* BB Do we need to wrap session_mutex around - * this TCon call and Unix SetFS as - * we do on SessSetup and reconnect? */ + /* + * BB Do we need to wrap session_mutex around this TCon call and Unix + * SetFS as we do on SessSetup and reconnect? 
+ */ xid = GetXid(); - rc = CIFSTCon(xid, ses, volume_info->UNC, tcon, volume_info->local_nls); + rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon, + volume_info->local_nls); FreeXid(xid); - cFYI(1, "CIFS Tcon rc = %d", rc); + cFYI(1, "Tcon rc = %d", rc); if (rc) goto out_fail; @@ -2618,10 +2626,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) cFYI(1, "DFS disabled (%d)", tcon->Flags); } tcon->seal = volume_info->seal; - /* we can have only one retry value for a connection - to a share so for resources mounted more than once - to the same server share the last value passed in - for the retry flag is used */ + /* + * We can have only one retry value for a connection to a share so for + * resources mounted more than once to the same server share the last + * value passed in for the retry flag is used. + */ tcon->retry = volume_info->retry; tcon->nocase = volume_info->nocase; tcon->local_lease = volume_info->local_lease; @@ -2755,37 +2764,41 @@ out: } int -get_dfs_path(int xid, struct cifs_ses *pSesInfo, const char *old_path, - const struct nls_table *nls_codepage, unsigned int *pnum_referrals, - struct dfs_info3_param **preferrals, int remap) +get_dfs_path(int xid, struct cifs_ses *ses, const char *old_path, + const struct nls_table *nls_codepage, unsigned int *num_referrals, + struct dfs_info3_param **referrals, int remap) { char *temp_unc; int rc = 0; - *pnum_referrals = 0; - *preferrals = NULL; + if (!ses->server->ops->tree_connect) + return -ENOSYS; + + *num_referrals = 0; + *referrals = NULL; - if (pSesInfo->ipc_tid == 0) { + if (ses->ipc_tid == 0) { temp_unc = kmalloc(2 /* for slashes */ + - strnlen(pSesInfo->serverName, - SERVER_NAME_LEN_WITH_NULL * 2) - + 1 + 4 /* slash IPC$ */ + 2, - GFP_KERNEL); + strnlen(ses->serverName, SERVER_NAME_LEN_WITH_NULL * 2) + + 1 + 4 /* slash IPC$ */ + 2, GFP_KERNEL); if (temp_unc == NULL) return -ENOMEM; temp_unc[0] = '\\'; temp_unc[1] = '\\'; - strcpy(temp_unc + 2, 
pSesInfo->serverName); - strcpy(temp_unc + 2 + strlen(pSesInfo->serverName), "\\IPC$"); - rc = CIFSTCon(xid, pSesInfo, temp_unc, NULL, nls_codepage); - cFYI(1, "CIFS Tcon rc = %d ipc_tid = %d", rc, pSesInfo->ipc_tid); + strcpy(temp_unc + 2, ses->serverName); + strcpy(temp_unc + 2 + strlen(ses->serverName), "\\IPC$"); + rc = ses->server->ops->tree_connect(xid, ses, temp_unc, NULL, + nls_codepage); + cFYI(1, "Tcon rc = %d ipc_tid = %d", rc, ses->ipc_tid); kfree(temp_unc); } if (rc == 0) - rc = CIFSGetDFSRefer(xid, pSesInfo, old_path, preferrals, - pnum_referrals, nls_codepage, remap); - /* BB map targetUNCs to dfs_info3 structures, here or - in CIFSGetDFSRefer BB */ + rc = CIFSGetDFSRefer(xid, ses, old_path, referrals, + num_referrals, nls_codepage, remap); + /* + * BB - map targetUNCs to dfs_info3 structures, here or in + * CIFSGetDFSRefer. + */ return rc; } @@ -3777,7 +3790,7 @@ out: * pointer may be NULL. */ int -CIFSTCon(unsigned int xid, struct cifs_ses *ses, +CIFSTCon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *nls_codepage) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 6b0a5d616338..728595f096c9 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -432,6 +432,8 @@ struct smb_version_operations smb1_operations = { .negotiate = cifs_negotiate, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, + .tree_connect = CIFSTCon, + .tree_disconnect = CIFSSMBTDis, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 6d5786a34d98bffb8ad50d8053d1e53231fe0636 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 20 Jun 2012 11:21:16 +0400 Subject: CIFS: Rename Get/FreeXid and make them work with unsigned int Acked-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 7 +- fs/cifs/cifsacl.c | 21 +++--- fs/cifs/cifsfs.c | 6 +- fs/cifs/cifsproto.h | 171 +++++++++++++++++++++++++------------------------ fs/cifs/cifssmb.c | 
146 +++++++++++++++++++++-------------------- fs/cifs/connect.c | 40 ++++++------ fs/cifs/dir.c | 43 ++++++------- fs/cifs/file.c | 133 +++++++++++++++++++------------------- fs/cifs/inode.c | 87 +++++++++++++------------ fs/cifs/ioctl.c | 6 +- fs/cifs/link.c | 24 +++---- fs/cifs/misc.c | 4 +- fs/cifs/readdir.c | 15 +++-- fs/cifs/xattr.c | 24 +++---- 14 files changed, 375 insertions(+), 352 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 6873bb634a97..ce5cbd717bfc 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -275,7 +275,8 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) struct cifs_sb_info *cifs_sb; struct cifs_ses *ses; char *full_path; - int xid, i; + unsigned int xid; + int i; int rc; struct vfsmount *mnt; struct tcon_link *tlink; @@ -302,11 +303,11 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) } ses = tlink_tcon(tlink)->ses; - xid = GetXid(); + xid = get_xid(); rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1885da4fca82..05f4dc263a23 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1153,15 +1153,16 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, u32 *pacllen) { struct cifs_ntsd *pntsd = NULL; - int xid, rc; + unsigned int xid; + int rc; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return ERR_CAST(tlink); - xid = GetXid(); + xid = get_xid(); rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); @@ -1176,7 +1177,8 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, { struct cifs_ntsd *pntsd = NULL; int oplock = 0; - int xid, rc, create_options = 0; + unsigned int 
xid; + int rc, create_options = 0; __u16 fid; struct cifs_tcon *tcon; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1185,7 +1187,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, return ERR_CAST(tlink); tcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; @@ -1199,7 +1201,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, } cifs_put_tlink(tlink); - FreeXid(xid); + free_xid(xid); cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); if (rc) @@ -1230,7 +1232,8 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path, int aclflag) { int oplock = 0; - int xid, rc, access_flags, create_options = 0; + unsigned int xid; + int rc, access_flags, create_options = 0; __u16 fid; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -1240,7 +1243,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, return PTR_ERR(tlink); tcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; @@ -1263,7 +1266,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, CIFSSMBClose(xid, tcon, fid); out: - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a7610cfedf0a..2e9929dc2072 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -158,9 +158,9 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); int rc = -EOPNOTSUPP; - int xid; + unsigned int xid; - xid = GetXid(); + xid = get_xid(); buf->f_type = CIFS_MAGIC_NUMBER; @@ -197,7 +197,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) if (rc) rc = SMBOldQFSInfo(xid, tcon, buf); - FreeXid(xid); + free_xid(xid); return 0; } diff --git a/fs/cifs/cifsproto.h 
b/fs/cifs/cifsproto.h index 5fbd6b9a64d9..8797e4064662 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -37,20 +37,20 @@ extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, unsigned int /* length */); -extern unsigned int _GetXid(void); -extern void _FreeXid(unsigned int); -#define GetXid() \ +extern unsigned int _get_xid(void); +extern void _free_xid(unsigned int); +#define get_xid() \ ({ \ - int __xid = (int)_GetXid(); \ - cFYI(1, "CIFS VFS: in %s as Xid: %d with uid: %d", \ + unsigned int __xid = _get_xid(); \ + cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \ __func__, __xid, current_fsuid()); \ __xid; \ }) -#define FreeXid(curr_xid) \ +#define free_xid(curr_xid) \ do { \ - _FreeXid(curr_xid); \ - cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \ + _free_xid(curr_xid); \ + cFYI(1, "CIFS VFS: leaving %s (xid = %u) rc = %d", \ __func__, curr_xid, (int)rc); \ } while (0) extern int init_cifs_idmap(void); @@ -123,10 +123,10 @@ extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, struct file *file, struct tcon_link *tlink, __u32 oplock); -extern int cifs_posix_open(char *full_path, struct inode **pinode, - struct super_block *sb, - int mode, unsigned int f_flags, - __u32 *poplock, __u16 *pnetfid, int xid); +extern int cifs_posix_open(char *full_path, struct inode **inode, + struct super_block *sb, int mode, + unsigned int f_flags, __u32 *oplock, __u16 *netfid, + unsigned int xid); void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr); extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, @@ -139,11 +139,12 @@ extern int cifs_get_file_info(struct file *filp); extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, FILE_ALL_INFO *pfile_info, - struct super_block *sb, 
int xid, const __u16 *pfid); + struct super_block *sb, unsigned int xid, + const __u16 *pfid); extern int cifs_get_file_info_unix(struct file *filp); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, - struct super_block *sb, int xid); + struct super_block *sb, unsigned int xid); extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, const char *path, const __u16 *pfid); @@ -188,88 +189,90 @@ extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *); -extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon, +extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, const char *searchName, const struct nls_table *nls_codepage, __u16 *searchHandle, __u16 search_flags, struct cifs_search_info *psrch_inf, int map, const char dirsep); -extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon, +extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, __u16 searchHandle, __u16 search_flags, struct cifs_search_info *psrch_inf); -extern int CIFSFindClose(const int, struct cifs_tcon *tcon, +extern int CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, const __u16 search_handle); -extern int CIFSSMBQFileInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, u16 netfid, FILE_ALL_INFO *pFindData); -extern int CIFSSMBQPathInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, FILE_ALL_INFO *findData, int legacy /* whether to use old info level */, const struct nls_table *nls_codepage, int remap); -extern int SMBQueryInformation(const int xid, struct cifs_tcon *tcon, +extern int SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, FILE_ALL_INFO 
*findData, const struct nls_table *nls_codepage, int remap); -extern int CIFSSMBUnixQFileInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBUnixQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, u16 netfid, FILE_UNIX_BASIC_INFO *pFindData); -extern int CIFSSMBUnixQPathInfo(const int xid, +extern int CIFSSMBUnixQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, FILE_UNIX_BASIC_INFO *pFindData, const struct nls_table *nls_codepage, int remap); -extern int CIFSGetDFSRefer(const int xid, struct cifs_ses *ses, +extern int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, const unsigned char *searchName, struct dfs_info3_param **target_nodes, unsigned int *number_of_nodes_in_array, const struct nls_table *nls_codepage, int remap); -extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo, +extern int get_dfs_path(unsigned int xid, struct cifs_ses *pSesInfo, const char *old_path, const struct nls_table *nls_codepage, unsigned int *pnum_referrals, struct dfs_info3_param **preferrals, int remap); -extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, +extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, struct smb_vol *vol); -extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); -extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, +extern int SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); -extern int CIFSSMBSetFSUnixInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon, __u64 cap); -extern int CIFSSMBQFSAttributeInfo(const int xid, +extern int CIFSSMBQFSAttributeInfo(const unsigned int xid, struct cifs_tcon *tcon); -extern int CIFSSMBQFSDeviceInfo(const int xid, struct cifs_tcon *tcon); -extern int 
CIFSSMBQFSUnixInfo(const int xid, struct cifs_tcon *tcon); -extern int CIFSSMBQFSPosixInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBQFSDeviceInfo(const unsigned int xid, struct cifs_tcon *tcon); +extern int CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon); +extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); -extern int CIFSSMBSetPathInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener); -extern int CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon, - bool delete_file, __u16 fid, __u32 pid_of_opener); +extern int CIFSSMBSetFileDisposition(const unsigned int xid, + struct cifs_tcon *tcon, + bool delete_file, __u16 fid, + __u32 pid_of_opener); #if 0 -extern int CIFSSMBSetAttrLegacy(int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, char *fileName, __u16 dos_attributes, const struct nls_table *nls_codepage); #endif /* possibly unneeded function */ -extern int CIFSSMBSetEOF(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, __u64 size, bool setAllocationSizeFlag, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, __u16 fileHandle, __u32 opener_pid, bool AllocSizeFlag); @@ -283,108 +286,110 @@ struct cifs_unix_set_info_args { dev_t device; }; -extern int 
CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBUnixSetFileInfo(const unsigned int xid, + struct cifs_tcon *tcon, const struct cifs_unix_set_info_args *args, u16 fid, u32 pid_of_opener); -extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifs_tcon *pTcon, - char *fileName, - const struct cifs_unix_set_info_args *args, - const struct nls_table *nls_codepage, - int remap_special_chars); +extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, + struct cifs_tcon *tcon, char *file_name, + const struct cifs_unix_set_info_args *args, + const struct nls_table *nls_codepage, + int remap_special_chars); -extern int CIFSSMBMkDir(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *newName, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBRmDir(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSPOSIXDelFile(const int xid, struct cifs_tcon *tcon, +extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, __u16 type, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBDelFile(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBRename(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon, - int netfid, const char *target_name, - const struct nls_table *nls_codepage, - int remap_special_chars); -extern int 
CIFSCreateHardLink(const int xid, +extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, + int netfid, const char *target_name, + const struct nls_table *nls_codepage, + int remap_special_chars); +extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSUnixCreateHardLink(const int xid, +extern int CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSUnixCreateSymLink(const int xid, +extern int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage); -extern int CIFSSMBUnixQuerySymLink(const int xid, +extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage); #ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL -extern int CIFSSMBQueryReparseLinkInfo(const int xid, +extern int CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char *symlinkinfo, const int buflen, __u16 fid, const struct nls_table *nls_codepage); #endif /* temporarily unused until cifs_symlink fixed */ -extern int CIFSSMBOpen(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const int disposition, const int access_flags, const int omode, __u16 *netfid, int *pOplock, FILE_ALL_INFO *, const struct nls_table *nls_codepage, int remap); -extern int SMBLegacyOpen(const int xid, struct cifs_tcon *tcon, +extern int SMBLegacyOpen(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const int disposition, const int access_flags, const int omode, __u16 *netfid, 
int *pOplock, FILE_ALL_INFO *, const struct nls_table *nls_codepage, int remap); -extern int CIFSPOSIXCreate(const int xid, struct cifs_tcon *tcon, +extern int CIFSPOSIXCreate(const unsigned int xid, struct cifs_tcon *tcon, u32 posix_flags, __u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData, __u32 *pOplock, const char *name, const struct nls_table *nls_codepage, int remap); -extern int CIFSSMBClose(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, const int smb_file_id); -extern int CIFSSMBFlush(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, const int smb_file_id); -extern int CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, +extern int CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, char **buf, int *return_buf_type); -extern int CIFSSMBWrite(const int xid, struct cifs_io_parms *io_parms, +extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, const char *buf, const char __user *ubuf, const int long_op); -extern int CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms, +extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, struct kvec *iov, const int nvec, const int long_op); -extern int CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon, +extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, __u64 *inode_number, const struct nls_table *nls_codepage, int remap_special_chars); -extern int cifs_lockv(const int xid, struct cifs_tcon *tcon, const __u16 netfid, - const __u8 lock_type, const __u32 num_unlock, - const __u32 num_lock, LOCKING_ANDX_RANGE *buf); -extern int CIFSSMBLock(const int xid, struct cifs_tcon *tcon, +extern int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, + const __u16 netfid, const 
__u8 lock_type, + const __u32 num_unlock, const __u32 num_lock, + LOCKING_ANDX_RANGE *buf); +extern int CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon, const __u16 netfid, const __u32 netpid, const __u64 len, const __u64 offset, const __u32 numUnlock, const __u32 numLock, const __u8 lockType, const bool waitFlag, const __u8 oplock_level); -extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, const __u16 smb_file_id, const __u32 netpid, const loff_t start_offset, const __u64 len, struct file_lock *, const __u16 lock_type, @@ -417,46 +422,46 @@ extern int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key); #endif /* CIFS_WEAK_PW_HASH */ #ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */ -extern int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBNotify(const unsigned int xid, struct cifs_tcon *tcon, const int notify_subdirs, const __u16 netfid, __u32 filter, struct file *file, int multishot, const struct nls_table *nls_codepage); #endif /* was needed for dnotify, and will be needed for inotify when VFS fix */ -extern int CIFSSMBCopy(int xid, +extern int CIFSSMBCopy(unsigned int xid, struct cifs_tcon *source_tcon, const char *fromName, const __u16 target_tid, const char *toName, const int flags, const struct nls_table *nls_codepage, int remap_special_chars); -extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon, +extern ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, const unsigned char *ea_name, char *EAData, size_t bufsize, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const char *ea_name, const void *ea_value, const __u16 ea_value_len, 
const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); -extern int CIFSSMBSetCIFSACL(const int, struct cifs_tcon *, __u16, +extern int CIFSSMBSetCIFSACL(const unsigned int, struct cifs_tcon *, __u16, struct cifs_ntsd *, __u32, int); -extern int CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBGetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char *acl_inf, const int buflen, const int acl_type, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetPosixACL(const int xid, struct cifs_tcon *tcon, +extern int CIFSSMBSetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *fileName, const char *local_acl, const int buflen, const int acl_type, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSGetExtAttr(const int xid, struct cifs_tcon *tcon, +extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, const int netfid, __u64 *pExtAttrBits, __u64 *pMask); extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid); + struct cifs_sb_info *cifs_sb, unsigned int xid); extern int mdfour(unsigned char *, unsigned char *, int); extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, const struct nls_table *codepage); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 98fc454827af..7a3b4a3b113b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -828,8 +828,9 @@ session_already_dead: } int -CIFSPOSIXDelFile(const int xid, struct cifs_tcon *tcon, const char *fileName, - __u16 type, 
const struct nls_table *nls_codepage, int remap) +CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __u16 type, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -903,8 +904,9 @@ PsxDelete: } int -CIFSSMBDelFile(const int xid, struct cifs_tcon *tcon, const char *fileName, - const struct nls_table *nls_codepage, int remap) +CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, const struct nls_table *nls_codepage, + int remap) { DELETE_FILE_REQ *pSMB = NULL; DELETE_FILE_RSP *pSMBr = NULL; @@ -948,8 +950,9 @@ DelFileRetry: } int -CIFSSMBRmDir(const int xid, struct cifs_tcon *tcon, const char *dirName, - const struct nls_table *nls_codepage, int remap) +CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, + const char *dirName, const struct nls_table *nls_codepage, + int remap) { DELETE_DIRECTORY_REQ *pSMB = NULL; DELETE_DIRECTORY_RSP *pSMBr = NULL; @@ -991,7 +994,7 @@ RmDirRetry: } int -CIFSSMBMkDir(const int xid, struct cifs_tcon *tcon, +CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, const struct nls_table *nls_codepage, int remap) { int rc = 0; @@ -1034,10 +1037,11 @@ MkDirRetry: } int -CIFSPOSIXCreate(const int xid, struct cifs_tcon *tcon, __u32 posix_flags, - __u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData, - __u32 *pOplock, const char *name, - const struct nls_table *nls_codepage, int remap) +CIFSPOSIXCreate(const unsigned int xid, struct cifs_tcon *tcon, + __u32 posix_flags, __u64 mode, __u16 *netfid, + FILE_UNIX_BASIC_INFO *pRetData, __u32 *pOplock, + const char *name, const struct nls_table *nls_codepage, + int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -1200,7 +1204,7 @@ access_flags_to_smbopen_mode(const int access_flags) } int -SMBLegacyOpen(const int xid, struct cifs_tcon *tcon, +SMBLegacyOpen(const unsigned int xid, 
struct cifs_tcon *tcon, const char *fileName, const int openDisposition, const int access_flags, const int create_options, __u16 *netfid, int *pOplock, FILE_ALL_INFO *pfile_info, @@ -1307,7 +1311,7 @@ OldOpenRetry: } int -CIFSSMBOpen(const int xid, struct cifs_tcon *tcon, +CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const int openDisposition, const int access_flags, const int create_options, __u16 *netfid, int *pOplock, FILE_ALL_INFO *pfile_info, @@ -1657,8 +1661,8 @@ cifs_async_readv(struct cifs_readdata *rdata) } int -CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, - char **buf, int *pbuf_type) +CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, char **buf, int *pbuf_type) { int rc = -EACCES; READ_REQ *pSMB = NULL; @@ -1769,7 +1773,7 @@ CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, int -CIFSSMBWrite(const int xid, struct cifs_io_parms *io_parms, +CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, const char *buf, const char __user *ubuf, const int long_op) { @@ -2136,7 +2140,7 @@ async_writev_out: } int -CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms, +CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, struct kvec *iov, int n_vec, const int long_op) { @@ -2244,8 +2248,8 @@ CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms, return rc; } -int cifs_lockv(const int xid, struct cifs_tcon *tcon, const __u16 netfid, - const __u8 lock_type, const __u32 num_unlock, +int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, + const __u16 netfid, const __u8 lock_type, const __u32 num_unlock, const __u32 num_lock, LOCKING_ANDX_RANGE *buf) { int rc = 0; @@ -2286,7 +2290,7 @@ int cifs_lockv(const int xid, struct cifs_tcon *tcon, const __u16 netfid, } int -CIFSSMBLock(const int xid, struct cifs_tcon *tcon, +CIFSSMBLock(const 
unsigned int xid, struct cifs_tcon *tcon, const __u16 smb_file_id, const __u32 netpid, const __u64 len, const __u64 offset, const __u32 numUnlock, const __u32 numLock, const __u8 lockType, @@ -2356,7 +2360,7 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, } int -CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, +CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, const __u16 smb_file_id, const __u32 netpid, const loff_t start_offset, const __u64 len, struct file_lock *pLockData, const __u16 lock_type, @@ -2492,7 +2496,7 @@ plk_err_exit: int -CIFSSMBClose(const int xid, struct cifs_tcon *tcon, int smb_file_id) +CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) { int rc = 0; CLOSE_REQ *pSMB = NULL; @@ -2525,7 +2529,7 @@ CIFSSMBClose(const int xid, struct cifs_tcon *tcon, int smb_file_id) } int -CIFSSMBFlush(const int xid, struct cifs_tcon *tcon, int smb_file_id) +CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) { int rc = 0; FLUSH_REQ *pSMB = NULL; @@ -2546,7 +2550,7 @@ CIFSSMBFlush(const int xid, struct cifs_tcon *tcon, int smb_file_id) } int -CIFSSMBRename(const int xid, struct cifs_tcon *tcon, +CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap) { @@ -2613,7 +2617,7 @@ renameRetry: return rc; } -int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon, +int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, int netfid, const char *target_name, const struct nls_table *nls_codepage, int remap) { @@ -2695,9 +2699,9 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon, } int -CIFSSMBCopy(const int xid, struct cifs_tcon *tcon, const char *fromName, - const __u16 target_tid, const char *toName, const int flags, - const struct nls_table *nls_codepage, int remap) +CIFSSMBCopy(const unsigned int xid, struct cifs_tcon *tcon, + const char *fromName, 
const __u16 target_tid, const char *toName, + const int flags, const struct nls_table *nls_codepage, int remap) { int rc = 0; COPY_REQ *pSMB = NULL; @@ -2763,7 +2767,7 @@ copyRetry: } int -CIFSUnixCreateSymLink(const int xid, struct cifs_tcon *tcon, +CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage) { @@ -2852,7 +2856,7 @@ createSymLinkRetry: } int -CIFSUnixCreateHardLink(const int xid, struct cifs_tcon *tcon, +CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap) { @@ -2937,7 +2941,7 @@ createHardLinkRetry: } int -CIFSCreateHardLink(const int xid, struct cifs_tcon *tcon, +CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap) { @@ -3009,7 +3013,7 @@ winCreateHardLinkRetry: } int -CIFSSMBUnixQuerySymLink(const int xid, struct cifs_tcon *tcon, +CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **symlinkinfo, const struct nls_table *nls_codepage) { @@ -3114,7 +3118,7 @@ querySymLinkRetry: * it is not compiled in by default until callers fixed up and more tested. 
*/ int -CIFSSMBQueryReparseLinkInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char *symlinkinfo, const int buflen, __u16 fid, const struct nls_table *nls_codepage) @@ -3351,7 +3355,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, } int -CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon, +CIFSSMBGetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char *acl_inf, const int buflen, const int acl_type, const struct nls_table *nls_codepage, int remap) @@ -3440,7 +3444,7 @@ queryAclRetry: } int -CIFSSMBSetPosixACL(const int xid, struct cifs_tcon *tcon, +CIFSSMBSetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *fileName, const char *local_acl, const int buflen, const int acl_type, @@ -3520,7 +3524,7 @@ setACLerrorExit: /* BB fix tabs in this function FIXME BB */ int -CIFSGetExtAttr(const int xid, struct cifs_tcon *tcon, +CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, const int netfid, __u64 *pExtAttrBits, __u64 *pMask) { int rc = 0; @@ -3695,7 +3699,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata, /* Get Security Descriptor (by handle) from remote server for a file or dir */ int -CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid, +CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, struct cifs_ntsd **acl_inf, __u32 *pbuflen) { int rc = 0; @@ -3787,7 +3791,7 @@ qsec_out: } int -CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid, +CIFSSMBSetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, struct cifs_ntsd *pntsd, __u32 acllen, int aclflag) { __u16 byte_count, param_count, data_count, param_offset, data_offset; @@ -3851,7 +3855,7 @@ setCifsAclRetry: /* Legacy Query Path Information call for lookup to old servers such as Win9x/WinME */ -int SMBQueryInformation(const 
int xid, struct cifs_tcon *tcon, +int SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, FILE_ALL_INFO *pFinfo, const struct nls_table *nls_codepage, int remap) @@ -3920,7 +3924,7 @@ QInfRetry: } int -CIFSSMBQFileInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, u16 netfid, FILE_ALL_INFO *pFindData) { struct smb_t2_qfi_req *pSMB = NULL; @@ -3987,7 +3991,7 @@ QFileInfoRetry: } int -CIFSSMBQPathInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, FILE_ALL_INFO *pFindData, int legacy /* old style infolevel */, @@ -4088,7 +4092,7 @@ QPathInfoRetry: } int -CIFSSMBUnixQFileInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBUnixQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, u16 netfid, FILE_UNIX_BASIC_INFO *pFindData) { struct smb_t2_qfi_req *pSMB = NULL; @@ -4157,7 +4161,7 @@ UnixQFileInfoRetry: } int -CIFSSMBUnixQPathInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBUnixQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, FILE_UNIX_BASIC_INFO *pFindData, const struct nls_table *nls_codepage, int remap) @@ -4243,7 +4247,7 @@ UnixQPathInfoRetry: /* xid, tcon, searchName and codepage are input parms, rest are returned */ int -CIFSFindFirst(const int xid, struct cifs_tcon *tcon, +CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, const char *searchName, const struct nls_table *nls_codepage, __u16 *pnetfid, __u16 search_flags, @@ -4388,8 +4392,9 @@ findFirstRetry: return rc; } -int CIFSFindNext(const int xid, struct cifs_tcon *tcon, __u16 searchHandle, - __u16 search_flags, struct cifs_search_info *psrch_inf) +int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, + __u16 searchHandle, __u16 search_flags, + struct cifs_search_info *psrch_inf) { TRANSACTION2_FNEXT_REQ *pSMB = NULL; TRANSACTION2_FNEXT_RSP 
*pSMBr = NULL; @@ -4523,7 +4528,7 @@ FNext2_err_exit: } int -CIFSFindClose(const int xid, struct cifs_tcon *tcon, +CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, const __u16 searchHandle) { int rc = 0; @@ -4555,7 +4560,7 @@ CIFSFindClose(const int xid, struct cifs_tcon *tcon, } int -CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon, +CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, __u64 *inode_number, const struct nls_table *nls_codepage, int remap) @@ -4762,7 +4767,7 @@ parse_DFS_referrals_exit: } int -CIFSGetDFSRefer(const int xid, struct cifs_ses *ses, +CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, const unsigned char *searchName, struct dfs_info3_param **target_nodes, unsigned int *num_of_nodes, @@ -4877,7 +4882,8 @@ GetDFSRefExit: /* Query File System Info such as free space to old servers such as Win 9x */ int -SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData) +SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *FSData) { /* level 0x01 SMB_QUERY_FILE_SYSTEM_INFO */ TRANSACTION2_QFSI_REQ *pSMB = NULL; @@ -4956,7 +4962,8 @@ oldQFSInfoRetry: } int -CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData) +CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *FSData) { /* level 0x103 SMB_QUERY_FILE_SYSTEM_INFO */ TRANSACTION2_QFSI_REQ *pSMB = NULL; @@ -5035,7 +5042,7 @@ QFSInfoRetry: } int -CIFSSMBQFSAttributeInfo(const int xid, struct cifs_tcon *tcon) +CIFSSMBQFSAttributeInfo(const unsigned int xid, struct cifs_tcon *tcon) { /* level 0x105 SMB_QUERY_FILE_SYSTEM_INFO */ TRANSACTION2_QFSI_REQ *pSMB = NULL; @@ -5105,7 +5112,7 @@ QFSAttributeRetry: } int -CIFSSMBQFSDeviceInfo(const int xid, struct cifs_tcon *tcon) +CIFSSMBQFSDeviceInfo(const unsigned int xid, struct cifs_tcon *tcon) { /* level 0x104 SMB_QUERY_FILE_SYSTEM_INFO */ TRANSACTION2_QFSI_REQ *pSMB = NULL; @@ 
-5176,7 +5183,7 @@ QFSDeviceRetry: } int -CIFSSMBQFSUnixInfo(const int xid, struct cifs_tcon *tcon) +CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon) { /* level 0x200 SMB_QUERY_CIFS_UNIX_INFO */ TRANSACTION2_QFSI_REQ *pSMB = NULL; @@ -5246,7 +5253,7 @@ QFSUnixRetry: } int -CIFSSMBSetFSUnixInfo(const int xid, struct cifs_tcon *tcon, __u64 cap) +CIFSSMBSetFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon, __u64 cap) { /* level 0x200 SMB_SET_CIFS_UNIX_INFO */ TRANSACTION2_SETFSI_REQ *pSMB = NULL; @@ -5320,7 +5327,7 @@ SETFSUnixRetry: int -CIFSSMBQFSPosixInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData) { /* level 0x201 SMB_QUERY_CIFS_POSIX_INFO */ @@ -5413,8 +5420,8 @@ QFSPosixRetry: in Samba which this routine can run into */ int -CIFSSMBSetEOF(const int xid, struct cifs_tcon *tcon, const char *fileName, - __u64 size, bool SetAllocation, +CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __u64 size, bool SetAllocation, const struct nls_table *nls_codepage, int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; @@ -5502,7 +5509,7 @@ SetEOFRetry: } int -CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon, __u64 size, +CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, __u16 fid, __u32 pid_of_opener, bool SetAllocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; @@ -5584,7 +5591,7 @@ CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon, __u64 size, time and resort to the original setpathinfo level which takes the ancient DOS time format with 2 second granularity */ int -CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; @@ -5647,7 +5654,7 @@ CIFSSMBSetFileInfo(const int xid, struct 
cifs_tcon *tcon, } int -CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon, +CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon, bool delete_file, __u16 fid, __u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; @@ -5703,7 +5710,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon, } int -CIFSSMBSetPathInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, int remap) { @@ -5787,7 +5794,7 @@ SetTimesRetry: handling it anyway and NT4 was what we thought it would be needed for Do not delete it until we prove whether needed for Win9x though */ int -CIFSSMBSetAttrLegacy(int xid, struct cifs_tcon *tcon, char *fileName, +CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, char *fileName, __u16 dos_attrs, const struct nls_table *nls_codepage) { SETATTR_REQ *pSMB = NULL; @@ -5875,7 +5882,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, } int -CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, +CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, const struct cifs_unix_set_info_args *args, u16 fid, u32 pid_of_opener) { @@ -5939,7 +5946,8 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, } int -CIFSSMBUnixSetPathInfo(const int xid, struct cifs_tcon *tcon, char *fileName, +CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, + char *fileName, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, int remap) { @@ -6026,7 +6034,7 @@ setPermsRetry: * the data isn't copied to it, but the length is returned. 
*/ ssize_t -CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon, +CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, const unsigned char *ea_name, char *EAData, size_t buf_size, const struct nls_table *nls_codepage, int remap) @@ -6209,8 +6217,8 @@ QAllEAsOut: } int -CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon, const char *fileName, - const char *ea_name, const void *ea_value, +CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, const char *ea_name, const void *ea_value, const __u16 ea_value_len, const struct nls_table *nls_codepage, int remap) { @@ -6336,7 +6344,7 @@ SetEARetry: * incompatible for network fs clients, we could instead simply * expose this config flag by adding a future cifs (and smb2) notify ioctl. */ -int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon, +int CIFSSMBNotify(const unsigned int xid, struct cifs_tcon *tcon, const int notify_subdirs, const __u16 netfid, __u32 filter, struct file *pfile, int multishot, const struct nls_table *nls_codepage) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fcf20d1b58b9..cfb7e7797642 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2267,9 +2267,9 @@ cifs_put_smb_ses(struct cifs_ses *ses) spin_unlock(&cifs_tcp_ses_lock); if (ses->status == CifsGood && server->ops->logoff) { - xid = GetXid(); + xid = get_xid(); server->ops->logoff(xid, ses); - _FreeXid(xid); + _free_xid(xid); } sesInfoFree(ses); cifs_put_tcp_session(server); @@ -2412,7 +2412,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; - xid = GetXid(); + xid = get_xid(); ses = cifs_find_smb_ses(server, volume_info); if (ses) { @@ -2424,7 +2424,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) mutex_unlock(&ses->session_mutex); /* problem -- put our 
ses reference */ cifs_put_smb_ses(ses); - FreeXid(xid); + free_xid(xid); return ERR_PTR(rc); } if (ses->need_reconnect) { @@ -2435,7 +2435,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) mutex_unlock(&ses->session_mutex); /* problem -- put our reference */ cifs_put_smb_ses(ses); - FreeXid(xid); + free_xid(xid); return ERR_PTR(rc); } } @@ -2443,7 +2443,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) /* existing SMB ses has a server reference already */ cifs_put_tcp_session(server); - FreeXid(xid); + free_xid(xid); return ses; } @@ -2502,12 +2502,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - FreeXid(xid); + free_xid(xid); return ses; get_ses_fail: sesInfoFree(ses); - FreeXid(xid); + free_xid(xid); return ERR_PTR(rc); } @@ -2555,10 +2555,10 @@ cifs_put_tcon(struct cifs_tcon *tcon) list_del_init(&tcon->tcon_list); spin_unlock(&cifs_tcp_ses_lock); - xid = GetXid(); + xid = get_xid(); if (ses->server->ops->tree_disconnect) ses->server->ops->tree_disconnect(xid, tcon); - _FreeXid(xid); + _free_xid(xid); cifs_fscache_release_super_cookie(tcon); tconInfoFree(tcon); @@ -2613,10 +2613,10 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) * BB Do we need to wrap session_mutex around this TCon call and Unix * SetFS as we do on SessSetup and reconnect? 
*/ - xid = GetXid(); + xid = get_xid(); rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon, volume_info->local_nls); - FreeXid(xid); + free_xid(xid); cFYI(1, "Tcon rc = %d", rc); if (rc) goto out_fail; @@ -2764,7 +2764,7 @@ out: } int -get_dfs_path(int xid, struct cifs_ses *ses, const char *old_path, +get_dfs_path(unsigned int xid, struct cifs_ses *ses, const char *old_path, const struct nls_table *nls_codepage, unsigned int *num_referrals, struct dfs_info3_param **referrals, int remap) { @@ -3067,7 +3067,7 @@ ip_connect(struct TCP_Server_Info *server) return generic_ip_connect(server); } -void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, +void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info) { /* if we are reconnecting then should we check to see if @@ -3399,7 +3399,7 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) } static int -is_path_accessible(int xid, struct cifs_tcon *tcon, +is_path_accessible(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path) { int rc; @@ -3485,7 +3485,7 @@ build_unc_path_to_root(const struct smb_vol *vol, * determine whether there were referrals. 
*/ static int -expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, +expand_dfs_referral(unsigned int xid, struct cifs_ses *pSesInfo, struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb, int check_prefix) { @@ -3595,7 +3595,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { int rc; - int xid; + unsigned int xid; struct cifs_ses *pSesInfo; struct cifs_tcon *tcon; struct TCP_Server_Info *srvTcp; @@ -3618,7 +3618,7 @@ try_mount_again: else if (pSesInfo) cifs_put_smb_ses(pSesInfo); - FreeXid(xid); + free_xid(xid); } #endif rc = 0; @@ -3628,7 +3628,7 @@ try_mount_again: full_path = NULL; tlink = NULL; - xid = GetXid(); + xid = get_xid(); /* get a reference to a tcp session */ srvTcp = cifs_get_tcp_session(volume_info); @@ -3781,7 +3781,7 @@ mount_fail_check: } out: - FreeXid(xid); + free_xid(xid); return rc; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index a180265a10b5..d364654491e3 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -157,10 +157,10 @@ check_name(struct dentry *direntry) /* Inode operations in similar order to how they appear in Linux file fs.h */ -static int cifs_do_create(struct inode *inode, struct dentry *direntry, - int xid, struct tcon_link *tlink, unsigned oflags, - umode_t mode, __u32 *oplock, __u16 *fileHandle, - int *created) +static int +cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, + struct tcon_link *tlink, unsigned oflags, umode_t mode, + __u32 *oplock, __u16 *fileHandle, int *created) { int rc = -ENOENT; int create_options = CREATE_NOT_DIR; @@ -382,7 +382,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, int *opened) { int rc; - int xid; + unsigned int xid; struct tcon_link *tlink; struct cifs_tcon *tcon; __u16 fileHandle; @@ -412,7 +412,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (rc) return rc; - xid = GetXid(); + xid = get_xid(); cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p", inode, direntry->d_name.name, 
direntry); @@ -420,7 +420,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); filp = ERR_CAST(tlink); if (IS_ERR(tlink)) - goto free_xid; + goto out_free_xid; tcon = tlink_tcon(tlink); @@ -445,8 +445,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, out: cifs_put_tlink(tlink); -free_xid: - FreeXid(xid); +out_free_xid: + free_xid(xid); return rc; } @@ -454,7 +454,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, bool excl) { int rc; - int xid = GetXid(); + unsigned int xid = get_xid(); /* * BB below access is probably too much for mknod to request * but we have to do query and setpathinfo so requesting @@ -474,7 +474,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); rc = PTR_ERR(tlink); if (IS_ERR(tlink)) - goto free_xid; + goto out_free_xid; rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, &oplock, &fileHandle, &created); @@ -482,9 +482,8 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); cifs_put_tlink(tlink); -free_xid: - FreeXid(xid); - +out_free_xid: + free_xid(xid); return rc; } @@ -492,7 +491,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, dev_t device_number) { int rc = -EPERM; - int xid; + unsigned int xid; int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; @@ -516,7 +515,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -564,7 +563,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, if (buf == NULL) { kfree(full_path); rc = -ENOMEM; - FreeXid(xid); + free_xid(xid); return rc; } @@ -614,7 +613,7 
@@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, mknod_out: kfree(full_path); kfree(buf); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } @@ -623,7 +622,7 @@ struct dentry * cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, unsigned int flags) { - int xid; + unsigned int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; @@ -631,7 +630,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct inode *newInode = NULL; char *full_path = NULL; - xid = GetXid(); + xid = get_xid(); cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p", parent_dir_inode, direntry->d_name.name, direntry); @@ -641,7 +640,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, cifs_sb = CIFS_SB(parent_dir_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { - FreeXid(xid); + free_xid(xid); return (struct dentry *)tlink; } pTcon = tlink_tcon(tlink); @@ -695,7 +694,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, lookup_out: kfree(full_path); cifs_put_tlink(tlink); - FreeXid(xid); + free_xid(xid); return ERR_PTR(rc); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e9a8ac0047c7..93b3b1358409 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -107,7 +107,7 @@ static inline int cifs_get_disposition(unsigned int flags) int cifs_posix_open(char *full_path, struct inode **pinode, struct super_block *sb, int mode, unsigned int f_flags, - __u32 *poplock, __u16 *pnetfid, int xid) + __u32 *poplock, __u16 *pnetfid, unsigned int xid) { int rc; FILE_UNIX_BASIC_INFO *presp_data; @@ -170,7 +170,7 @@ posix_open_ret: static int cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock, - __u16 *pnetfid, int xid) + __u16 *pnetfid, unsigned int xid) { int rc; int desiredAccess; @@ -324,11 
+324,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) cancel_work_sync(&cifs_file->oplock_break); if (!tcon->need_reconnect && !cifs_file->invalidHandle) { - int xid, rc; - - xid = GetXid(); + unsigned int xid; + int rc; + xid = get_xid(); rc = CIFSSMBClose(xid, tcon, cifs_file->netfid); - FreeXid(xid); + free_xid(xid); } /* Delete any outstanding lock records. We'll lose them when the file @@ -350,7 +350,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) int cifs_open(struct inode *inode, struct file *file) { int rc = -EACCES; - int xid; + unsigned int xid; __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; @@ -360,12 +360,12 @@ int cifs_open(struct inode *inode, struct file *file) bool posix_open_ok = false; __u16 netfid; - xid = GetXid(); + xid = get_xid(); cifs_sb = CIFS_SB(inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { - FreeXid(xid); + free_xid(xid); return PTR_ERR(tlink); } tcon = tlink_tcon(tlink); @@ -445,7 +445,7 @@ int cifs_open(struct inode *inode, struct file *file) out: kfree(full_path); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } @@ -464,7 +464,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile) static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) { int rc = -EACCES; - int xid; + unsigned int xid; __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; @@ -476,12 +476,12 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) int create_options = CREATE_NOT_DIR; __u16 netfid; - xid = GetXid(); + xid = get_xid(); mutex_lock(&pCifsFile->fh_mutex); if (!pCifsFile->invalidHandle) { mutex_unlock(&pCifsFile->fh_mutex); rc = 0; - FreeXid(xid); + free_xid(xid); return rc; } @@ -497,7 +497,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) if (full_path == NULL) { rc = -ENOMEM; mutex_unlock(&pCifsFile->fh_mutex); - FreeXid(xid); + free_xid(xid); return rc; } @@ -583,7 +583,7 @@ 
reopen_success: reopen_error_exit: kfree(full_path); - FreeXid(xid); + free_xid(xid); return rc; } @@ -601,13 +601,13 @@ int cifs_close(struct inode *inode, struct file *file) int cifs_closedir(struct inode *inode, struct file *file) { int rc = 0; - int xid; + unsigned int xid; struct cifsFileInfo *pCFileStruct = file->private_data; char *ptmp; cFYI(1, "Closedir inode = 0x%p", inode); - xid = GetXid(); + xid = get_xid(); if (pCFileStruct) { struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink); @@ -639,7 +639,7 @@ int cifs_closedir(struct inode *inode, struct file *file) file->private_data = NULL; } /* BB can we lock the filestruct while this is going on? */ - FreeXid(xid); + free_xid(xid); return rc; } @@ -872,7 +872,8 @@ try_again: static int cifs_push_mandatory_locks(struct cifsFileInfo *cfile) { - int xid, rc = 0, stored_rc; + unsigned int xid; + int rc = 0, stored_rc; struct cifsLockInfo *li, *tmp; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); @@ -882,13 +883,13 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; int i; - xid = GetXid(); + xid = get_xid(); tcon = tlink_tcon(cfile->tlink); mutex_lock(&cinode->lock_mutex); if (!cinode->can_cache_brlcks) { mutex_unlock(&cinode->lock_mutex); - FreeXid(xid); + free_xid(xid); return rc; } @@ -899,7 +900,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) max_buf = tcon->ses->server->maxBuf; if (!max_buf) { mutex_unlock(&cinode->lock_mutex); - FreeXid(xid); + free_xid(xid); return -EINVAL; } @@ -908,7 +909,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { mutex_unlock(&cinode->lock_mutex); - FreeXid(xid); + free_xid(xid); return rc; } @@ -947,7 +948,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) mutex_unlock(&cinode->lock_mutex); kfree(buf); - FreeXid(xid); + free_xid(xid); return rc; } @@ -977,12 +978,12 @@ 
cifs_push_posix_locks(struct cifsFileInfo *cfile) struct lock_to_push *lck, *tmp; __u64 length; - xid = GetXid(); + xid = get_xid(); mutex_lock(&cinode->lock_mutex); if (!cinode->can_cache_brlcks) { mutex_unlock(&cinode->lock_mutex); - FreeXid(xid); + free_xid(xid); return rc; } @@ -1054,7 +1055,7 @@ out: cinode->can_cache_brlcks = false; mutex_unlock(&cinode->lock_mutex); - FreeXid(xid); + free_xid(xid); return rc; err_out: list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { @@ -1126,7 +1127,7 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, } static int -cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset, +cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, __u64 length, __u32 type, int lock, int unlock, bool wait) { return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid, @@ -1136,7 +1137,7 @@ cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset, static int cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, - bool wait_flag, bool posix_lck, int xid) + bool wait_flag, bool posix_lck, unsigned int xid) { int rc = 0; __u64 length = 1 + flock->fl_end - flock->fl_start; @@ -1221,7 +1222,8 @@ cifs_free_llist(struct list_head *llist) } static int -cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) +cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, + unsigned int xid) { int rc = 0, stored_rc; int types[] = {LOCKING_ANDX_LARGE_FILES, @@ -1326,7 +1328,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) static int cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, - bool wait_flag, bool posix_lck, int lock, int unlock, int xid) + bool wait_flag, bool posix_lck, int lock, int unlock, + unsigned int xid) { int rc = 0; __u64 length = 1 + flock->fl_end - flock->fl_start; @@ -1400,7 +1403,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock 
*flock) __u32 type; rc = -EACCES; - xid = GetXid(); + xid = get_xid(); cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld " "end: %lld", cmd, flock->fl_flags, flock->fl_type, @@ -1426,7 +1429,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) */ if (IS_GETLK(cmd)) { rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); - FreeXid(xid); + free_xid(xid); return rc; } @@ -1435,13 +1438,13 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) * if no lock or unlock then nothing to do since we do not * know what it is */ - FreeXid(xid); + free_xid(xid); return -EOPNOTSUPP; } rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, xid); - FreeXid(xid); + free_xid(xid); return rc; } @@ -1468,7 +1471,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, unsigned int total_written; struct cifs_sb_info *cifs_sb; struct cifs_tcon *pTcon; - int xid; + unsigned int xid; struct dentry *dentry = open_file->dentry; struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); struct cifs_io_parms io_parms; @@ -1480,7 +1483,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, pTcon = tlink_tcon(open_file->tlink); - xid = GetXid(); + xid = get_xid(); for (total_written = 0; write_size > total_written; total_written += bytes_written) { @@ -1516,7 +1519,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, if (total_written) break; else { - FreeXid(xid); + free_xid(xid); return rc; } } else { @@ -1536,7 +1539,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, spin_unlock(&dentry->d_inode->i_lock); } mark_inode_dirty_sync(dentry->d_inode); - FreeXid(xid); + free_xid(xid); return total_written; } @@ -1935,9 +1938,9 @@ static int cifs_writepage_locked(struct page *page, struct writeback_control *wbc) { int rc; - int xid; + unsigned int xid; - xid = GetXid(); + xid = get_xid(); /* BB add check for wbc flags */ page_cache_get(page); if 
(!PageUptodate(page)) @@ -1966,7 +1969,7 @@ retry_write: SetPageUptodate(page); end_page_writeback(page); page_cache_release(page); - FreeXid(xid); + free_xid(xid); return rc; } @@ -2005,9 +2008,9 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { char *page_data; unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - int xid; + unsigned int xid; - xid = GetXid(); + xid = get_xid(); /* this is probably better than directly calling partialpage_write since in this function the file handle is known which we might as well leverage */ @@ -2018,7 +2021,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, /* if (rc < 0) should we set writebehind rc? */ kunmap(page); - FreeXid(xid); + free_xid(xid); } else { rc = copied; pos += copied; @@ -2041,7 +2044,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int xid; + unsigned int xid; int rc = 0; struct cifs_tcon *tcon; struct cifsFileInfo *smbfile = file->private_data; @@ -2053,7 +2056,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, return rc; mutex_lock(&inode->i_mutex); - xid = GetXid(); + xid = get_xid(); cFYI(1, "Sync file - name: %s datasync: 0x%x", file->f_path.dentry->d_name.name, datasync); @@ -2070,14 +2073,14 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); - FreeXid(xid); + free_xid(xid); mutex_unlock(&inode->i_mutex); return rc; } int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int xid; + unsigned int xid; int rc = 0; struct cifs_tcon *tcon; struct cifsFileInfo *smbfile = file->private_data; @@ -2089,7 +2092,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return rc; mutex_lock(&inode->i_mutex); - xid = GetXid(); + xid = 
get_xid(); cFYI(1, "Sync file - name: %s datasync: 0x%x", file->f_path.dentry->d_name.name, datasync); @@ -2098,7 +2101,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); - FreeXid(xid); + free_xid(xid); mutex_unlock(&inode->i_mutex); return rc; } @@ -2743,14 +2746,14 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, unsigned int rsize; struct cifs_sb_info *cifs_sb; struct cifs_tcon *pTcon; - int xid; + unsigned int xid; char *current_offset; struct cifsFileInfo *open_file; struct cifs_io_parms io_parms; int buf_type = CIFS_NO_BUFFER; __u32 pid; - xid = GetXid(); + xid = get_xid(); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); /* FIXME: set up handlers for larger reads and/or convert to async */ @@ -2758,7 +2761,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if (file->private_data == NULL) { rc = -EBADF; - FreeXid(xid); + free_xid(xid); return rc; } open_file = file->private_data; @@ -2803,7 +2806,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if (total_read) { break; } else { - FreeXid(xid); + free_xid(xid); return rc; } } else { @@ -2811,7 +2814,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, *poffset += bytes_read; } } - FreeXid(xid); + free_xid(xid); return total_read; } @@ -2838,7 +2841,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) int rc, xid; struct inode *inode = file->f_path.dentry->d_inode; - xid = GetXid(); + xid = get_xid(); if (!CIFS_I(inode)->clientCanCacheRead) { rc = cifs_invalidate_mapping(inode); @@ -2849,7 +2852,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) rc = generic_file_mmap(file, vma); if (rc == 0) vma->vm_ops = &cifs_file_vm_ops; - FreeXid(xid); + free_xid(xid); return rc; } @@ -2857,17 +2860,17 @@ int 
cifs_file_mmap(struct file *file, struct vm_area_struct *vma) { int rc, xid; - xid = GetXid(); + xid = get_xid(); rc = cifs_revalidate_file(file); if (rc) { cFYI(1, "Validation prior to mmap failed, error=%d", rc); - FreeXid(xid); + free_xid(xid); return rc; } rc = generic_file_mmap(file, vma); if (rc == 0) vma->vm_ops = &cifs_file_vm_ops; - FreeXid(xid); + free_xid(xid); return rc; } @@ -3157,13 +3160,13 @@ static int cifs_readpage(struct file *file, struct page *page) { loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; int rc = -EACCES; - int xid; + unsigned int xid; - xid = GetXid(); + xid = get_xid(); if (file->private_data == NULL) { rc = -EBADF; - FreeXid(xid); + free_xid(xid); return rc; } @@ -3174,7 +3177,7 @@ static int cifs_readpage(struct file *file, struct page *page) unlock_page(page); - FreeXid(xid); + free_xid(xid); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8e8bb49112ff..af902864ac03 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -289,7 +289,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) int cifs_get_file_info_unix(struct file *filp) { int rc; - int xid; + unsigned int xid; FILE_UNIX_BASIC_INFO find_data; struct cifs_fattr fattr; struct inode *inode = filp->f_path.dentry->d_inode; @@ -297,7 +297,7 @@ int cifs_get_file_info_unix(struct file *filp) struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - xid = GetXid(); + xid = get_xid(); rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); if (!rc) { cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); @@ -307,13 +307,13 @@ int cifs_get_file_info_unix(struct file *filp) } cifs_fattr_to_inode(inode, &fattr); - FreeXid(xid); + free_xid(xid); return rc; } int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *full_path, - struct super_block *sb, int xid) + struct super_block *sb, unsigned int xid) { int rc; FILE_UNIX_BASIC_INFO find_data; @@ -367,7 +367,7 @@ int 
cifs_get_inode_info_unix(struct inode **pinode, static int cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) + struct cifs_sb_info *cifs_sb, unsigned int xid) { int rc; int oplock = 0; @@ -466,7 +466,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ? */ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) + struct cifs_sb_info *cifs_sb, unsigned int xid) { #ifdef CONFIG_CIFS_XATTR ssize_t rc; @@ -557,7 +557,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, int cifs_get_file_info(struct file *filp) { int rc; - int xid; + unsigned int xid; FILE_ALL_INFO find_data; struct cifs_fattr fattr; struct inode *inode = filp->f_path.dentry->d_inode; @@ -565,7 +565,7 @@ int cifs_get_file_info(struct file *filp) struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - xid = GetXid(); + xid = get_xid(); rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); switch (rc) { case 0: @@ -596,13 +596,13 @@ int cifs_get_file_info(struct file *filp) fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; cifs_fattr_to_inode(inode, &fattr); cgfi_exit: - FreeXid(xid); + free_xid(xid); return rc; } int cifs_get_inode_info(struct inode **pinode, const unsigned char *full_path, FILE_ALL_INFO *pfindData, - struct super_block *sb, int xid, const __u16 *pfid) + struct super_block *sb, unsigned int xid, const __u16 *pfid) { int rc = 0, tmprc; struct cifs_tcon *pTcon; @@ -886,13 +886,13 @@ retry_iget5_locked: /* gets root inode */ struct inode *cifs_root_iget(struct super_block *sb) { - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct inode *inode = NULL; long rc; struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - xid = GetXid(); + xid = get_xid(); if (tcon->unix_ext) rc = 
cifs_get_inode_info_unix(&inode, "", sb, xid); else @@ -922,15 +922,15 @@ struct inode *cifs_root_iget(struct super_block *sb) } out: - /* can not call macro FreeXid here since in a void func + /* can not call macro free_xid here since in a void func * TODO: This is no longer true */ - _FreeXid(xid); + _free_xid(xid); return inode; } static int -cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, +cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, char *full_path, __u32 dosattr) { int rc; @@ -1051,7 +1051,8 @@ out: * anything else. */ static int -cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid) +cifs_rename_pending_delete(char *full_path, struct dentry *dentry, + unsigned int xid) { int oplock = 0; int rc; @@ -1171,7 +1172,7 @@ undo_setattr: int cifs_unlink(struct inode *dir, struct dentry *dentry) { int rc = 0; - int xid; + unsigned int xid; char *full_path = NULL; struct inode *inode = dentry->d_inode; struct cifsInodeInfo *cifs_inode; @@ -1189,7 +1190,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) return PTR_ERR(tlink); tcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); /* Unlink can be called from rename so we can not take the * sb->s_vfs_rename_mutex here */ @@ -1265,7 +1266,7 @@ out_reval: unlink_out: kfree(full_path); kfree(attrs); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } @@ -1273,7 +1274,7 @@ unlink_out: int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) { int rc = 0, tmprc; - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; @@ -1289,7 +1290,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) return PTR_ERR(tlink); pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -1446,7 +1447,7 @@ mkdir_out: */ CIFS_I(inode)->time = 0; 
kfree(full_path); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } @@ -1454,7 +1455,7 @@ mkdir_out: int cifs_rmdir(struct inode *inode, struct dentry *direntry) { int rc = 0; - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; @@ -1463,7 +1464,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cFYI(1, "cifs_rmdir, inode = 0x%p", inode); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -1506,13 +1507,14 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) rmdir_exit: kfree(full_path); - FreeXid(xid); + free_xid(xid); return rc; } static int -cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, - struct dentry *to_dentry, const char *toPath) +cifs_do_rename(unsigned int xid, struct dentry *from_dentry, + const char *fromPath, struct dentry *to_dentry, + const char *toPath) { struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); struct tcon_link *tlink; @@ -1571,7 +1573,8 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, struct cifs_tcon *tcon; FILE_UNIX_BASIC_INFO *info_buf_source = NULL; FILE_UNIX_BASIC_INFO *info_buf_target; - int xid, rc, tmprc; + unsigned int xid; + int rc, tmprc; cifs_sb = CIFS_SB(source_dir->i_sb); tlink = cifs_sb_tlink(cifs_sb); @@ -1579,7 +1582,7 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, return PTR_ERR(tlink); tcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); /* * we already have the rename sem so we do not need to @@ -1652,7 +1655,7 @@ cifs_rename_exit: kfree(info_buf_source); kfree(fromName); kfree(toName); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } @@ -1727,7 +1730,7 @@ int cifs_revalidate_file_attr(struct file *filp) int cifs_revalidate_dentry_attr(struct dentry *dentry) { - int xid; + unsigned int xid; int rc = 0; struct inode *inode = dentry->d_inode; 
struct super_block *sb = dentry->d_sb; @@ -1739,7 +1742,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) if (!cifs_inode_needs_reval(inode)) return rc; - xid = GetXid(); + xid = get_xid(); /* can not safely grab the rename sem here if rename calls revalidate since that would deadlock */ @@ -1761,7 +1764,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) out: kfree(full_path); - FreeXid(xid); + free_xid(xid); return rc; } @@ -1869,7 +1872,7 @@ static void cifs_setsize(struct inode *inode, loff_t offset) static int cifs_set_file_size(struct inode *inode, struct iattr *attrs, - int xid, char *full_path) + unsigned int xid, char *full_path) { int rc; struct cifsFileInfo *open_file; @@ -1971,7 +1974,7 @@ static int cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) { int rc; - int xid; + unsigned int xid; char *full_path = NULL; struct inode *inode = direntry->d_inode; struct cifsInodeInfo *cifsInode = CIFS_I(inode); @@ -1984,7 +1987,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) cFYI(1, "setattr_unix on file %s attrs->ia_valid=0x%x", direntry->d_name.name, attrs->ia_valid); - xid = GetXid(); + xid = get_xid(); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; @@ -2104,14 +2107,14 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) out: kfree(args); kfree(full_path); - FreeXid(xid); + free_xid(xid); return rc; } static int cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) { - int xid; + unsigned int xid; uid_t uid = NO_CHANGE_32; gid_t gid = NO_CHANGE_32; struct inode *inode = direntry->d_inode; @@ -2122,7 +2125,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) __u32 dosattr = 0; __u64 mode = NO_CHANGE_64; - xid = GetXid(); + xid = get_xid(); cFYI(1, "setattr on file %s attrs->iavalid 0x%x", direntry->d_name.name, attrs->ia_valid); @@ -2132,14 +2135,14 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = 
inode_change_ok(inode, attrs); if (rc < 0) { - FreeXid(xid); + free_xid(xid); return rc; } full_path = build_path_from_dentry(direntry); if (full_path == NULL) { rc = -ENOMEM; - FreeXid(xid); + free_xid(xid); return rc; } @@ -2265,7 +2268,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) cifs_setattr_exit: kfree(full_path); - FreeXid(xid); + free_xid(xid); return rc; } diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 6d2667f0c98c..ae082a66de2f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -34,7 +34,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = filep->f_dentry->d_inode; int rc = -ENOTTY; /* strange error - but the precedent */ - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; #ifdef CONFIG_CIFS_POSIX struct cifsFileInfo *pSMBFile = filep->private_data; @@ -44,7 +44,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) __u64 caps; #endif /* CONFIG_CIFS_POSIX */ - xid = GetXid(); + xid = get_xid(); cFYI(1, "ioctl file %p cmd %u arg %lu", filep, command, arg); @@ -105,6 +105,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) break; } - FreeXid(xid); + free_xid(xid); return rc; } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 90d8add2a2a9..f78971511f57 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -181,7 +181,7 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) } static int -CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon, +CIFSCreateMFSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, struct cifs_sb_info *cifs_sb) { @@ -238,7 +238,7 @@ CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon, } static int -CIFSQueryMFSymLink(const int xid, struct cifs_tcon *tcon, +CIFSQueryMFSymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **symlinkinfo, const struct nls_table 
*nls_codepage, int remap) { @@ -307,7 +307,7 @@ CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr) int CIFSCheckMFSymlink(struct cifs_fattr *fattr, const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) + struct cifs_sb_info *cifs_sb, unsigned int xid) { int rc; int oplock = 0; @@ -390,7 +390,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, struct dentry *direntry) { int rc = -EACCES; - int xid; + unsigned int xid; char *fromName = NULL; char *toName = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -403,7 +403,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, return PTR_ERR(tlink); pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); fromName = build_path_from_dentry(old_file); toName = build_path_from_dentry(direntry); @@ -455,7 +455,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, cifs_hl_exit: kfree(fromName); kfree(toName); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); return rc; } @@ -465,14 +465,14 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) { struct inode *inode = direntry->d_inode; int rc = -ENOMEM; - int xid; + unsigned int xid; char *full_path = NULL; char *target_path = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; - xid = GetXid(); + xid = get_xid(); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { @@ -529,7 +529,7 @@ out: target_path = ERR_PTR(rc); } - FreeXid(xid); + free_xid(xid); if (tlink) cifs_put_tlink(tlink); nd_set_link(nd, target_path); @@ -540,14 +540,14 @@ int cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) { int rc = -EOPNOTSUPP; - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink; struct cifs_tcon *pTcon; char *full_path = NULL; struct inode *newinode = NULL; - xid = GetXid(); + xid = get_xid(); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { @@ -594,7 
+594,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) symlink_exit: kfree(full_path); cifs_put_tlink(tlink); - FreeXid(xid); + free_xid(xid); return rc; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 557506ae1e2a..64601146f157 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -40,7 +40,7 @@ extern mempool_t *cifs_req_poolp; since the cifs fs was mounted */ unsigned int -_GetXid(void) +_get_xid(void) { unsigned int xid; @@ -58,7 +58,7 @@ _GetXid(void) } void -_FreeXid(unsigned int xid) +_free_xid(unsigned int xid) { spin_lock(&GlobalMid_Lock); /* if (GlobalTotalActiveXid == 0) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a4217f02fab2..da30d96a7495 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -193,7 +193,7 @@ cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, we try to do FindFirst on (NTFS) directory symlinks */ /* int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, - int xid) + unsigned int xid) { __u16 fid; int len; @@ -220,7 +220,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, } */ -static int initiate_cifs_search(const int xid, struct file *file) +static int initiate_cifs_search(const unsigned int xid, struct file *file) { __u16 search_flags; int rc = 0; @@ -507,7 +507,7 @@ static int cifs_save_resume_key(const char *current_entry, assume that they are located in the findfirst return buffer.*/ /* We start counting in the buffer with entry 2 and increment for every entry (do not increment for . or .. 
entry) */ -static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon, +static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, struct file *file, char **ppCurrentEntry, int *num_to_ret) { __u16 search_flags; @@ -721,7 +721,8 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) { int rc = 0; - int xid, i; + unsigned int xid; + int i; struct cifs_tcon *pTcon; struct cifsFileInfo *cifsFile = NULL; char *current_entry; @@ -730,7 +731,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) char *end_of_smb; unsigned int max_len; - xid = GetXid(); + xid = get_xid(); /* * Ensure FindFirst doesn't fail before doing filldir() for '.' and @@ -768,7 +769,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) if (file->private_data == NULL) { rc = -EINVAL; - FreeXid(xid); + free_xid(xid); return rc; } cifsFile = file->private_data; @@ -840,6 +841,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } /* end switch */ rddir2_exit: - FreeXid(xid); + free_xid(xid); return rc; } diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 10d92cf57ab6..5142f2c60278 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -39,7 +39,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) { int rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; @@ -60,7 +60,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) return PTR_ERR(tlink); pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -88,7 +88,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) } remove_ea_exit: kfree(full_path); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); #endif return rc; @@ -99,7 +99,7 
@@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, { int rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; @@ -120,7 +120,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, return PTR_ERR(tlink); pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -221,7 +221,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, set_ea_exit: kfree(full_path); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); #endif return rc; @@ -232,7 +232,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, { ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; @@ -253,7 +253,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, return PTR_ERR(tlink); pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -355,7 +355,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, get_ea_exit: kfree(full_path); - FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); #endif return rc; @@ -365,7 +365,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) { ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR - int xid; + unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; @@ -389,7 +389,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) return PTR_ERR(tlink); pTcon = tlink_tcon(tlink); - xid = GetXid(); + xid = get_xid(); full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -409,7 +409,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) list_ea_exit: kfree(full_path); - 
FreeXid(xid); + free_xid(xid); cifs_put_tlink(tlink); #endif return rc; -- cgit v1.2.3 From f7ec0d0bbc5cff9edd649e6891e7e2efcb94c038 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 20 Jun 2012 17:50:01 +0400 Subject: CIFS: Rename 7 error codes to NT_ style and consider such codes as CIFS errors. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/nterr.c | 6 +++--- fs/cifs/nterr.h | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/cifs/nterr.c b/fs/cifs/nterr.c index 819fd994b121..b6023c646123 100644 --- a/fs/cifs/nterr.c +++ b/fs/cifs/nterr.c @@ -31,7 +31,7 @@ const struct nt_err_code_struct nt_errs[] = { {"NT_STATUS_INVALID_INFO_CLASS", NT_STATUS_INVALID_INFO_CLASS}, {"NT_STATUS_INFO_LENGTH_MISMATCH", NT_STATUS_INFO_LENGTH_MISMATCH}, {"NT_STATUS_ACCESS_VIOLATION", NT_STATUS_ACCESS_VIOLATION}, - {"STATUS_BUFFER_OVERFLOW", STATUS_BUFFER_OVERFLOW}, + {"NT_STATUS_BUFFER_OVERFLOW", NT_STATUS_BUFFER_OVERFLOW}, {"NT_STATUS_IN_PAGE_ERROR", NT_STATUS_IN_PAGE_ERROR}, {"NT_STATUS_PAGEFILE_QUOTA", NT_STATUS_PAGEFILE_QUOTA}, {"NT_STATUS_INVALID_HANDLE", NT_STATUS_INVALID_HANDLE}, @@ -681,7 +681,7 @@ const struct nt_err_code_struct nt_errs[] = { NT_STATUS_QUOTA_LIST_INCONSISTENT}, {"NT_STATUS_FILE_IS_OFFLINE", NT_STATUS_FILE_IS_OFFLINE}, {"NT_STATUS_NO_MORE_ENTRIES", NT_STATUS_NO_MORE_ENTRIES}, - {"STATUS_MORE_ENTRIES", STATUS_MORE_ENTRIES}, - {"STATUS_SOME_UNMAPPED", STATUS_SOME_UNMAPPED}, + {"NT_STATUS_MORE_ENTRIES", NT_STATUS_MORE_ENTRIES}, + {"NT_STATUS_SOME_UNMAPPED", NT_STATUS_SOME_UNMAPPED}, {NULL, 0} }; diff --git a/fs/cifs/nterr.h b/fs/cifs/nterr.h index 257267367d41..7a0eae5ae7c9 100644 --- a/fs/cifs/nterr.h +++ b/fs/cifs/nterr.h @@ -35,18 +35,20 @@ struct nt_err_code_struct { extern const struct nt_err_code_struct nt_errs[]; /* Win32 Status codes. 
*/ -#define STATUS_MORE_ENTRIES 0x0105 -#define ERROR_INVALID_PARAMETER 0x0057 -#define ERROR_INSUFFICIENT_BUFFER 0x007a -#define STATUS_1804 0x070c -#define STATUS_NOTIFY_ENUM_DIR 0x010c +#define NT_STATUS_MORE_ENTRIES 0x0105 +#define NT_ERROR_INVALID_PARAMETER 0x0057 +#define NT_ERROR_INSUFFICIENT_BUFFER 0x007a +#define NT_STATUS_1804 0x070c +#define NT_STATUS_NOTIFY_ENUM_DIR 0x010c -/* Win32 Error codes extracted using a loop in smbclient then printing a - netmon sniff to a file. */ +/* + * Win32 Error codes extracted using a loop in smbclient then printing a netmon + * sniff to a file. + */ -#define NT_STATUS_OK 0x0000 -#define STATUS_SOME_UNMAPPED 0x0107 -#define STATUS_BUFFER_OVERFLOW 0x80000005 +#define NT_STATUS_OK 0x0000 +#define NT_STATUS_SOME_UNMAPPED 0x0107 +#define NT_STATUS_BUFFER_OVERFLOW 0x80000005 #define NT_STATUS_NO_MORE_ENTRIES 0x8000001a #define NT_STATUS_MEDIA_CHANGED 0x8000001c #define NT_STATUS_END_OF_MEDIA 0x8000001e -- cgit v1.2.3 From b8030603d94a231c7fdee4c1ac369537f1fb8fb0 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 25 Dec 2011 13:27:35 +0400 Subject: CIFS: Add SMB2 status codes Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2status.h | 1782 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1782 insertions(+) create mode 100644 fs/cifs/smb2status.h (limited to 'fs') diff --git a/fs/cifs/smb2status.h b/fs/cifs/smb2status.h new file mode 100644 index 000000000000..3d5f62150de4 --- /dev/null +++ b/fs/cifs/smb2status.h @@ -0,0 +1,1782 @@ +/* + * fs/cifs/smb2status.h + * + * SMB2 Status code (network error) definitions + * Definitions are from MS-ERREF + * + * Copyright (c) International Business Machines Corp., 2009,2011 + * Author(s): Steve French (sfrench@us.ibm.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; 
either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * 0 1 2 3 4 5 6 7 8 9 0 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F + * SEV C N <-------Facility--------> <------Error Status Code------> + * + * C is set if "customer defined" error, N bit is reserved and MBZ + */ + +#define STATUS_SEVERITY_SUCCESS __constant_cpu_to_le32(0x0000) +#define STATUS_SEVERITY_INFORMATIONAL __constant_cpu_to_le32(0x0001) +#define STATUS_SEVERITY_WARNING __constant_cpu_to_le32(0x0002) +#define STATUS_SEVERITY_ERROR __constant_cpu_to_le32(0x0003) + +struct ntstatus { + /* Facility is the high 12 bits of the following field */ + __le32 Facility; /* low 2 bits Severity, next is Customer, then rsrvd */ + __le32 Code; +}; + +#define STATUS_SUCCESS __constant_cpu_to_le32(0x00000000) +#define STATUS_WAIT_0 __constant_cpu_to_le32(0x00000000) +#define STATUS_WAIT_1 __constant_cpu_to_le32(0x00000001) +#define STATUS_WAIT_2 __constant_cpu_to_le32(0x00000002) +#define STATUS_WAIT_3 __constant_cpu_to_le32(0x00000003) +#define STATUS_WAIT_63 __constant_cpu_to_le32(0x0000003F) +#define STATUS_ABANDONED __constant_cpu_to_le32(0x00000080) +#define STATUS_ABANDONED_WAIT_0 __constant_cpu_to_le32(0x00000080) +#define STATUS_ABANDONED_WAIT_63 __constant_cpu_to_le32(0x000000BF) +#define STATUS_USER_APC __constant_cpu_to_le32(0x000000C0) +#define STATUS_KERNEL_APC __constant_cpu_to_le32(0x00000100) +#define STATUS_ALERTED __constant_cpu_to_le32(0x00000101) +#define STATUS_TIMEOUT
__constant_cpu_to_le32(0x00000102) +#define STATUS_PENDING __constant_cpu_to_le32(0x00000103) +#define STATUS_REPARSE __constant_cpu_to_le32(0x00000104) +#define STATUS_MORE_ENTRIES __constant_cpu_to_le32(0x00000105) +#define STATUS_NOT_ALL_ASSIGNED __constant_cpu_to_le32(0x00000106) +#define STATUS_SOME_NOT_MAPPED __constant_cpu_to_le32(0x00000107) +#define STATUS_OPLOCK_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x00000108) +#define STATUS_VOLUME_MOUNTED __constant_cpu_to_le32(0x00000109) +#define STATUS_RXACT_COMMITTED __constant_cpu_to_le32(0x0000010A) +#define STATUS_NOTIFY_CLEANUP __constant_cpu_to_le32(0x0000010B) +#define STATUS_NOTIFY_ENUM_DIR __constant_cpu_to_le32(0x0000010C) +#define STATUS_NO_QUOTAS_FOR_ACCOUNT __constant_cpu_to_le32(0x0000010D) +#define STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED __constant_cpu_to_le32(0x0000010E) +#define STATUS_PAGE_FAULT_TRANSITION __constant_cpu_to_le32(0x00000110) +#define STATUS_PAGE_FAULT_DEMAND_ZERO __constant_cpu_to_le32(0x00000111) +#define STATUS_PAGE_FAULT_COPY_ON_WRITE __constant_cpu_to_le32(0x00000112) +#define STATUS_PAGE_FAULT_GUARD_PAGE __constant_cpu_to_le32(0x00000113) +#define STATUS_PAGE_FAULT_PAGING_FILE __constant_cpu_to_le32(0x00000114) +#define STATUS_CACHE_PAGE_LOCKED __constant_cpu_to_le32(0x00000115) +#define STATUS_CRASH_DUMP __constant_cpu_to_le32(0x00000116) +#define STATUS_BUFFER_ALL_ZEROS __constant_cpu_to_le32(0x00000117) +#define STATUS_REPARSE_OBJECT __constant_cpu_to_le32(0x00000118) +#define STATUS_RESOURCE_REQUIREMENTS_CHANGED __constant_cpu_to_le32(0x00000119) +#define STATUS_TRANSLATION_COMPLETE __constant_cpu_to_le32(0x00000120) +#define STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY __constant_cpu_to_le32(0x00000121) +#define STATUS_NOTHING_TO_TERMINATE __constant_cpu_to_le32(0x00000122) +#define STATUS_PROCESS_NOT_IN_JOB __constant_cpu_to_le32(0x00000123) +#define STATUS_PROCESS_IN_JOB __constant_cpu_to_le32(0x00000124) +#define STATUS_VOLSNAP_HIBERNATE_READY 
__constant_cpu_to_le32(0x00000125) +#define STATUS_FSFILTER_OP_COMPLETED_SUCCESSFULLY __constant_cpu_to_le32(0x00000126) +#define STATUS_INTERRUPT_VECTOR_ALREADY_CONNECTED __constant_cpu_to_le32(0x00000127) +#define STATUS_INTERRUPT_STILL_CONNECTED __constant_cpu_to_le32(0x00000128) +#define STATUS_PROCESS_CLONED __constant_cpu_to_le32(0x00000129) +#define STATUS_FILE_LOCKED_WITH_ONLY_READERS __constant_cpu_to_le32(0x0000012A) +#define STATUS_FILE_LOCKED_WITH_WRITERS __constant_cpu_to_le32(0x0000012B) +#define STATUS_RESOURCEMANAGER_READ_ONLY __constant_cpu_to_le32(0x00000202) +#define STATUS_WAIT_FOR_OPLOCK __constant_cpu_to_le32(0x00000367) +#define DBG_EXCEPTION_HANDLED __constant_cpu_to_le32(0x00010001) +#define DBG_CONTINUE __constant_cpu_to_le32(0x00010002) +#define STATUS_FLT_IO_COMPLETE __constant_cpu_to_le32(0x001C0001) +#define STATUS_OBJECT_NAME_EXISTS __constant_cpu_to_le32(0x40000000) +#define STATUS_THREAD_WAS_SUSPENDED __constant_cpu_to_le32(0x40000001) +#define STATUS_WORKING_SET_LIMIT_RANGE __constant_cpu_to_le32(0x40000002) +#define STATUS_IMAGE_NOT_AT_BASE __constant_cpu_to_le32(0x40000003) +#define STATUS_RXACT_STATE_CREATED __constant_cpu_to_le32(0x40000004) +#define STATUS_SEGMENT_NOTIFICATION __constant_cpu_to_le32(0x40000005) +#define STATUS_LOCAL_USER_SESSION_KEY __constant_cpu_to_le32(0x40000006) +#define STATUS_BAD_CURRENT_DIRECTORY __constant_cpu_to_le32(0x40000007) +#define STATUS_SERIAL_MORE_WRITES __constant_cpu_to_le32(0x40000008) +#define STATUS_REGISTRY_RECOVERED __constant_cpu_to_le32(0x40000009) +#define STATUS_FT_READ_RECOVERY_FROM_BACKUP __constant_cpu_to_le32(0x4000000A) +#define STATUS_FT_WRITE_RECOVERY __constant_cpu_to_le32(0x4000000B) +#define STATUS_SERIAL_COUNTER_TIMEOUT __constant_cpu_to_le32(0x4000000C) +#define STATUS_NULL_LM_PASSWORD __constant_cpu_to_le32(0x4000000D) +#define STATUS_IMAGE_MACHINE_TYPE_MISMATCH __constant_cpu_to_le32(0x4000000E) +#define STATUS_RECEIVE_PARTIAL __constant_cpu_to_le32(0x4000000F) 
+#define STATUS_RECEIVE_EXPEDITED __constant_cpu_to_le32(0x40000010) +#define STATUS_RECEIVE_PARTIAL_EXPEDITED __constant_cpu_to_le32(0x40000011) +#define STATUS_EVENT_DONE __constant_cpu_to_le32(0x40000012) +#define STATUS_EVENT_PENDING __constant_cpu_to_le32(0x40000013) +#define STATUS_CHECKING_FILE_SYSTEM __constant_cpu_to_le32(0x40000014) +#define STATUS_FATAL_APP_EXIT __constant_cpu_to_le32(0x40000015) +#define STATUS_PREDEFINED_HANDLE __constant_cpu_to_le32(0x40000016) +#define STATUS_WAS_UNLOCKED __constant_cpu_to_le32(0x40000017) +#define STATUS_SERVICE_NOTIFICATION __constant_cpu_to_le32(0x40000018) +#define STATUS_WAS_LOCKED __constant_cpu_to_le32(0x40000019) +#define STATUS_LOG_HARD_ERROR __constant_cpu_to_le32(0x4000001A) +#define STATUS_ALREADY_WIN32 __constant_cpu_to_le32(0x4000001B) +#define STATUS_WX86_UNSIMULATE __constant_cpu_to_le32(0x4000001C) +#define STATUS_WX86_CONTINUE __constant_cpu_to_le32(0x4000001D) +#define STATUS_WX86_SINGLE_STEP __constant_cpu_to_le32(0x4000001E) +#define STATUS_WX86_BREAKPOINT __constant_cpu_to_le32(0x4000001F) +#define STATUS_WX86_EXCEPTION_CONTINUE __constant_cpu_to_le32(0x40000020) +#define STATUS_WX86_EXCEPTION_LASTCHANCE __constant_cpu_to_le32(0x40000021) +#define STATUS_WX86_EXCEPTION_CHAIN __constant_cpu_to_le32(0x40000022) +#define STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE __constant_cpu_to_le32(0x40000023) +#define STATUS_NO_YIELD_PERFORMED __constant_cpu_to_le32(0x40000024) +#define STATUS_TIMER_RESUME_IGNORED __constant_cpu_to_le32(0x40000025) +#define STATUS_ARBITRATION_UNHANDLED __constant_cpu_to_le32(0x40000026) +#define STATUS_CARDBUS_NOT_SUPPORTED __constant_cpu_to_le32(0x40000027) +#define STATUS_WX86_CREATEWX86TIB __constant_cpu_to_le32(0x40000028) +#define STATUS_MP_PROCESSOR_MISMATCH __constant_cpu_to_le32(0x40000029) +#define STATUS_HIBERNATED __constant_cpu_to_le32(0x4000002A) +#define STATUS_RESUME_HIBERNATION __constant_cpu_to_le32(0x4000002B) +#define STATUS_FIRMWARE_UPDATED 
__constant_cpu_to_le32(0x4000002C) +#define STATUS_DRIVERS_LEAKING_LOCKED_PAGES __constant_cpu_to_le32(0x4000002D) +#define STATUS_MESSAGE_RETRIEVED __constant_cpu_to_le32(0x4000002E) +#define STATUS_SYSTEM_POWERSTATE_TRANSITION __constant_cpu_to_le32(0x4000002F) +#define STATUS_ALPC_CHECK_COMPLETION_LIST __constant_cpu_to_le32(0x40000030) +#define STATUS_SYSTEM_POWERSTATE_COMPLEX_TRANSITION __constant_cpu_to_le32(0x40000031) +#define STATUS_ACCESS_AUDIT_BY_POLICY __constant_cpu_to_le32(0x40000032) +#define STATUS_ABANDON_HIBERFILE __constant_cpu_to_le32(0x40000033) +#define STATUS_BIZRULES_NOT_ENABLED __constant_cpu_to_le32(0x40000034) +#define STATUS_WAKE_SYSTEM __constant_cpu_to_le32(0x40000294) +#define STATUS_DS_SHUTTING_DOWN __constant_cpu_to_le32(0x40000370) +#define DBG_REPLY_LATER __constant_cpu_to_le32(0x40010001) +#define DBG_UNABLE_TO_PROVIDE_HANDLE __constant_cpu_to_le32(0x40010002) +#define DBG_TERMINATE_THREAD __constant_cpu_to_le32(0x40010003) +#define DBG_TERMINATE_PROCESS __constant_cpu_to_le32(0x40010004) +#define DBG_CONTROL_C __constant_cpu_to_le32(0x40010005) +#define DBG_PRINTEXCEPTION_C __constant_cpu_to_le32(0x40010006) +#define DBG_RIPEXCEPTION __constant_cpu_to_le32(0x40010007) +#define DBG_CONTROL_BREAK __constant_cpu_to_le32(0x40010008) +#define DBG_COMMAND_EXCEPTION __constant_cpu_to_le32(0x40010009) +#define RPC_NT_UUID_LOCAL_ONLY __constant_cpu_to_le32(0x40020056) +#define RPC_NT_SEND_INCOMPLETE __constant_cpu_to_le32(0x400200AF) +#define STATUS_CTX_CDM_CONNECT __constant_cpu_to_le32(0x400A0004) +#define STATUS_CTX_CDM_DISCONNECT __constant_cpu_to_le32(0x400A0005) +#define STATUS_SXS_RELEASE_ACTIVATION_CONTEXT __constant_cpu_to_le32(0x4015000D) +#define STATUS_RECOVERY_NOT_NEEDED __constant_cpu_to_le32(0x40190034) +#define STATUS_RM_ALREADY_STARTED __constant_cpu_to_le32(0x40190035) +#define STATUS_LOG_NO_RESTART __constant_cpu_to_le32(0x401A000C) +#define STATUS_VIDEO_DRIVER_DEBUG_REPORT_REQUEST __constant_cpu_to_le32(0x401B00EC) 
+#define STATUS_GRAPHICS_PARTIAL_DATA_POPULATED __constant_cpu_to_le32(0x401E000A) +#define STATUS_GRAPHICS_DRIVER_MISMATCH __constant_cpu_to_le32(0x401E0117) +#define STATUS_GRAPHICS_MODE_NOT_PINNED __constant_cpu_to_le32(0x401E0307) +#define STATUS_GRAPHICS_NO_PREFERRED_MODE __constant_cpu_to_le32(0x401E031E) +#define STATUS_GRAPHICS_DATASET_IS_EMPTY __constant_cpu_to_le32(0x401E034B) +#define STATUS_GRAPHICS_NO_MORE_ELEMENTS_IN_DATASET __constant_cpu_to_le32(0x401E034C) +#define STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_PINNED __constant_cpu_to_le32(0x401E0351) +#define STATUS_GRAPHICS_UNKNOWN_CHILD_STATUS __constant_cpu_to_le32(0x401E042F) +#define STATUS_GRAPHICS_LEADLINK_START_DEFERRED __constant_cpu_to_le32(0x401E0437) +#define STATUS_GRAPHICS_POLLING_TOO_FREQUENTLY __constant_cpu_to_le32(0x401E0439) +#define STATUS_GRAPHICS_START_DEFERRED __constant_cpu_to_le32(0x401E043A) +#define STATUS_NDIS_INDICATION_REQUIRED __constant_cpu_to_le32(0x40230001) +#define STATUS_GUARD_PAGE_VIOLATION __constant_cpu_to_le32(0x80000001) +#define STATUS_DATATYPE_MISALIGNMENT __constant_cpu_to_le32(0x80000002) +#define STATUS_BREAKPOINT __constant_cpu_to_le32(0x80000003) +#define STATUS_SINGLE_STEP __constant_cpu_to_le32(0x80000004) +#define STATUS_BUFFER_OVERFLOW __constant_cpu_to_le32(0x80000005) +#define STATUS_NO_MORE_FILES __constant_cpu_to_le32(0x80000006) +#define STATUS_WAKE_SYSTEM_DEBUGGER __constant_cpu_to_le32(0x80000007) +#define STATUS_HANDLES_CLOSED __constant_cpu_to_le32(0x8000000A) +#define STATUS_NO_INHERITANCE __constant_cpu_to_le32(0x8000000B) +#define STATUS_GUID_SUBSTITUTION_MADE __constant_cpu_to_le32(0x8000000C) +#define STATUS_PARTIAL_COPY __constant_cpu_to_le32(0x8000000D) +#define STATUS_DEVICE_PAPER_EMPTY __constant_cpu_to_le32(0x8000000E) +#define STATUS_DEVICE_POWERED_OFF __constant_cpu_to_le32(0x8000000F) +#define STATUS_DEVICE_OFF_LINE __constant_cpu_to_le32(0x80000010) +#define STATUS_DEVICE_BUSY __constant_cpu_to_le32(0x80000011) 
+#define STATUS_NO_MORE_EAS __constant_cpu_to_le32(0x80000012) +#define STATUS_INVALID_EA_NAME __constant_cpu_to_le32(0x80000013) +#define STATUS_EA_LIST_INCONSISTENT __constant_cpu_to_le32(0x80000014) +#define STATUS_INVALID_EA_FLAG __constant_cpu_to_le32(0x80000015) +#define STATUS_VERIFY_REQUIRED __constant_cpu_to_le32(0x80000016) +#define STATUS_EXTRANEOUS_INFORMATION __constant_cpu_to_le32(0x80000017) +#define STATUS_RXACT_COMMIT_NECESSARY __constant_cpu_to_le32(0x80000018) +#define STATUS_NO_MORE_ENTRIES __constant_cpu_to_le32(0x8000001A) +#define STATUS_FILEMARK_DETECTED __constant_cpu_to_le32(0x8000001B) +#define STATUS_MEDIA_CHANGED __constant_cpu_to_le32(0x8000001C) +#define STATUS_BUS_RESET __constant_cpu_to_le32(0x8000001D) +#define STATUS_END_OF_MEDIA __constant_cpu_to_le32(0x8000001E) +#define STATUS_BEGINNING_OF_MEDIA __constant_cpu_to_le32(0x8000001F) +#define STATUS_MEDIA_CHECK __constant_cpu_to_le32(0x80000020) +#define STATUS_SETMARK_DETECTED __constant_cpu_to_le32(0x80000021) +#define STATUS_NO_DATA_DETECTED __constant_cpu_to_le32(0x80000022) +#define STATUS_REDIRECTOR_HAS_OPEN_HANDLES __constant_cpu_to_le32(0x80000023) +#define STATUS_SERVER_HAS_OPEN_HANDLES __constant_cpu_to_le32(0x80000024) +#define STATUS_ALREADY_DISCONNECTED __constant_cpu_to_le32(0x80000025) +#define STATUS_LONGJUMP __constant_cpu_to_le32(0x80000026) +#define STATUS_CLEANER_CARTRIDGE_INSTALLED __constant_cpu_to_le32(0x80000027) +#define STATUS_PLUGPLAY_QUERY_VETOED __constant_cpu_to_le32(0x80000028) +#define STATUS_UNWIND_CONSOLIDATE __constant_cpu_to_le32(0x80000029) +#define STATUS_REGISTRY_HIVE_RECOVERED __constant_cpu_to_le32(0x8000002A) +#define STATUS_DLL_MIGHT_BE_INSECURE __constant_cpu_to_le32(0x8000002B) +#define STATUS_DLL_MIGHT_BE_INCOMPATIBLE __constant_cpu_to_le32(0x8000002C) +#define STATUS_STOPPED_ON_SYMLINK __constant_cpu_to_le32(0x8000002D) +#define STATUS_DEVICE_REQUIRES_CLEANING __constant_cpu_to_le32(0x80000288) +#define STATUS_DEVICE_DOOR_OPEN 
__constant_cpu_to_le32(0x80000289) +#define STATUS_DATA_LOST_REPAIR __constant_cpu_to_le32(0x80000803) +#define DBG_EXCEPTION_NOT_HANDLED __constant_cpu_to_le32(0x80010001) +#define STATUS_CLUSTER_NODE_ALREADY_UP __constant_cpu_to_le32(0x80130001) +#define STATUS_CLUSTER_NODE_ALREADY_DOWN __constant_cpu_to_le32(0x80130002) +#define STATUS_CLUSTER_NETWORK_ALREADY_ONLINE __constant_cpu_to_le32(0x80130003) +#define STATUS_CLUSTER_NETWORK_ALREADY_OFFLINE __constant_cpu_to_le32(0x80130004) +#define STATUS_CLUSTER_NODE_ALREADY_MEMBER __constant_cpu_to_le32(0x80130005) +#define STATUS_COULD_NOT_RESIZE_LOG __constant_cpu_to_le32(0x80190009) +#define STATUS_NO_TXF_METADATA __constant_cpu_to_le32(0x80190029) +#define STATUS_CANT_RECOVER_WITH_HANDLE_OPEN __constant_cpu_to_le32(0x80190031) +#define STATUS_TXF_METADATA_ALREADY_PRESENT __constant_cpu_to_le32(0x80190041) +#define STATUS_TRANSACTION_SCOPE_CALLBACKS_NOT_SET __constant_cpu_to_le32(0x80190042) +#define STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD_RECOVERED __constant_cpu_to_le32(0x801B00EB) +#define STATUS_FLT_BUFFER_TOO_SMALL __constant_cpu_to_le32(0x801C0001) +#define STATUS_FVE_PARTIAL_METADATA __constant_cpu_to_le32(0x80210001) +#define STATUS_UNSUCCESSFUL __constant_cpu_to_le32(0xC0000001) +#define STATUS_NOT_IMPLEMENTED __constant_cpu_to_le32(0xC0000002) +#define STATUS_INVALID_INFO_CLASS __constant_cpu_to_le32(0xC0000003) +#define STATUS_INFO_LENGTH_MISMATCH __constant_cpu_to_le32(0xC0000004) +#define STATUS_ACCESS_VIOLATION __constant_cpu_to_le32(0xC0000005) +#define STATUS_IN_PAGE_ERROR __constant_cpu_to_le32(0xC0000006) +#define STATUS_PAGEFILE_QUOTA __constant_cpu_to_le32(0xC0000007) +#define STATUS_INVALID_HANDLE __constant_cpu_to_le32(0xC0000008) +#define STATUS_BAD_INITIAL_STACK __constant_cpu_to_le32(0xC0000009) +#define STATUS_BAD_INITIAL_PC __constant_cpu_to_le32(0xC000000A) +#define STATUS_INVALID_CID __constant_cpu_to_le32(0xC000000B) +#define STATUS_TIMER_NOT_CANCELED __constant_cpu_to_le32(0xC000000C) 
+#define STATUS_INVALID_PARAMETER __constant_cpu_to_le32(0xC000000D) +#define STATUS_NO_SUCH_DEVICE __constant_cpu_to_le32(0xC000000E) +#define STATUS_NO_SUCH_FILE __constant_cpu_to_le32(0xC000000F) +#define STATUS_INVALID_DEVICE_REQUEST __constant_cpu_to_le32(0xC0000010) +#define STATUS_END_OF_FILE __constant_cpu_to_le32(0xC0000011) +#define STATUS_WRONG_VOLUME __constant_cpu_to_le32(0xC0000012) +#define STATUS_NO_MEDIA_IN_DEVICE __constant_cpu_to_le32(0xC0000013) +#define STATUS_UNRECOGNIZED_MEDIA __constant_cpu_to_le32(0xC0000014) +#define STATUS_NONEXISTENT_SECTOR __constant_cpu_to_le32(0xC0000015) +#define STATUS_MORE_PROCESSING_REQUIRED __constant_cpu_to_le32(0xC0000016) +#define STATUS_NO_MEMORY __constant_cpu_to_le32(0xC0000017) +#define STATUS_CONFLICTING_ADDRESSES __constant_cpu_to_le32(0xC0000018) +#define STATUS_NOT_MAPPED_VIEW __constant_cpu_to_le32(0xC0000019) +#define STATUS_UNABLE_TO_FREE_VM __constant_cpu_to_le32(0xC000001A) +#define STATUS_UNABLE_TO_DELETE_SECTION __constant_cpu_to_le32(0xC000001B) +#define STATUS_INVALID_SYSTEM_SERVICE __constant_cpu_to_le32(0xC000001C) +#define STATUS_ILLEGAL_INSTRUCTION __constant_cpu_to_le32(0xC000001D) +#define STATUS_INVALID_LOCK_SEQUENCE __constant_cpu_to_le32(0xC000001E) +#define STATUS_INVALID_VIEW_SIZE __constant_cpu_to_le32(0xC000001F) +#define STATUS_INVALID_FILE_FOR_SECTION __constant_cpu_to_le32(0xC0000020) +#define STATUS_ALREADY_COMMITTED __constant_cpu_to_le32(0xC0000021) +#define STATUS_ACCESS_DENIED __constant_cpu_to_le32(0xC0000022) +#define STATUS_BUFFER_TOO_SMALL __constant_cpu_to_le32(0xC0000023) +#define STATUS_OBJECT_TYPE_MISMATCH __constant_cpu_to_le32(0xC0000024) +#define STATUS_NONCONTINUABLE_EXCEPTION __constant_cpu_to_le32(0xC0000025) +#define STATUS_INVALID_DISPOSITION __constant_cpu_to_le32(0xC0000026) +#define STATUS_UNWIND __constant_cpu_to_le32(0xC0000027) +#define STATUS_BAD_STACK __constant_cpu_to_le32(0xC0000028) +#define STATUS_INVALID_UNWIND_TARGET 
__constant_cpu_to_le32(0xC0000029) +#define STATUS_NOT_LOCKED __constant_cpu_to_le32(0xC000002A) +#define STATUS_PARITY_ERROR __constant_cpu_to_le32(0xC000002B) +#define STATUS_UNABLE_TO_DECOMMIT_VM __constant_cpu_to_le32(0xC000002C) +#define STATUS_NOT_COMMITTED __constant_cpu_to_le32(0xC000002D) +#define STATUS_INVALID_PORT_ATTRIBUTES __constant_cpu_to_le32(0xC000002E) +#define STATUS_PORT_MESSAGE_TOO_LONG __constant_cpu_to_le32(0xC000002F) +#define STATUS_INVALID_PARAMETER_MIX __constant_cpu_to_le32(0xC0000030) +#define STATUS_INVALID_QUOTA_LOWER __constant_cpu_to_le32(0xC0000031) +#define STATUS_DISK_CORRUPT_ERROR __constant_cpu_to_le32(0xC0000032) +#define STATUS_OBJECT_NAME_INVALID __constant_cpu_to_le32(0xC0000033) +#define STATUS_OBJECT_NAME_NOT_FOUND __constant_cpu_to_le32(0xC0000034) +#define STATUS_OBJECT_NAME_COLLISION __constant_cpu_to_le32(0xC0000035) +#define STATUS_PORT_DISCONNECTED __constant_cpu_to_le32(0xC0000037) +#define STATUS_DEVICE_ALREADY_ATTACHED __constant_cpu_to_le32(0xC0000038) +#define STATUS_OBJECT_PATH_INVALID __constant_cpu_to_le32(0xC0000039) +#define STATUS_OBJECT_PATH_NOT_FOUND __constant_cpu_to_le32(0xC000003A) +#define STATUS_OBJECT_PATH_SYNTAX_BAD __constant_cpu_to_le32(0xC000003B) +#define STATUS_DATA_OVERRUN __constant_cpu_to_le32(0xC000003C) +#define STATUS_DATA_LATE_ERROR __constant_cpu_to_le32(0xC000003D) +#define STATUS_DATA_ERROR __constant_cpu_to_le32(0xC000003E) +#define STATUS_CRC_ERROR __constant_cpu_to_le32(0xC000003F) +#define STATUS_SECTION_TOO_BIG __constant_cpu_to_le32(0xC0000040) +#define STATUS_PORT_CONNECTION_REFUSED __constant_cpu_to_le32(0xC0000041) +#define STATUS_INVALID_PORT_HANDLE __constant_cpu_to_le32(0xC0000042) +#define STATUS_SHARING_VIOLATION __constant_cpu_to_le32(0xC0000043) +#define STATUS_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000044) +#define STATUS_INVALID_PAGE_PROTECTION __constant_cpu_to_le32(0xC0000045) +#define STATUS_MUTANT_NOT_OWNED __constant_cpu_to_le32(0xC0000046) +#define 
STATUS_SEMAPHORE_LIMIT_EXCEEDED __constant_cpu_to_le32(0xC0000047) +#define STATUS_PORT_ALREADY_SET __constant_cpu_to_le32(0xC0000048) +#define STATUS_SECTION_NOT_IMAGE __constant_cpu_to_le32(0xC0000049) +#define STATUS_SUSPEND_COUNT_EXCEEDED __constant_cpu_to_le32(0xC000004A) +#define STATUS_THREAD_IS_TERMINATING __constant_cpu_to_le32(0xC000004B) +#define STATUS_BAD_WORKING_SET_LIMIT __constant_cpu_to_le32(0xC000004C) +#define STATUS_INCOMPATIBLE_FILE_MAP __constant_cpu_to_le32(0xC000004D) +#define STATUS_SECTION_PROTECTION __constant_cpu_to_le32(0xC000004E) +#define STATUS_EAS_NOT_SUPPORTED __constant_cpu_to_le32(0xC000004F) +#define STATUS_EA_TOO_LARGE __constant_cpu_to_le32(0xC0000050) +#define STATUS_NONEXISTENT_EA_ENTRY __constant_cpu_to_le32(0xC0000051) +#define STATUS_NO_EAS_ON_FILE __constant_cpu_to_le32(0xC0000052) +#define STATUS_EA_CORRUPT_ERROR __constant_cpu_to_le32(0xC0000053) +#define STATUS_FILE_LOCK_CONFLICT __constant_cpu_to_le32(0xC0000054) +#define STATUS_LOCK_NOT_GRANTED __constant_cpu_to_le32(0xC0000055) +#define STATUS_DELETE_PENDING __constant_cpu_to_le32(0xC0000056) +#define STATUS_CTL_FILE_NOT_SUPPORTED __constant_cpu_to_le32(0xC0000057) +#define STATUS_UNKNOWN_REVISION __constant_cpu_to_le32(0xC0000058) +#define STATUS_REVISION_MISMATCH __constant_cpu_to_le32(0xC0000059) +#define STATUS_INVALID_OWNER __constant_cpu_to_le32(0xC000005A) +#define STATUS_INVALID_PRIMARY_GROUP __constant_cpu_to_le32(0xC000005B) +#define STATUS_NO_IMPERSONATION_TOKEN __constant_cpu_to_le32(0xC000005C) +#define STATUS_CANT_DISABLE_MANDATORY __constant_cpu_to_le32(0xC000005D) +#define STATUS_NO_LOGON_SERVERS __constant_cpu_to_le32(0xC000005E) +#define STATUS_NO_SUCH_LOGON_SESSION __constant_cpu_to_le32(0xC000005F) +#define STATUS_NO_SUCH_PRIVILEGE __constant_cpu_to_le32(0xC0000060) +#define STATUS_PRIVILEGE_NOT_HELD __constant_cpu_to_le32(0xC0000061) +#define STATUS_INVALID_ACCOUNT_NAME __constant_cpu_to_le32(0xC0000062) +#define STATUS_USER_EXISTS 
__constant_cpu_to_le32(0xC0000063) +#define STATUS_NO_SUCH_USER __constant_cpu_to_le32(0xC0000064) +#define STATUS_GROUP_EXISTS __constant_cpu_to_le32(0xC0000065) +#define STATUS_NO_SUCH_GROUP __constant_cpu_to_le32(0xC0000066) +#define STATUS_MEMBER_IN_GROUP __constant_cpu_to_le32(0xC0000067) +#define STATUS_MEMBER_NOT_IN_GROUP __constant_cpu_to_le32(0xC0000068) +#define STATUS_LAST_ADMIN __constant_cpu_to_le32(0xC0000069) +#define STATUS_WRONG_PASSWORD __constant_cpu_to_le32(0xC000006A) +#define STATUS_ILL_FORMED_PASSWORD __constant_cpu_to_le32(0xC000006B) +#define STATUS_PASSWORD_RESTRICTION __constant_cpu_to_le32(0xC000006C) +#define STATUS_LOGON_FAILURE __constant_cpu_to_le32(0xC000006D) +#define STATUS_ACCOUNT_RESTRICTION __constant_cpu_to_le32(0xC000006E) +#define STATUS_INVALID_LOGON_HOURS __constant_cpu_to_le32(0xC000006F) +#define STATUS_INVALID_WORKSTATION __constant_cpu_to_le32(0xC0000070) +#define STATUS_PASSWORD_EXPIRED __constant_cpu_to_le32(0xC0000071) +#define STATUS_ACCOUNT_DISABLED __constant_cpu_to_le32(0xC0000072) +#define STATUS_NONE_MAPPED __constant_cpu_to_le32(0xC0000073) +#define STATUS_TOO_MANY_LUIDS_REQUESTED __constant_cpu_to_le32(0xC0000074) +#define STATUS_LUIDS_EXHAUSTED __constant_cpu_to_le32(0xC0000075) +#define STATUS_INVALID_SUB_AUTHORITY __constant_cpu_to_le32(0xC0000076) +#define STATUS_INVALID_ACL __constant_cpu_to_le32(0xC0000077) +#define STATUS_INVALID_SID __constant_cpu_to_le32(0xC0000078) +#define STATUS_INVALID_SECURITY_DESCR __constant_cpu_to_le32(0xC0000079) +#define STATUS_PROCEDURE_NOT_FOUND __constant_cpu_to_le32(0xC000007A) +#define STATUS_INVALID_IMAGE_FORMAT __constant_cpu_to_le32(0xC000007B) +#define STATUS_NO_TOKEN __constant_cpu_to_le32(0xC000007C) +#define STATUS_BAD_INHERITANCE_ACL __constant_cpu_to_le32(0xC000007D) +#define STATUS_RANGE_NOT_LOCKED __constant_cpu_to_le32(0xC000007E) +#define STATUS_DISK_FULL __constant_cpu_to_le32(0xC000007F) +#define STATUS_SERVER_DISABLED __constant_cpu_to_le32(0xC0000080) 
+#define STATUS_SERVER_NOT_DISABLED __constant_cpu_to_le32(0xC0000081) +#define STATUS_TOO_MANY_GUIDS_REQUESTED __constant_cpu_to_le32(0xC0000082) +#define STATUS_GUIDS_EXHAUSTED __constant_cpu_to_le32(0xC0000083) +#define STATUS_INVALID_ID_AUTHORITY __constant_cpu_to_le32(0xC0000084) +#define STATUS_AGENTS_EXHAUSTED __constant_cpu_to_le32(0xC0000085) +#define STATUS_INVALID_VOLUME_LABEL __constant_cpu_to_le32(0xC0000086) +#define STATUS_SECTION_NOT_EXTENDED __constant_cpu_to_le32(0xC0000087) +#define STATUS_NOT_MAPPED_DATA __constant_cpu_to_le32(0xC0000088) +#define STATUS_RESOURCE_DATA_NOT_FOUND __constant_cpu_to_le32(0xC0000089) +#define STATUS_RESOURCE_TYPE_NOT_FOUND __constant_cpu_to_le32(0xC000008A) +#define STATUS_RESOURCE_NAME_NOT_FOUND __constant_cpu_to_le32(0xC000008B) +#define STATUS_ARRAY_BOUNDS_EXCEEDED __constant_cpu_to_le32(0xC000008C) +#define STATUS_FLOAT_DENORMAL_OPERAND __constant_cpu_to_le32(0xC000008D) +#define STATUS_FLOAT_DIVIDE_BY_ZERO __constant_cpu_to_le32(0xC000008E) +#define STATUS_FLOAT_INEXACT_RESULT __constant_cpu_to_le32(0xC000008F) +#define STATUS_FLOAT_INVALID_OPERATION __constant_cpu_to_le32(0xC0000090) +#define STATUS_FLOAT_OVERFLOW __constant_cpu_to_le32(0xC0000091) +#define STATUS_FLOAT_STACK_CHECK __constant_cpu_to_le32(0xC0000092) +#define STATUS_FLOAT_UNDERFLOW __constant_cpu_to_le32(0xC0000093) +#define STATUS_INTEGER_DIVIDE_BY_ZERO __constant_cpu_to_le32(0xC0000094) +#define STATUS_INTEGER_OVERFLOW __constant_cpu_to_le32(0xC0000095) +#define STATUS_PRIVILEGED_INSTRUCTION __constant_cpu_to_le32(0xC0000096) +#define STATUS_TOO_MANY_PAGING_FILES __constant_cpu_to_le32(0xC0000097) +#define STATUS_FILE_INVALID __constant_cpu_to_le32(0xC0000098) +#define STATUS_ALLOTTED_SPACE_EXCEEDED __constant_cpu_to_le32(0xC0000099) +#define STATUS_INSUFFICIENT_RESOURCES __constant_cpu_to_le32(0xC000009A) +#define STATUS_DFS_EXIT_PATH_FOUND __constant_cpu_to_le32(0xC000009B) +#define STATUS_DEVICE_DATA_ERROR __constant_cpu_to_le32(0xC000009C) 
+#define STATUS_DEVICE_NOT_CONNECTED __constant_cpu_to_le32(0xC000009D) +#define STATUS_DEVICE_POWER_FAILURE __constant_cpu_to_le32(0xC000009E) +#define STATUS_FREE_VM_NOT_AT_BASE __constant_cpu_to_le32(0xC000009F) +#define STATUS_MEMORY_NOT_ALLOCATED __constant_cpu_to_le32(0xC00000A0) +#define STATUS_WORKING_SET_QUOTA __constant_cpu_to_le32(0xC00000A1) +#define STATUS_MEDIA_WRITE_PROTECTED __constant_cpu_to_le32(0xC00000A2) +#define STATUS_DEVICE_NOT_READY __constant_cpu_to_le32(0xC00000A3) +#define STATUS_INVALID_GROUP_ATTRIBUTES __constant_cpu_to_le32(0xC00000A4) +#define STATUS_BAD_IMPERSONATION_LEVEL __constant_cpu_to_le32(0xC00000A5) +#define STATUS_CANT_OPEN_ANONYMOUS __constant_cpu_to_le32(0xC00000A6) +#define STATUS_BAD_VALIDATION_CLASS __constant_cpu_to_le32(0xC00000A7) +#define STATUS_BAD_TOKEN_TYPE __constant_cpu_to_le32(0xC00000A8) +#define STATUS_BAD_MASTER_BOOT_RECORD __constant_cpu_to_le32(0xC00000A9) +#define STATUS_INSTRUCTION_MISALIGNMENT __constant_cpu_to_le32(0xC00000AA) +#define STATUS_INSTANCE_NOT_AVAILABLE __constant_cpu_to_le32(0xC00000AB) +#define STATUS_PIPE_NOT_AVAILABLE __constant_cpu_to_le32(0xC00000AC) +#define STATUS_INVALID_PIPE_STATE __constant_cpu_to_le32(0xC00000AD) +#define STATUS_PIPE_BUSY __constant_cpu_to_le32(0xC00000AE) +#define STATUS_ILLEGAL_FUNCTION __constant_cpu_to_le32(0xC00000AF) +#define STATUS_PIPE_DISCONNECTED __constant_cpu_to_le32(0xC00000B0) +#define STATUS_PIPE_CLOSING __constant_cpu_to_le32(0xC00000B1) +#define STATUS_PIPE_CONNECTED __constant_cpu_to_le32(0xC00000B2) +#define STATUS_PIPE_LISTENING __constant_cpu_to_le32(0xC00000B3) +#define STATUS_INVALID_READ_MODE __constant_cpu_to_le32(0xC00000B4) +#define STATUS_IO_TIMEOUT __constant_cpu_to_le32(0xC00000B5) +#define STATUS_FILE_FORCED_CLOSED __constant_cpu_to_le32(0xC00000B6) +#define STATUS_PROFILING_NOT_STARTED __constant_cpu_to_le32(0xC00000B7) +#define STATUS_PROFILING_NOT_STOPPED __constant_cpu_to_le32(0xC00000B8) +#define STATUS_COULD_NOT_INTERPRET 
__constant_cpu_to_le32(0xC00000B9) +#define STATUS_FILE_IS_A_DIRECTORY __constant_cpu_to_le32(0xC00000BA) +#define STATUS_NOT_SUPPORTED __constant_cpu_to_le32(0xC00000BB) +#define STATUS_REMOTE_NOT_LISTENING __constant_cpu_to_le32(0xC00000BC) +#define STATUS_DUPLICATE_NAME __constant_cpu_to_le32(0xC00000BD) +#define STATUS_BAD_NETWORK_PATH __constant_cpu_to_le32(0xC00000BE) +#define STATUS_NETWORK_BUSY __constant_cpu_to_le32(0xC00000BF) +#define STATUS_DEVICE_DOES_NOT_EXIST __constant_cpu_to_le32(0xC00000C0) +#define STATUS_TOO_MANY_COMMANDS __constant_cpu_to_le32(0xC00000C1) +#define STATUS_ADAPTER_HARDWARE_ERROR __constant_cpu_to_le32(0xC00000C2) +#define STATUS_INVALID_NETWORK_RESPONSE __constant_cpu_to_le32(0xC00000C3) +#define STATUS_UNEXPECTED_NETWORK_ERROR __constant_cpu_to_le32(0xC00000C4) +#define STATUS_BAD_REMOTE_ADAPTER __constant_cpu_to_le32(0xC00000C5) +#define STATUS_PRINT_QUEUE_FULL __constant_cpu_to_le32(0xC00000C6) +#define STATUS_NO_SPOOL_SPACE __constant_cpu_to_le32(0xC00000C7) +#define STATUS_PRINT_CANCELLED __constant_cpu_to_le32(0xC00000C8) +#define STATUS_NETWORK_NAME_DELETED __constant_cpu_to_le32(0xC00000C9) +#define STATUS_NETWORK_ACCESS_DENIED __constant_cpu_to_le32(0xC00000CA) +#define STATUS_BAD_DEVICE_TYPE __constant_cpu_to_le32(0xC00000CB) +#define STATUS_BAD_NETWORK_NAME __constant_cpu_to_le32(0xC00000CC) +#define STATUS_TOO_MANY_NAMES __constant_cpu_to_le32(0xC00000CD) +#define STATUS_TOO_MANY_SESSIONS __constant_cpu_to_le32(0xC00000CE) +#define STATUS_SHARING_PAUSED __constant_cpu_to_le32(0xC00000CF) +#define STATUS_REQUEST_NOT_ACCEPTED __constant_cpu_to_le32(0xC00000D0) +#define STATUS_REDIRECTOR_PAUSED __constant_cpu_to_le32(0xC00000D1) +#define STATUS_NET_WRITE_FAULT __constant_cpu_to_le32(0xC00000D2) +#define STATUS_PROFILING_AT_LIMIT __constant_cpu_to_le32(0xC00000D3) +#define STATUS_NOT_SAME_DEVICE __constant_cpu_to_le32(0xC00000D4) +#define STATUS_FILE_RENAMED __constant_cpu_to_le32(0xC00000D5) +#define 
STATUS_VIRTUAL_CIRCUIT_CLOSED __constant_cpu_to_le32(0xC00000D6) +#define STATUS_NO_SECURITY_ON_OBJECT __constant_cpu_to_le32(0xC00000D7) +#define STATUS_CANT_WAIT __constant_cpu_to_le32(0xC00000D8) +#define STATUS_PIPE_EMPTY __constant_cpu_to_le32(0xC00000D9) +#define STATUS_CANT_ACCESS_DOMAIN_INFO __constant_cpu_to_le32(0xC00000DA) +#define STATUS_CANT_TERMINATE_SELF __constant_cpu_to_le32(0xC00000DB) +#define STATUS_INVALID_SERVER_STATE __constant_cpu_to_le32(0xC00000DC) +#define STATUS_INVALID_DOMAIN_STATE __constant_cpu_to_le32(0xC00000DD) +#define STATUS_INVALID_DOMAIN_ROLE __constant_cpu_to_le32(0xC00000DE) +#define STATUS_NO_SUCH_DOMAIN __constant_cpu_to_le32(0xC00000DF) +#define STATUS_DOMAIN_EXISTS __constant_cpu_to_le32(0xC00000E0) +#define STATUS_DOMAIN_LIMIT_EXCEEDED __constant_cpu_to_le32(0xC00000E1) +#define STATUS_OPLOCK_NOT_GRANTED __constant_cpu_to_le32(0xC00000E2) +#define STATUS_INVALID_OPLOCK_PROTOCOL __constant_cpu_to_le32(0xC00000E3) +#define STATUS_INTERNAL_DB_CORRUPTION __constant_cpu_to_le32(0xC00000E4) +#define STATUS_INTERNAL_ERROR __constant_cpu_to_le32(0xC00000E5) +#define STATUS_GENERIC_NOT_MAPPED __constant_cpu_to_le32(0xC00000E6) +#define STATUS_BAD_DESCRIPTOR_FORMAT __constant_cpu_to_le32(0xC00000E7) +#define STATUS_INVALID_USER_BUFFER __constant_cpu_to_le32(0xC00000E8) +#define STATUS_UNEXPECTED_IO_ERROR __constant_cpu_to_le32(0xC00000E9) +#define STATUS_UNEXPECTED_MM_CREATE_ERR __constant_cpu_to_le32(0xC00000EA) +#define STATUS_UNEXPECTED_MM_MAP_ERROR __constant_cpu_to_le32(0xC00000EB) +#define STATUS_UNEXPECTED_MM_EXTEND_ERR __constant_cpu_to_le32(0xC00000EC) +#define STATUS_NOT_LOGON_PROCESS __constant_cpu_to_le32(0xC00000ED) +#define STATUS_LOGON_SESSION_EXISTS __constant_cpu_to_le32(0xC00000EE) +#define STATUS_INVALID_PARAMETER_1 __constant_cpu_to_le32(0xC00000EF) +#define STATUS_INVALID_PARAMETER_2 __constant_cpu_to_le32(0xC00000F0) +#define STATUS_INVALID_PARAMETER_3 __constant_cpu_to_le32(0xC00000F1) +#define 
STATUS_INVALID_PARAMETER_4 __constant_cpu_to_le32(0xC00000F2) +#define STATUS_INVALID_PARAMETER_5 __constant_cpu_to_le32(0xC00000F3) +#define STATUS_INVALID_PARAMETER_6 __constant_cpu_to_le32(0xC00000F4) +#define STATUS_INVALID_PARAMETER_7 __constant_cpu_to_le32(0xC00000F5) +#define STATUS_INVALID_PARAMETER_8 __constant_cpu_to_le32(0xC00000F6) +#define STATUS_INVALID_PARAMETER_9 __constant_cpu_to_le32(0xC00000F7) +#define STATUS_INVALID_PARAMETER_10 __constant_cpu_to_le32(0xC00000F8) +#define STATUS_INVALID_PARAMETER_11 __constant_cpu_to_le32(0xC00000F9) +#define STATUS_INVALID_PARAMETER_12 __constant_cpu_to_le32(0xC00000FA) +#define STATUS_REDIRECTOR_NOT_STARTED __constant_cpu_to_le32(0xC00000FB) +#define STATUS_REDIRECTOR_STARTED __constant_cpu_to_le32(0xC00000FC) +#define STATUS_STACK_OVERFLOW __constant_cpu_to_le32(0xC00000FD) +#define STATUS_NO_SUCH_PACKAGE __constant_cpu_to_le32(0xC00000FE) +#define STATUS_BAD_FUNCTION_TABLE __constant_cpu_to_le32(0xC00000FF) +#define STATUS_VARIABLE_NOT_FOUND __constant_cpu_to_le32(0xC0000100) +#define STATUS_DIRECTORY_NOT_EMPTY __constant_cpu_to_le32(0xC0000101) +#define STATUS_FILE_CORRUPT_ERROR __constant_cpu_to_le32(0xC0000102) +#define STATUS_NOT_A_DIRECTORY __constant_cpu_to_le32(0xC0000103) +#define STATUS_BAD_LOGON_SESSION_STATE __constant_cpu_to_le32(0xC0000104) +#define STATUS_LOGON_SESSION_COLLISION __constant_cpu_to_le32(0xC0000105) +#define STATUS_NAME_TOO_LONG __constant_cpu_to_le32(0xC0000106) +#define STATUS_FILES_OPEN __constant_cpu_to_le32(0xC0000107) +#define STATUS_CONNECTION_IN_USE __constant_cpu_to_le32(0xC0000108) +#define STATUS_MESSAGE_NOT_FOUND __constant_cpu_to_le32(0xC0000109) +#define STATUS_PROCESS_IS_TERMINATING __constant_cpu_to_le32(0xC000010A) +#define STATUS_INVALID_LOGON_TYPE __constant_cpu_to_le32(0xC000010B) +#define STATUS_NO_GUID_TRANSLATION __constant_cpu_to_le32(0xC000010C) +#define STATUS_CANNOT_IMPERSONATE __constant_cpu_to_le32(0xC000010D) +#define STATUS_IMAGE_ALREADY_LOADED 
__constant_cpu_to_le32(0xC000010E) +#define STATUS_ABIOS_NOT_PRESENT __constant_cpu_to_le32(0xC000010F) +#define STATUS_ABIOS_LID_NOT_EXIST __constant_cpu_to_le32(0xC0000110) +#define STATUS_ABIOS_LID_ALREADY_OWNED __constant_cpu_to_le32(0xC0000111) +#define STATUS_ABIOS_NOT_LID_OWNER __constant_cpu_to_le32(0xC0000112) +#define STATUS_ABIOS_INVALID_COMMAND __constant_cpu_to_le32(0xC0000113) +#define STATUS_ABIOS_INVALID_LID __constant_cpu_to_le32(0xC0000114) +#define STATUS_ABIOS_SELECTOR_NOT_AVAILABLE __constant_cpu_to_le32(0xC0000115) +#define STATUS_ABIOS_INVALID_SELECTOR __constant_cpu_to_le32(0xC0000116) +#define STATUS_NO_LDT __constant_cpu_to_le32(0xC0000117) +#define STATUS_INVALID_LDT_SIZE __constant_cpu_to_le32(0xC0000118) +#define STATUS_INVALID_LDT_OFFSET __constant_cpu_to_le32(0xC0000119) +#define STATUS_INVALID_LDT_DESCRIPTOR __constant_cpu_to_le32(0xC000011A) +#define STATUS_INVALID_IMAGE_NE_FORMAT __constant_cpu_to_le32(0xC000011B) +#define STATUS_RXACT_INVALID_STATE __constant_cpu_to_le32(0xC000011C) +#define STATUS_RXACT_COMMIT_FAILURE __constant_cpu_to_le32(0xC000011D) +#define STATUS_MAPPED_FILE_SIZE_ZERO __constant_cpu_to_le32(0xC000011E) +#define STATUS_TOO_MANY_OPENED_FILES __constant_cpu_to_le32(0xC000011F) +#define STATUS_CANCELLED __constant_cpu_to_le32(0xC0000120) +#define STATUS_CANNOT_DELETE __constant_cpu_to_le32(0xC0000121) +#define STATUS_INVALID_COMPUTER_NAME __constant_cpu_to_le32(0xC0000122) +#define STATUS_FILE_DELETED __constant_cpu_to_le32(0xC0000123) +#define STATUS_SPECIAL_ACCOUNT __constant_cpu_to_le32(0xC0000124) +#define STATUS_SPECIAL_GROUP __constant_cpu_to_le32(0xC0000125) +#define STATUS_SPECIAL_USER __constant_cpu_to_le32(0xC0000126) +#define STATUS_MEMBERS_PRIMARY_GROUP __constant_cpu_to_le32(0xC0000127) +#define STATUS_FILE_CLOSED __constant_cpu_to_le32(0xC0000128) +#define STATUS_TOO_MANY_THREADS __constant_cpu_to_le32(0xC0000129) +#define STATUS_THREAD_NOT_IN_PROCESS __constant_cpu_to_le32(0xC000012A) +#define 
STATUS_TOKEN_ALREADY_IN_USE __constant_cpu_to_le32(0xC000012B) +#define STATUS_PAGEFILE_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC000012C) +#define STATUS_COMMITMENT_LIMIT __constant_cpu_to_le32(0xC000012D) +#define STATUS_INVALID_IMAGE_LE_FORMAT __constant_cpu_to_le32(0xC000012E) +#define STATUS_INVALID_IMAGE_NOT_MZ __constant_cpu_to_le32(0xC000012F) +#define STATUS_INVALID_IMAGE_PROTECT __constant_cpu_to_le32(0xC0000130) +#define STATUS_INVALID_IMAGE_WIN_16 __constant_cpu_to_le32(0xC0000131) +#define STATUS_LOGON_SERVER_CONFLICT __constant_cpu_to_le32(0xC0000132) +#define STATUS_TIME_DIFFERENCE_AT_DC __constant_cpu_to_le32(0xC0000133) +#define STATUS_SYNCHRONIZATION_REQUIRED __constant_cpu_to_le32(0xC0000134) +#define STATUS_DLL_NOT_FOUND __constant_cpu_to_le32(0xC0000135) +#define STATUS_OPEN_FAILED __constant_cpu_to_le32(0xC0000136) +#define STATUS_IO_PRIVILEGE_FAILED __constant_cpu_to_le32(0xC0000137) +#define STATUS_ORDINAL_NOT_FOUND __constant_cpu_to_le32(0xC0000138) +#define STATUS_ENTRYPOINT_NOT_FOUND __constant_cpu_to_le32(0xC0000139) +#define STATUS_CONTROL_C_EXIT __constant_cpu_to_le32(0xC000013A) +#define STATUS_LOCAL_DISCONNECT __constant_cpu_to_le32(0xC000013B) +#define STATUS_REMOTE_DISCONNECT __constant_cpu_to_le32(0xC000013C) +#define STATUS_REMOTE_RESOURCES __constant_cpu_to_le32(0xC000013D) +#define STATUS_LINK_FAILED __constant_cpu_to_le32(0xC000013E) +#define STATUS_LINK_TIMEOUT __constant_cpu_to_le32(0xC000013F) +#define STATUS_INVALID_CONNECTION __constant_cpu_to_le32(0xC0000140) +#define STATUS_INVALID_ADDRESS __constant_cpu_to_le32(0xC0000141) +#define STATUS_DLL_INIT_FAILED __constant_cpu_to_le32(0xC0000142) +#define STATUS_MISSING_SYSTEMFILE __constant_cpu_to_le32(0xC0000143) +#define STATUS_UNHANDLED_EXCEPTION __constant_cpu_to_le32(0xC0000144) +#define STATUS_APP_INIT_FAILURE __constant_cpu_to_le32(0xC0000145) +#define STATUS_PAGEFILE_CREATE_FAILED __constant_cpu_to_le32(0xC0000146) +#define STATUS_NO_PAGEFILE 
__constant_cpu_to_le32(0xC0000147) +#define STATUS_INVALID_LEVEL __constant_cpu_to_le32(0xC0000148) +#define STATUS_WRONG_PASSWORD_CORE __constant_cpu_to_le32(0xC0000149) +#define STATUS_ILLEGAL_FLOAT_CONTEXT __constant_cpu_to_le32(0xC000014A) +#define STATUS_PIPE_BROKEN __constant_cpu_to_le32(0xC000014B) +#define STATUS_REGISTRY_CORRUPT __constant_cpu_to_le32(0xC000014C) +#define STATUS_REGISTRY_IO_FAILED __constant_cpu_to_le32(0xC000014D) +#define STATUS_NO_EVENT_PAIR __constant_cpu_to_le32(0xC000014E) +#define STATUS_UNRECOGNIZED_VOLUME __constant_cpu_to_le32(0xC000014F) +#define STATUS_SERIAL_NO_DEVICE_INITED __constant_cpu_to_le32(0xC0000150) +#define STATUS_NO_SUCH_ALIAS __constant_cpu_to_le32(0xC0000151) +#define STATUS_MEMBER_NOT_IN_ALIAS __constant_cpu_to_le32(0xC0000152) +#define STATUS_MEMBER_IN_ALIAS __constant_cpu_to_le32(0xC0000153) +#define STATUS_ALIAS_EXISTS __constant_cpu_to_le32(0xC0000154) +#define STATUS_LOGON_NOT_GRANTED __constant_cpu_to_le32(0xC0000155) +#define STATUS_TOO_MANY_SECRETS __constant_cpu_to_le32(0xC0000156) +#define STATUS_SECRET_TOO_LONG __constant_cpu_to_le32(0xC0000157) +#define STATUS_INTERNAL_DB_ERROR __constant_cpu_to_le32(0xC0000158) +#define STATUS_FULLSCREEN_MODE __constant_cpu_to_le32(0xC0000159) +#define STATUS_TOO_MANY_CONTEXT_IDS __constant_cpu_to_le32(0xC000015A) +#define STATUS_LOGON_TYPE_NOT_GRANTED __constant_cpu_to_le32(0xC000015B) +#define STATUS_NOT_REGISTRY_FILE __constant_cpu_to_le32(0xC000015C) +#define STATUS_NT_CROSS_ENCRYPTION_REQUIRED __constant_cpu_to_le32(0xC000015D) +#define STATUS_DOMAIN_CTRLR_CONFIG_ERROR __constant_cpu_to_le32(0xC000015E) +#define STATUS_FT_MISSING_MEMBER __constant_cpu_to_le32(0xC000015F) +#define STATUS_ILL_FORMED_SERVICE_ENTRY __constant_cpu_to_le32(0xC0000160) +#define STATUS_ILLEGAL_CHARACTER __constant_cpu_to_le32(0xC0000161) +#define STATUS_UNMAPPABLE_CHARACTER __constant_cpu_to_le32(0xC0000162) +#define STATUS_UNDEFINED_CHARACTER __constant_cpu_to_le32(0xC0000163) 
+#define STATUS_FLOPPY_VOLUME __constant_cpu_to_le32(0xC0000164) +#define STATUS_FLOPPY_ID_MARK_NOT_FOUND __constant_cpu_to_le32(0xC0000165) +#define STATUS_FLOPPY_WRONG_CYLINDER __constant_cpu_to_le32(0xC0000166) +#define STATUS_FLOPPY_UNKNOWN_ERROR __constant_cpu_to_le32(0xC0000167) +#define STATUS_FLOPPY_BAD_REGISTERS __constant_cpu_to_le32(0xC0000168) +#define STATUS_DISK_RECALIBRATE_FAILED __constant_cpu_to_le32(0xC0000169) +#define STATUS_DISK_OPERATION_FAILED __constant_cpu_to_le32(0xC000016A) +#define STATUS_DISK_RESET_FAILED __constant_cpu_to_le32(0xC000016B) +#define STATUS_SHARED_IRQ_BUSY __constant_cpu_to_le32(0xC000016C) +#define STATUS_FT_ORPHANING __constant_cpu_to_le32(0xC000016D) +#define STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT __constant_cpu_to_le32(0xC000016E) +#define STATUS_PARTITION_FAILURE __constant_cpu_to_le32(0xC0000172) +#define STATUS_INVALID_BLOCK_LENGTH __constant_cpu_to_le32(0xC0000173) +#define STATUS_DEVICE_NOT_PARTITIONED __constant_cpu_to_le32(0xC0000174) +#define STATUS_UNABLE_TO_LOCK_MEDIA __constant_cpu_to_le32(0xC0000175) +#define STATUS_UNABLE_TO_UNLOAD_MEDIA __constant_cpu_to_le32(0xC0000176) +#define STATUS_EOM_OVERFLOW __constant_cpu_to_le32(0xC0000177) +#define STATUS_NO_MEDIA __constant_cpu_to_le32(0xC0000178) +#define STATUS_NO_SUCH_MEMBER __constant_cpu_to_le32(0xC000017A) +#define STATUS_INVALID_MEMBER __constant_cpu_to_le32(0xC000017B) +#define STATUS_KEY_DELETED __constant_cpu_to_le32(0xC000017C) +#define STATUS_NO_LOG_SPACE __constant_cpu_to_le32(0xC000017D) +#define STATUS_TOO_MANY_SIDS __constant_cpu_to_le32(0xC000017E) +#define STATUS_LM_CROSS_ENCRYPTION_REQUIRED __constant_cpu_to_le32(0xC000017F) +#define STATUS_KEY_HAS_CHILDREN __constant_cpu_to_le32(0xC0000180) +#define STATUS_CHILD_MUST_BE_VOLATILE __constant_cpu_to_le32(0xC0000181) +#define STATUS_DEVICE_CONFIGURATION_ERROR __constant_cpu_to_le32(0xC0000182) +#define STATUS_DRIVER_INTERNAL_ERROR __constant_cpu_to_le32(0xC0000183) +#define 
STATUS_INVALID_DEVICE_STATE __constant_cpu_to_le32(0xC0000184) +#define STATUS_IO_DEVICE_ERROR __constant_cpu_to_le32(0xC0000185) +#define STATUS_DEVICE_PROTOCOL_ERROR __constant_cpu_to_le32(0xC0000186) +#define STATUS_BACKUP_CONTROLLER __constant_cpu_to_le32(0xC0000187) +#define STATUS_LOG_FILE_FULL __constant_cpu_to_le32(0xC0000188) +#define STATUS_TOO_LATE __constant_cpu_to_le32(0xC0000189) +#define STATUS_NO_TRUST_LSA_SECRET __constant_cpu_to_le32(0xC000018A) +#define STATUS_NO_TRUST_SAM_ACCOUNT __constant_cpu_to_le32(0xC000018B) +#define STATUS_TRUSTED_DOMAIN_FAILURE __constant_cpu_to_le32(0xC000018C) +#define STATUS_TRUSTED_RELATIONSHIP_FAILURE __constant_cpu_to_le32(0xC000018D) +#define STATUS_EVENTLOG_FILE_CORRUPT __constant_cpu_to_le32(0xC000018E) +#define STATUS_EVENTLOG_CANT_START __constant_cpu_to_le32(0xC000018F) +#define STATUS_TRUST_FAILURE __constant_cpu_to_le32(0xC0000190) +#define STATUS_MUTANT_LIMIT_EXCEEDED __constant_cpu_to_le32(0xC0000191) +#define STATUS_NETLOGON_NOT_STARTED __constant_cpu_to_le32(0xC0000192) +#define STATUS_ACCOUNT_EXPIRED __constant_cpu_to_le32(0xC0000193) +#define STATUS_POSSIBLE_DEADLOCK __constant_cpu_to_le32(0xC0000194) +#define STATUS_NETWORK_CREDENTIAL_CONFLICT __constant_cpu_to_le32(0xC0000195) +#define STATUS_REMOTE_SESSION_LIMIT __constant_cpu_to_le32(0xC0000196) +#define STATUS_EVENTLOG_FILE_CHANGED __constant_cpu_to_le32(0xC0000197) +#define STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT __constant_cpu_to_le32(0xC0000198) +#define STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT __constant_cpu_to_le32(0xC0000199) +#define STATUS_NOLOGON_SERVER_TRUST_ACCOUNT __constant_cpu_to_le32(0xC000019A) +#define STATUS_DOMAIN_TRUST_INCONSISTENT __constant_cpu_to_le32(0xC000019B) +#define STATUS_FS_DRIVER_REQUIRED __constant_cpu_to_le32(0xC000019C) +#define STATUS_IMAGE_ALREADY_LOADED_AS_DLL __constant_cpu_to_le32(0xC000019D) +#define STATUS_NETWORK_OPEN_RESTRICTION __constant_cpu_to_le32(0xC0000201) +#define STATUS_NO_USER_SESSION_KEY 
__constant_cpu_to_le32(0xC0000202) +#define STATUS_USER_SESSION_DELETED __constant_cpu_to_le32(0xC0000203) +#define STATUS_RESOURCE_LANG_NOT_FOUND __constant_cpu_to_le32(0xC0000204) +#define STATUS_INSUFF_SERVER_RESOURCES __constant_cpu_to_le32(0xC0000205) +#define STATUS_INVALID_BUFFER_SIZE __constant_cpu_to_le32(0xC0000206) +#define STATUS_INVALID_ADDRESS_COMPONENT __constant_cpu_to_le32(0xC0000207) +#define STATUS_INVALID_ADDRESS_WILDCARD __constant_cpu_to_le32(0xC0000208) +#define STATUS_TOO_MANY_ADDRESSES __constant_cpu_to_le32(0xC0000209) +#define STATUS_ADDRESS_ALREADY_EXISTS __constant_cpu_to_le32(0xC000020A) +#define STATUS_ADDRESS_CLOSED __constant_cpu_to_le32(0xC000020B) +#define STATUS_CONNECTION_DISCONNECTED __constant_cpu_to_le32(0xC000020C) +#define STATUS_CONNECTION_RESET __constant_cpu_to_le32(0xC000020D) +#define STATUS_TOO_MANY_NODES __constant_cpu_to_le32(0xC000020E) +#define STATUS_TRANSACTION_ABORTED __constant_cpu_to_le32(0xC000020F) +#define STATUS_TRANSACTION_TIMED_OUT __constant_cpu_to_le32(0xC0000210) +#define STATUS_TRANSACTION_NO_RELEASE __constant_cpu_to_le32(0xC0000211) +#define STATUS_TRANSACTION_NO_MATCH __constant_cpu_to_le32(0xC0000212) +#define STATUS_TRANSACTION_RESPONDED __constant_cpu_to_le32(0xC0000213) +#define STATUS_TRANSACTION_INVALID_ID __constant_cpu_to_le32(0xC0000214) +#define STATUS_TRANSACTION_INVALID_TYPE __constant_cpu_to_le32(0xC0000215) +#define STATUS_NOT_SERVER_SESSION __constant_cpu_to_le32(0xC0000216) +#define STATUS_NOT_CLIENT_SESSION __constant_cpu_to_le32(0xC0000217) +#define STATUS_CANNOT_LOAD_REGISTRY_FILE __constant_cpu_to_le32(0xC0000218) +#define STATUS_DEBUG_ATTACH_FAILED __constant_cpu_to_le32(0xC0000219) +#define STATUS_SYSTEM_PROCESS_TERMINATED __constant_cpu_to_le32(0xC000021A) +#define STATUS_DATA_NOT_ACCEPTED __constant_cpu_to_le32(0xC000021B) +#define STATUS_NO_BROWSER_SERVERS_FOUND __constant_cpu_to_le32(0xC000021C) +#define STATUS_VDM_HARD_ERROR __constant_cpu_to_le32(0xC000021D) +#define 
STATUS_DRIVER_CANCEL_TIMEOUT __constant_cpu_to_le32(0xC000021E) +#define STATUS_REPLY_MESSAGE_MISMATCH __constant_cpu_to_le32(0xC000021F) +#define STATUS_MAPPED_ALIGNMENT __constant_cpu_to_le32(0xC0000220) +#define STATUS_IMAGE_CHECKSUM_MISMATCH __constant_cpu_to_le32(0xC0000221) +#define STATUS_LOST_WRITEBEHIND_DATA __constant_cpu_to_le32(0xC0000222) +#define STATUS_CLIENT_SERVER_PARAMETERS_INVALID __constant_cpu_to_le32(0xC0000223) +#define STATUS_PASSWORD_MUST_CHANGE __constant_cpu_to_le32(0xC0000224) +#define STATUS_NOT_FOUND __constant_cpu_to_le32(0xC0000225) +#define STATUS_NOT_TINY_STREAM __constant_cpu_to_le32(0xC0000226) +#define STATUS_RECOVERY_FAILURE __constant_cpu_to_le32(0xC0000227) +#define STATUS_STACK_OVERFLOW_READ __constant_cpu_to_le32(0xC0000228) +#define STATUS_FAIL_CHECK __constant_cpu_to_le32(0xC0000229) +#define STATUS_DUPLICATE_OBJECTID __constant_cpu_to_le32(0xC000022A) +#define STATUS_OBJECTID_EXISTS __constant_cpu_to_le32(0xC000022B) +#define STATUS_CONVERT_TO_LARGE __constant_cpu_to_le32(0xC000022C) +#define STATUS_RETRY __constant_cpu_to_le32(0xC000022D) +#define STATUS_FOUND_OUT_OF_SCOPE __constant_cpu_to_le32(0xC000022E) +#define STATUS_ALLOCATE_BUCKET __constant_cpu_to_le32(0xC000022F) +#define STATUS_PROPSET_NOT_FOUND __constant_cpu_to_le32(0xC0000230) +#define STATUS_MARSHALL_OVERFLOW __constant_cpu_to_le32(0xC0000231) +#define STATUS_INVALID_VARIANT __constant_cpu_to_le32(0xC0000232) +#define STATUS_DOMAIN_CONTROLLER_NOT_FOUND __constant_cpu_to_le32(0xC0000233) +#define STATUS_ACCOUNT_LOCKED_OUT __constant_cpu_to_le32(0xC0000234) +#define STATUS_HANDLE_NOT_CLOSABLE __constant_cpu_to_le32(0xC0000235) +#define STATUS_CONNECTION_REFUSED __constant_cpu_to_le32(0xC0000236) +#define STATUS_GRACEFUL_DISCONNECT __constant_cpu_to_le32(0xC0000237) +#define STATUS_ADDRESS_ALREADY_ASSOCIATED __constant_cpu_to_le32(0xC0000238) +#define STATUS_ADDRESS_NOT_ASSOCIATED __constant_cpu_to_le32(0xC0000239) +#define STATUS_CONNECTION_INVALID 
__constant_cpu_to_le32(0xC000023A) +#define STATUS_CONNECTION_ACTIVE __constant_cpu_to_le32(0xC000023B) +#define STATUS_NETWORK_UNREACHABLE __constant_cpu_to_le32(0xC000023C) +#define STATUS_HOST_UNREACHABLE __constant_cpu_to_le32(0xC000023D) +#define STATUS_PROTOCOL_UNREACHABLE __constant_cpu_to_le32(0xC000023E) +#define STATUS_PORT_UNREACHABLE __constant_cpu_to_le32(0xC000023F) +#define STATUS_REQUEST_ABORTED __constant_cpu_to_le32(0xC0000240) +#define STATUS_CONNECTION_ABORTED __constant_cpu_to_le32(0xC0000241) +#define STATUS_BAD_COMPRESSION_BUFFER __constant_cpu_to_le32(0xC0000242) +#define STATUS_USER_MAPPED_FILE __constant_cpu_to_le32(0xC0000243) +#define STATUS_AUDIT_FAILED __constant_cpu_to_le32(0xC0000244) +#define STATUS_TIMER_RESOLUTION_NOT_SET __constant_cpu_to_le32(0xC0000245) +#define STATUS_CONNECTION_COUNT_LIMIT __constant_cpu_to_le32(0xC0000246) +#define STATUS_LOGIN_TIME_RESTRICTION __constant_cpu_to_le32(0xC0000247) +#define STATUS_LOGIN_WKSTA_RESTRICTION __constant_cpu_to_le32(0xC0000248) +#define STATUS_IMAGE_MP_UP_MISMATCH __constant_cpu_to_le32(0xC0000249) +#define STATUS_INSUFFICIENT_LOGON_INFO __constant_cpu_to_le32(0xC0000250) +#define STATUS_BAD_DLL_ENTRYPOINT __constant_cpu_to_le32(0xC0000251) +#define STATUS_BAD_SERVICE_ENTRYPOINT __constant_cpu_to_le32(0xC0000252) +#define STATUS_LPC_REPLY_LOST __constant_cpu_to_le32(0xC0000253) +#define STATUS_IP_ADDRESS_CONFLICT1 __constant_cpu_to_le32(0xC0000254) +#define STATUS_IP_ADDRESS_CONFLICT2 __constant_cpu_to_le32(0xC0000255) +#define STATUS_REGISTRY_QUOTA_LIMIT __constant_cpu_to_le32(0xC0000256) +#define STATUS_PATH_NOT_COVERED __constant_cpu_to_le32(0xC0000257) +#define STATUS_NO_CALLBACK_ACTIVE __constant_cpu_to_le32(0xC0000258) +#define STATUS_LICENSE_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000259) +#define STATUS_PWD_TOO_SHORT __constant_cpu_to_le32(0xC000025A) +#define STATUS_PWD_TOO_RECENT __constant_cpu_to_le32(0xC000025B) +#define STATUS_PWD_HISTORY_CONFLICT 
__constant_cpu_to_le32(0xC000025C) +#define STATUS_PLUGPLAY_NO_DEVICE __constant_cpu_to_le32(0xC000025E) +#define STATUS_UNSUPPORTED_COMPRESSION __constant_cpu_to_le32(0xC000025F) +#define STATUS_INVALID_HW_PROFILE __constant_cpu_to_le32(0xC0000260) +#define STATUS_INVALID_PLUGPLAY_DEVICE_PATH __constant_cpu_to_le32(0xC0000261) +#define STATUS_DRIVER_ORDINAL_NOT_FOUND __constant_cpu_to_le32(0xC0000262) +#define STATUS_DRIVER_ENTRYPOINT_NOT_FOUND __constant_cpu_to_le32(0xC0000263) +#define STATUS_RESOURCE_NOT_OWNED __constant_cpu_to_le32(0xC0000264) +#define STATUS_TOO_MANY_LINKS __constant_cpu_to_le32(0xC0000265) +#define STATUS_QUOTA_LIST_INCONSISTENT __constant_cpu_to_le32(0xC0000266) +#define STATUS_FILE_IS_OFFLINE __constant_cpu_to_le32(0xC0000267) +#define STATUS_EVALUATION_EXPIRATION __constant_cpu_to_le32(0xC0000268) +#define STATUS_ILLEGAL_DLL_RELOCATION __constant_cpu_to_le32(0xC0000269) +#define STATUS_LICENSE_VIOLATION __constant_cpu_to_le32(0xC000026A) +#define STATUS_DLL_INIT_FAILED_LOGOFF __constant_cpu_to_le32(0xC000026B) +#define STATUS_DRIVER_UNABLE_TO_LOAD __constant_cpu_to_le32(0xC000026C) +#define STATUS_DFS_UNAVAILABLE __constant_cpu_to_le32(0xC000026D) +#define STATUS_VOLUME_DISMOUNTED __constant_cpu_to_le32(0xC000026E) +#define STATUS_WX86_INTERNAL_ERROR __constant_cpu_to_le32(0xC000026F) +#define STATUS_WX86_FLOAT_STACK_CHECK __constant_cpu_to_le32(0xC0000270) +#define STATUS_VALIDATE_CONTINUE __constant_cpu_to_le32(0xC0000271) +#define STATUS_NO_MATCH __constant_cpu_to_le32(0xC0000272) +#define STATUS_NO_MORE_MATCHES __constant_cpu_to_le32(0xC0000273) +#define STATUS_NOT_A_REPARSE_POINT __constant_cpu_to_le32(0xC0000275) +#define STATUS_IO_REPARSE_TAG_INVALID __constant_cpu_to_le32(0xC0000276) +#define STATUS_IO_REPARSE_TAG_MISMATCH __constant_cpu_to_le32(0xC0000277) +#define STATUS_IO_REPARSE_DATA_INVALID __constant_cpu_to_le32(0xC0000278) +#define STATUS_IO_REPARSE_TAG_NOT_HANDLED __constant_cpu_to_le32(0xC0000279) +#define 
STATUS_REPARSE_POINT_NOT_RESOLVED __constant_cpu_to_le32(0xC0000280) +#define STATUS_DIRECTORY_IS_A_REPARSE_POINT __constant_cpu_to_le32(0xC0000281) +#define STATUS_RANGE_LIST_CONFLICT __constant_cpu_to_le32(0xC0000282) +#define STATUS_SOURCE_ELEMENT_EMPTY __constant_cpu_to_le32(0xC0000283) +#define STATUS_DESTINATION_ELEMENT_FULL __constant_cpu_to_le32(0xC0000284) +#define STATUS_ILLEGAL_ELEMENT_ADDRESS __constant_cpu_to_le32(0xC0000285) +#define STATUS_MAGAZINE_NOT_PRESENT __constant_cpu_to_le32(0xC0000286) +#define STATUS_REINITIALIZATION_NEEDED __constant_cpu_to_le32(0xC0000287) +#define STATUS_ENCRYPTION_FAILED __constant_cpu_to_le32(0xC000028A) +#define STATUS_DECRYPTION_FAILED __constant_cpu_to_le32(0xC000028B) +#define STATUS_RANGE_NOT_FOUND __constant_cpu_to_le32(0xC000028C) +#define STATUS_NO_RECOVERY_POLICY __constant_cpu_to_le32(0xC000028D) +#define STATUS_NO_EFS __constant_cpu_to_le32(0xC000028E) +#define STATUS_WRONG_EFS __constant_cpu_to_le32(0xC000028F) +#define STATUS_NO_USER_KEYS __constant_cpu_to_le32(0xC0000290) +#define STATUS_FILE_NOT_ENCRYPTED __constant_cpu_to_le32(0xC0000291) +#define STATUS_NOT_EXPORT_FORMAT __constant_cpu_to_le32(0xC0000292) +#define STATUS_FILE_ENCRYPTED __constant_cpu_to_le32(0xC0000293) +#define STATUS_WMI_GUID_NOT_FOUND __constant_cpu_to_le32(0xC0000295) +#define STATUS_WMI_INSTANCE_NOT_FOUND __constant_cpu_to_le32(0xC0000296) +#define STATUS_WMI_ITEMID_NOT_FOUND __constant_cpu_to_le32(0xC0000297) +#define STATUS_WMI_TRY_AGAIN __constant_cpu_to_le32(0xC0000298) +#define STATUS_SHARED_POLICY __constant_cpu_to_le32(0xC0000299) +#define STATUS_POLICY_OBJECT_NOT_FOUND __constant_cpu_to_le32(0xC000029A) +#define STATUS_POLICY_ONLY_IN_DS __constant_cpu_to_le32(0xC000029B) +#define STATUS_VOLUME_NOT_UPGRADED __constant_cpu_to_le32(0xC000029C) +#define STATUS_REMOTE_STORAGE_NOT_ACTIVE __constant_cpu_to_le32(0xC000029D) +#define STATUS_REMOTE_STORAGE_MEDIA_ERROR __constant_cpu_to_le32(0xC000029E) +#define 
STATUS_NO_TRACKING_SERVICE __constant_cpu_to_le32(0xC000029F) +#define STATUS_SERVER_SID_MISMATCH __constant_cpu_to_le32(0xC00002A0) +#define STATUS_DS_NO_ATTRIBUTE_OR_VALUE __constant_cpu_to_le32(0xC00002A1) +#define STATUS_DS_INVALID_ATTRIBUTE_SYNTAX __constant_cpu_to_le32(0xC00002A2) +#define STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED __constant_cpu_to_le32(0xC00002A3) +#define STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS __constant_cpu_to_le32(0xC00002A4) +#define STATUS_DS_BUSY __constant_cpu_to_le32(0xC00002A5) +#define STATUS_DS_UNAVAILABLE __constant_cpu_to_le32(0xC00002A6) +#define STATUS_DS_NO_RIDS_ALLOCATED __constant_cpu_to_le32(0xC00002A7) +#define STATUS_DS_NO_MORE_RIDS __constant_cpu_to_le32(0xC00002A8) +#define STATUS_DS_INCORRECT_ROLE_OWNER __constant_cpu_to_le32(0xC00002A9) +#define STATUS_DS_RIDMGR_INIT_ERROR __constant_cpu_to_le32(0xC00002AA) +#define STATUS_DS_OBJ_CLASS_VIOLATION __constant_cpu_to_le32(0xC00002AB) +#define STATUS_DS_CANT_ON_NON_LEAF __constant_cpu_to_le32(0xC00002AC) +#define STATUS_DS_CANT_ON_RDN __constant_cpu_to_le32(0xC00002AD) +#define STATUS_DS_CANT_MOD_OBJ_CLASS __constant_cpu_to_le32(0xC00002AE) +#define STATUS_DS_CROSS_DOM_MOVE_FAILED __constant_cpu_to_le32(0xC00002AF) +#define STATUS_DS_GC_NOT_AVAILABLE __constant_cpu_to_le32(0xC00002B0) +#define STATUS_DIRECTORY_SERVICE_REQUIRED __constant_cpu_to_le32(0xC00002B1) +#define STATUS_REPARSE_ATTRIBUTE_CONFLICT __constant_cpu_to_le32(0xC00002B2) +#define STATUS_CANT_ENABLE_DENY_ONLY __constant_cpu_to_le32(0xC00002B3) +#define STATUS_FLOAT_MULTIPLE_FAULTS __constant_cpu_to_le32(0xC00002B4) +#define STATUS_FLOAT_MULTIPLE_TRAPS __constant_cpu_to_le32(0xC00002B5) +#define STATUS_DEVICE_REMOVED __constant_cpu_to_le32(0xC00002B6) +#define STATUS_JOURNAL_DELETE_IN_PROGRESS __constant_cpu_to_le32(0xC00002B7) +#define STATUS_JOURNAL_NOT_ACTIVE __constant_cpu_to_le32(0xC00002B8) +#define STATUS_NOINTERFACE __constant_cpu_to_le32(0xC00002B9) +#define STATUS_DS_ADMIN_LIMIT_EXCEEDED 
__constant_cpu_to_le32(0xC00002C1) +#define STATUS_DRIVER_FAILED_SLEEP __constant_cpu_to_le32(0xC00002C2) +#define STATUS_MUTUAL_AUTHENTICATION_FAILED __constant_cpu_to_le32(0xC00002C3) +#define STATUS_CORRUPT_SYSTEM_FILE __constant_cpu_to_le32(0xC00002C4) +#define STATUS_DATATYPE_MISALIGNMENT_ERROR __constant_cpu_to_le32(0xC00002C5) +#define STATUS_WMI_READ_ONLY __constant_cpu_to_le32(0xC00002C6) +#define STATUS_WMI_SET_FAILURE __constant_cpu_to_le32(0xC00002C7) +#define STATUS_COMMITMENT_MINIMUM __constant_cpu_to_le32(0xC00002C8) +#define STATUS_REG_NAT_CONSUMPTION __constant_cpu_to_le32(0xC00002C9) +#define STATUS_TRANSPORT_FULL __constant_cpu_to_le32(0xC00002CA) +#define STATUS_DS_SAM_INIT_FAILURE __constant_cpu_to_le32(0xC00002CB) +#define STATUS_ONLY_IF_CONNECTED __constant_cpu_to_le32(0xC00002CC) +#define STATUS_DS_SENSITIVE_GROUP_VIOLATION __constant_cpu_to_le32(0xC00002CD) +#define STATUS_PNP_RESTART_ENUMERATION __constant_cpu_to_le32(0xC00002CE) +#define STATUS_JOURNAL_ENTRY_DELETED __constant_cpu_to_le32(0xC00002CF) +#define STATUS_DS_CANT_MOD_PRIMARYGROUPID __constant_cpu_to_le32(0xC00002D0) +#define STATUS_SYSTEM_IMAGE_BAD_SIGNATURE __constant_cpu_to_le32(0xC00002D1) +#define STATUS_PNP_REBOOT_REQUIRED __constant_cpu_to_le32(0xC00002D2) +#define STATUS_POWER_STATE_INVALID __constant_cpu_to_le32(0xC00002D3) +#define STATUS_DS_INVALID_GROUP_TYPE __constant_cpu_to_le32(0xC00002D4) +#define STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN __constant_cpu_to_le32(0xC00002D5) +#define STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN __constant_cpu_to_le32(0xC00002D6) +#define STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER __constant_cpu_to_le32(0xC00002D7) +#define STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER __constant_cpu_to_le32(0xC00002D8) +#define STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER __constant_cpu_to_le32(0xC00002D9) +#define STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER __constant_cpu_to_le32(0xC00002DA) +#define STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER 
__constant_cpu_to_le32(0xC00002DB) +#define STATUS_DS_HAVE_PRIMARY_MEMBERS __constant_cpu_to_le32(0xC00002DC) +#define STATUS_WMI_NOT_SUPPORTED __constant_cpu_to_le32(0xC00002DD) +#define STATUS_INSUFFICIENT_POWER __constant_cpu_to_le32(0xC00002DE) +#define STATUS_SAM_NEED_BOOTKEY_PASSWORD __constant_cpu_to_le32(0xC00002DF) +#define STATUS_SAM_NEED_BOOTKEY_FLOPPY __constant_cpu_to_le32(0xC00002E0) +#define STATUS_DS_CANT_START __constant_cpu_to_le32(0xC00002E1) +#define STATUS_DS_INIT_FAILURE __constant_cpu_to_le32(0xC00002E2) +#define STATUS_SAM_INIT_FAILURE __constant_cpu_to_le32(0xC00002E3) +#define STATUS_DS_GC_REQUIRED __constant_cpu_to_le32(0xC00002E4) +#define STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY __constant_cpu_to_le32(0xC00002E5) +#define STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS __constant_cpu_to_le32(0xC00002E6) +#define STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC00002E7) +#define STATUS_MULTIPLE_FAULT_VIOLATION __constant_cpu_to_le32(0xC00002E8) +#define STATUS_CURRENT_DOMAIN_NOT_ALLOWED __constant_cpu_to_le32(0xC00002E9) +#define STATUS_CANNOT_MAKE __constant_cpu_to_le32(0xC00002EA) +#define STATUS_SYSTEM_SHUTDOWN __constant_cpu_to_le32(0xC00002EB) +#define STATUS_DS_INIT_FAILURE_CONSOLE __constant_cpu_to_le32(0xC00002EC) +#define STATUS_DS_SAM_INIT_FAILURE_CONSOLE __constant_cpu_to_le32(0xC00002ED) +#define STATUS_UNFINISHED_CONTEXT_DELETED __constant_cpu_to_le32(0xC00002EE) +#define STATUS_NO_TGT_REPLY __constant_cpu_to_le32(0xC00002EF) +#define STATUS_OBJECTID_NOT_FOUND __constant_cpu_to_le32(0xC00002F0) +#define STATUS_NO_IP_ADDRESSES __constant_cpu_to_le32(0xC00002F1) +#define STATUS_WRONG_CREDENTIAL_HANDLE __constant_cpu_to_le32(0xC00002F2) +#define STATUS_CRYPTO_SYSTEM_INVALID __constant_cpu_to_le32(0xC00002F3) +#define STATUS_MAX_REFERRALS_EXCEEDED __constant_cpu_to_le32(0xC00002F4) +#define STATUS_MUST_BE_KDC __constant_cpu_to_le32(0xC00002F5) +#define STATUS_STRONG_CRYPTO_NOT_SUPPORTED __constant_cpu_to_le32(0xC00002F6) 
+#define STATUS_TOO_MANY_PRINCIPALS __constant_cpu_to_le32(0xC00002F7) +#define STATUS_NO_PA_DATA __constant_cpu_to_le32(0xC00002F8) +#define STATUS_PKINIT_NAME_MISMATCH __constant_cpu_to_le32(0xC00002F9) +#define STATUS_SMARTCARD_LOGON_REQUIRED __constant_cpu_to_le32(0xC00002FA) +#define STATUS_KDC_INVALID_REQUEST __constant_cpu_to_le32(0xC00002FB) +#define STATUS_KDC_UNABLE_TO_REFER __constant_cpu_to_le32(0xC00002FC) +#define STATUS_KDC_UNKNOWN_ETYPE __constant_cpu_to_le32(0xC00002FD) +#define STATUS_SHUTDOWN_IN_PROGRESS __constant_cpu_to_le32(0xC00002FE) +#define STATUS_SERVER_SHUTDOWN_IN_PROGRESS __constant_cpu_to_le32(0xC00002FF) +#define STATUS_NOT_SUPPORTED_ON_SBS __constant_cpu_to_le32(0xC0000300) +#define STATUS_WMI_GUID_DISCONNECTED __constant_cpu_to_le32(0xC0000301) +#define STATUS_WMI_ALREADY_DISABLED __constant_cpu_to_le32(0xC0000302) +#define STATUS_WMI_ALREADY_ENABLED __constant_cpu_to_le32(0xC0000303) +#define STATUS_MFT_TOO_FRAGMENTED __constant_cpu_to_le32(0xC0000304) +#define STATUS_COPY_PROTECTION_FAILURE __constant_cpu_to_le32(0xC0000305) +#define STATUS_CSS_AUTHENTICATION_FAILURE __constant_cpu_to_le32(0xC0000306) +#define STATUS_CSS_KEY_NOT_PRESENT __constant_cpu_to_le32(0xC0000307) +#define STATUS_CSS_KEY_NOT_ESTABLISHED __constant_cpu_to_le32(0xC0000308) +#define STATUS_CSS_SCRAMBLED_SECTOR __constant_cpu_to_le32(0xC0000309) +#define STATUS_CSS_REGION_MISMATCH __constant_cpu_to_le32(0xC000030A) +#define STATUS_CSS_RESETS_EXHAUSTED __constant_cpu_to_le32(0xC000030B) +#define STATUS_PKINIT_FAILURE __constant_cpu_to_le32(0xC0000320) +#define STATUS_SMARTCARD_SUBSYSTEM_FAILURE __constant_cpu_to_le32(0xC0000321) +#define STATUS_NO_KERB_KEY __constant_cpu_to_le32(0xC0000322) +#define STATUS_HOST_DOWN __constant_cpu_to_le32(0xC0000350) +#define STATUS_UNSUPPORTED_PREAUTH __constant_cpu_to_le32(0xC0000351) +#define STATUS_EFS_ALG_BLOB_TOO_BIG __constant_cpu_to_le32(0xC0000352) +#define STATUS_PORT_NOT_SET __constant_cpu_to_le32(0xC0000353) +#define 
STATUS_DEBUGGER_INACTIVE __constant_cpu_to_le32(0xC0000354) +#define STATUS_DS_VERSION_CHECK_FAILURE __constant_cpu_to_le32(0xC0000355) +#define STATUS_AUDITING_DISABLED __constant_cpu_to_le32(0xC0000356) +#define STATUS_PRENT4_MACHINE_ACCOUNT __constant_cpu_to_le32(0xC0000357) +#define STATUS_DS_AG_CANT_HAVE_UNIVERSAL_MEMBER __constant_cpu_to_le32(0xC0000358) +#define STATUS_INVALID_IMAGE_WIN_32 __constant_cpu_to_le32(0xC0000359) +#define STATUS_INVALID_IMAGE_WIN_64 __constant_cpu_to_le32(0xC000035A) +#define STATUS_BAD_BINDINGS __constant_cpu_to_le32(0xC000035B) +#define STATUS_NETWORK_SESSION_EXPIRED __constant_cpu_to_le32(0xC000035C) +#define STATUS_APPHELP_BLOCK __constant_cpu_to_le32(0xC000035D) +#define STATUS_ALL_SIDS_FILTERED __constant_cpu_to_le32(0xC000035E) +#define STATUS_NOT_SAFE_MODE_DRIVER __constant_cpu_to_le32(0xC000035F) +#define STATUS_ACCESS_DISABLED_BY_POLICY_DEFAULT __constant_cpu_to_le32(0xC0000361) +#define STATUS_ACCESS_DISABLED_BY_POLICY_PATH __constant_cpu_to_le32(0xC0000362) +#define STATUS_ACCESS_DISABLED_BY_POLICY_PUBLISHER __constant_cpu_to_le32(0xC0000363) +#define STATUS_ACCESS_DISABLED_BY_POLICY_OTHER __constant_cpu_to_le32(0xC0000364) +#define STATUS_FAILED_DRIVER_ENTRY __constant_cpu_to_le32(0xC0000365) +#define STATUS_DEVICE_ENUMERATION_ERROR __constant_cpu_to_le32(0xC0000366) +#define STATUS_MOUNT_POINT_NOT_RESOLVED __constant_cpu_to_le32(0xC0000368) +#define STATUS_INVALID_DEVICE_OBJECT_PARAMETER __constant_cpu_to_le32(0xC0000369) +#define STATUS_MCA_OCCURED __constant_cpu_to_le32(0xC000036A) +#define STATUS_DRIVER_BLOCKED_CRITICAL __constant_cpu_to_le32(0xC000036B) +#define STATUS_DRIVER_BLOCKED __constant_cpu_to_le32(0xC000036C) +#define STATUS_DRIVER_DATABASE_ERROR __constant_cpu_to_le32(0xC000036D) +#define STATUS_SYSTEM_HIVE_TOO_LARGE __constant_cpu_to_le32(0xC000036E) +#define STATUS_INVALID_IMPORT_OF_NON_DLL __constant_cpu_to_le32(0xC000036F) +#define STATUS_NO_SECRETS __constant_cpu_to_le32(0xC0000371) +#define 
STATUS_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY __constant_cpu_to_le32(0xC0000372) +#define STATUS_FAILED_STACK_SWITCH __constant_cpu_to_le32(0xC0000373) +#define STATUS_HEAP_CORRUPTION __constant_cpu_to_le32(0xC0000374) +#define STATUS_SMARTCARD_WRONG_PIN __constant_cpu_to_le32(0xC0000380) +#define STATUS_SMARTCARD_CARD_BLOCKED __constant_cpu_to_le32(0xC0000381) +#define STATUS_SMARTCARD_CARD_NOT_AUTHENTICATED __constant_cpu_to_le32(0xC0000382) +#define STATUS_SMARTCARD_NO_CARD __constant_cpu_to_le32(0xC0000383) +#define STATUS_SMARTCARD_NO_KEY_CONTAINER __constant_cpu_to_le32(0xC0000384) +#define STATUS_SMARTCARD_NO_CERTIFICATE __constant_cpu_to_le32(0xC0000385) +#define STATUS_SMARTCARD_NO_KEYSET __constant_cpu_to_le32(0xC0000386) +#define STATUS_SMARTCARD_IO_ERROR __constant_cpu_to_le32(0xC0000387) +#define STATUS_DOWNGRADE_DETECTED __constant_cpu_to_le32(0xC0000388) +#define STATUS_SMARTCARD_CERT_REVOKED __constant_cpu_to_le32(0xC0000389) +#define STATUS_ISSUING_CA_UNTRUSTED __constant_cpu_to_le32(0xC000038A) +#define STATUS_REVOCATION_OFFLINE_C __constant_cpu_to_le32(0xC000038B) +#define STATUS_PKINIT_CLIENT_FAILURE __constant_cpu_to_le32(0xC000038C) +#define STATUS_SMARTCARD_CERT_EXPIRED __constant_cpu_to_le32(0xC000038D) +#define STATUS_DRIVER_FAILED_PRIOR_UNLOAD __constant_cpu_to_le32(0xC000038E) +#define STATUS_SMARTCARD_SILENT_CONTEXT __constant_cpu_to_le32(0xC000038F) +#define STATUS_PER_USER_TRUST_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000401) +#define STATUS_ALL_USER_TRUST_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000402) +#define STATUS_USER_DELETE_TRUST_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000403) +#define STATUS_DS_NAME_NOT_UNIQUE __constant_cpu_to_le32(0xC0000404) +#define STATUS_DS_DUPLICATE_ID_FOUND __constant_cpu_to_le32(0xC0000405) +#define STATUS_DS_GROUP_CONVERSION_ERROR __constant_cpu_to_le32(0xC0000406) +#define STATUS_VOLSNAP_PREPARE_HIBERNATE __constant_cpu_to_le32(0xC0000407) +#define STATUS_USER2USER_REQUIRED 
__constant_cpu_to_le32(0xC0000408) +#define STATUS_STACK_BUFFER_OVERRUN __constant_cpu_to_le32(0xC0000409) +#define STATUS_NO_S4U_PROT_SUPPORT __constant_cpu_to_le32(0xC000040A) +#define STATUS_CROSSREALM_DELEGATION_FAILURE __constant_cpu_to_le32(0xC000040B) +#define STATUS_REVOCATION_OFFLINE_KDC __constant_cpu_to_le32(0xC000040C) +#define STATUS_ISSUING_CA_UNTRUSTED_KDC __constant_cpu_to_le32(0xC000040D) +#define STATUS_KDC_CERT_EXPIRED __constant_cpu_to_le32(0xC000040E) +#define STATUS_KDC_CERT_REVOKED __constant_cpu_to_le32(0xC000040F) +#define STATUS_PARAMETER_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000410) +#define STATUS_HIBERNATION_FAILURE __constant_cpu_to_le32(0xC0000411) +#define STATUS_DELAY_LOAD_FAILED __constant_cpu_to_le32(0xC0000412) +#define STATUS_AUTHENTICATION_FIREWALL_FAILED __constant_cpu_to_le32(0xC0000413) +#define STATUS_VDM_DISALLOWED __constant_cpu_to_le32(0xC0000414) +#define STATUS_HUNG_DISPLAY_DRIVER_THREAD __constant_cpu_to_le32(0xC0000415) +#define STATUS_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE __constant_cpu_to_le32(0xC0000416) +#define STATUS_INVALID_CRUNTIME_PARAMETER __constant_cpu_to_le32(0xC0000417) +#define STATUS_NTLM_BLOCKED __constant_cpu_to_le32(0xC0000418) +#define STATUS_ASSERTION_FAILURE __constant_cpu_to_le32(0xC0000420) +#define STATUS_VERIFIER_STOP __constant_cpu_to_le32(0xC0000421) +#define STATUS_CALLBACK_POP_STACK __constant_cpu_to_le32(0xC0000423) +#define STATUS_INCOMPATIBLE_DRIVER_BLOCKED __constant_cpu_to_le32(0xC0000424) +#define STATUS_HIVE_UNLOADED __constant_cpu_to_le32(0xC0000425) +#define STATUS_COMPRESSION_DISABLED __constant_cpu_to_le32(0xC0000426) +#define STATUS_FILE_SYSTEM_LIMITATION __constant_cpu_to_le32(0xC0000427) +#define STATUS_INVALID_IMAGE_HASH __constant_cpu_to_le32(0xC0000428) +#define STATUS_NOT_CAPABLE __constant_cpu_to_le32(0xC0000429) +#define STATUS_REQUEST_OUT_OF_SEQUENCE __constant_cpu_to_le32(0xC000042A) +#define STATUS_IMPLEMENTATION_LIMIT 
__constant_cpu_to_le32(0xC000042B) +#define STATUS_ELEVATION_REQUIRED __constant_cpu_to_le32(0xC000042C) +#define STATUS_BEYOND_VDL __constant_cpu_to_le32(0xC0000432) +#define STATUS_ENCOUNTERED_WRITE_IN_PROGRESS __constant_cpu_to_le32(0xC0000433) +#define STATUS_PTE_CHANGED __constant_cpu_to_le32(0xC0000434) +#define STATUS_PURGE_FAILED __constant_cpu_to_le32(0xC0000435) +#define STATUS_CRED_REQUIRES_CONFIRMATION __constant_cpu_to_le32(0xC0000440) +#define STATUS_CS_ENCRYPTION_INVALID_SERVER_RESPONSE __constant_cpu_to_le32(0xC0000441) +#define STATUS_CS_ENCRYPTION_UNSUPPORTED_SERVER __constant_cpu_to_le32(0xC0000442) +#define STATUS_CS_ENCRYPTION_EXISTING_ENCRYPTED_FILE __constant_cpu_to_le32(0xC0000443) +#define STATUS_CS_ENCRYPTION_NEW_ENCRYPTED_FILE __constant_cpu_to_le32(0xC0000444) +#define STATUS_CS_ENCRYPTION_FILE_NOT_CSE __constant_cpu_to_le32(0xC0000445) +#define STATUS_INVALID_LABEL __constant_cpu_to_le32(0xC0000446) +#define STATUS_DRIVER_PROCESS_TERMINATED __constant_cpu_to_le32(0xC0000450) +#define STATUS_AMBIGUOUS_SYSTEM_DEVICE __constant_cpu_to_le32(0xC0000451) +#define STATUS_SYSTEM_DEVICE_NOT_FOUND __constant_cpu_to_le32(0xC0000452) +#define STATUS_RESTART_BOOT_APPLICATION __constant_cpu_to_le32(0xC0000453) +#define STATUS_INVALID_TASK_NAME __constant_cpu_to_le32(0xC0000500) +#define STATUS_INVALID_TASK_INDEX __constant_cpu_to_le32(0xC0000501) +#define STATUS_THREAD_ALREADY_IN_TASK __constant_cpu_to_le32(0xC0000502) +#define STATUS_CALLBACK_BYPASS __constant_cpu_to_le32(0xC0000503) +#define STATUS_PORT_CLOSED __constant_cpu_to_le32(0xC0000700) +#define STATUS_MESSAGE_LOST __constant_cpu_to_le32(0xC0000701) +#define STATUS_INVALID_MESSAGE __constant_cpu_to_le32(0xC0000702) +#define STATUS_REQUEST_CANCELED __constant_cpu_to_le32(0xC0000703) +#define STATUS_RECURSIVE_DISPATCH __constant_cpu_to_le32(0xC0000704) +#define STATUS_LPC_RECEIVE_BUFFER_EXPECTED __constant_cpu_to_le32(0xC0000705) +#define STATUS_LPC_INVALID_CONNECTION_USAGE 
__constant_cpu_to_le32(0xC0000706) +#define STATUS_LPC_REQUESTS_NOT_ALLOWED __constant_cpu_to_le32(0xC0000707) +#define STATUS_RESOURCE_IN_USE __constant_cpu_to_le32(0xC0000708) +#define STATUS_HARDWARE_MEMORY_ERROR __constant_cpu_to_le32(0xC0000709) +#define STATUS_THREADPOOL_HANDLE_EXCEPTION __constant_cpu_to_le32(0xC000070A) +#define STATUS_THREADPOOL_SET_EVENT_ON_COMPLETION_FAILED __constant_cpu_to_le32(0xC000070B) +#define STATUS_THREADPOOL_RELEASE_SEMAPHORE_ON_COMPLETION_FAILED __constant_cpu_to_le32(0xC000070C) +#define STATUS_THREADPOOL_RELEASE_MUTEX_ON_COMPLETION_FAILED __constant_cpu_to_le32(0xC000070D) +#define STATUS_THREADPOOL_FREE_LIBRARY_ON_COMPLETION_FAILED __constant_cpu_to_le32(0xC000070E) +#define STATUS_THREADPOOL_RELEASED_DURING_OPERATION __constant_cpu_to_le32(0xC000070F) +#define STATUS_CALLBACK_RETURNED_WHILE_IMPERSONATING __constant_cpu_to_le32(0xC0000710) +#define STATUS_APC_RETURNED_WHILE_IMPERSONATING __constant_cpu_to_le32(0xC0000711) +#define STATUS_PROCESS_IS_PROTECTED __constant_cpu_to_le32(0xC0000712) +#define STATUS_MCA_EXCEPTION __constant_cpu_to_le32(0xC0000713) +#define STATUS_CERTIFICATE_MAPPING_NOT_UNIQUE __constant_cpu_to_le32(0xC0000714) +#define STATUS_SYMLINK_CLASS_DISABLED __constant_cpu_to_le32(0xC0000715) +#define STATUS_INVALID_IDN_NORMALIZATION __constant_cpu_to_le32(0xC0000716) +#define STATUS_NO_UNICODE_TRANSLATION __constant_cpu_to_le32(0xC0000717) +#define STATUS_ALREADY_REGISTERED __constant_cpu_to_le32(0xC0000718) +#define STATUS_CONTEXT_MISMATCH __constant_cpu_to_le32(0xC0000719) +#define STATUS_PORT_ALREADY_HAS_COMPLETION_LIST __constant_cpu_to_le32(0xC000071A) +#define STATUS_CALLBACK_RETURNED_THREAD_PRIORITY __constant_cpu_to_le32(0xC000071B) +#define STATUS_INVALID_THREAD __constant_cpu_to_le32(0xC000071C) +#define STATUS_CALLBACK_RETURNED_TRANSACTION __constant_cpu_to_le32(0xC000071D) +#define STATUS_CALLBACK_RETURNED_LDR_LOCK __constant_cpu_to_le32(0xC000071E) +#define STATUS_CALLBACK_RETURNED_LANG 
__constant_cpu_to_le32(0xC000071F) +#define STATUS_CALLBACK_RETURNED_PRI_BACK __constant_cpu_to_le32(0xC0000720) +#define STATUS_CALLBACK_RETURNED_THREAD_AFFINITY __constant_cpu_to_le32(0xC0000721) +#define STATUS_DISK_REPAIR_DISABLED __constant_cpu_to_le32(0xC0000800) +#define STATUS_DS_DOMAIN_RENAME_IN_PROGRESS __constant_cpu_to_le32(0xC0000801) +#define STATUS_DISK_QUOTA_EXCEEDED __constant_cpu_to_le32(0xC0000802) +#define STATUS_CONTENT_BLOCKED __constant_cpu_to_le32(0xC0000804) +#define STATUS_BAD_CLUSTERS __constant_cpu_to_le32(0xC0000805) +#define STATUS_VOLUME_DIRTY __constant_cpu_to_le32(0xC0000806) +#define STATUS_FILE_CHECKED_OUT __constant_cpu_to_le32(0xC0000901) +#define STATUS_CHECKOUT_REQUIRED __constant_cpu_to_le32(0xC0000902) +#define STATUS_BAD_FILE_TYPE __constant_cpu_to_le32(0xC0000903) +#define STATUS_FILE_TOO_LARGE __constant_cpu_to_le32(0xC0000904) +#define STATUS_FORMS_AUTH_REQUIRED __constant_cpu_to_le32(0xC0000905) +#define STATUS_VIRUS_INFECTED __constant_cpu_to_le32(0xC0000906) +#define STATUS_VIRUS_DELETED __constant_cpu_to_le32(0xC0000907) +#define STATUS_BAD_MCFG_TABLE __constant_cpu_to_le32(0xC0000908) +#define STATUS_WOW_ASSERTION __constant_cpu_to_le32(0xC0009898) +#define STATUS_INVALID_SIGNATURE __constant_cpu_to_le32(0xC000A000) +#define STATUS_HMAC_NOT_SUPPORTED __constant_cpu_to_le32(0xC000A001) +#define STATUS_IPSEC_QUEUE_OVERFLOW __constant_cpu_to_le32(0xC000A010) +#define STATUS_ND_QUEUE_OVERFLOW __constant_cpu_to_le32(0xC000A011) +#define STATUS_HOPLIMIT_EXCEEDED __constant_cpu_to_le32(0xC000A012) +#define STATUS_PROTOCOL_NOT_SUPPORTED __constant_cpu_to_le32(0xC000A013) +#define STATUS_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED __constant_cpu_to_le32(0xC000A080) +#define STATUS_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR __constant_cpu_to_le32(0xC000A081) +#define STATUS_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR __constant_cpu_to_le32(0xC000A082) +#define STATUS_XML_PARSE_ERROR __constant_cpu_to_le32(0xC000A083) +#define 
STATUS_XMLDSIG_ERROR __constant_cpu_to_le32(0xC000A084) +#define STATUS_WRONG_COMPARTMENT __constant_cpu_to_le32(0xC000A085) +#define STATUS_AUTHIP_FAILURE __constant_cpu_to_le32(0xC000A086) +#define DBG_NO_STATE_CHANGE __constant_cpu_to_le32(0xC0010001) +#define DBG_APP_NOT_IDLE __constant_cpu_to_le32(0xC0010002) +#define RPC_NT_INVALID_STRING_BINDING __constant_cpu_to_le32(0xC0020001) +#define RPC_NT_WRONG_KIND_OF_BINDING __constant_cpu_to_le32(0xC0020002) +#define RPC_NT_INVALID_BINDING __constant_cpu_to_le32(0xC0020003) +#define RPC_NT_PROTSEQ_NOT_SUPPORTED __constant_cpu_to_le32(0xC0020004) +#define RPC_NT_INVALID_RPC_PROTSEQ __constant_cpu_to_le32(0xC0020005) +#define RPC_NT_INVALID_STRING_UUID __constant_cpu_to_le32(0xC0020006) +#define RPC_NT_INVALID_ENDPOINT_FORMAT __constant_cpu_to_le32(0xC0020007) +#define RPC_NT_INVALID_NET_ADDR __constant_cpu_to_le32(0xC0020008) +#define RPC_NT_NO_ENDPOINT_FOUND __constant_cpu_to_le32(0xC0020009) +#define RPC_NT_INVALID_TIMEOUT __constant_cpu_to_le32(0xC002000A) +#define RPC_NT_OBJECT_NOT_FOUND __constant_cpu_to_le32(0xC002000B) +#define RPC_NT_ALREADY_REGISTERED __constant_cpu_to_le32(0xC002000C) +#define RPC_NT_TYPE_ALREADY_REGISTERED __constant_cpu_to_le32(0xC002000D) +#define RPC_NT_ALREADY_LISTENING __constant_cpu_to_le32(0xC002000E) +#define RPC_NT_NO_PROTSEQS_REGISTERED __constant_cpu_to_le32(0xC002000F) +#define RPC_NT_NOT_LISTENING __constant_cpu_to_le32(0xC0020010) +#define RPC_NT_UNKNOWN_MGR_TYPE __constant_cpu_to_le32(0xC0020011) +#define RPC_NT_UNKNOWN_IF __constant_cpu_to_le32(0xC0020012) +#define RPC_NT_NO_BINDINGS __constant_cpu_to_le32(0xC0020013) +#define RPC_NT_NO_PROTSEQS __constant_cpu_to_le32(0xC0020014) +#define RPC_NT_CANT_CREATE_ENDPOINT __constant_cpu_to_le32(0xC0020015) +#define RPC_NT_OUT_OF_RESOURCES __constant_cpu_to_le32(0xC0020016) +#define RPC_NT_SERVER_UNAVAILABLE __constant_cpu_to_le32(0xC0020017) +#define RPC_NT_SERVER_TOO_BUSY __constant_cpu_to_le32(0xC0020018) +#define 
RPC_NT_INVALID_NETWORK_OPTIONS __constant_cpu_to_le32(0xC0020019) +#define RPC_NT_NO_CALL_ACTIVE __constant_cpu_to_le32(0xC002001A) +#define RPC_NT_CALL_FAILED __constant_cpu_to_le32(0xC002001B) +#define RPC_NT_CALL_FAILED_DNE __constant_cpu_to_le32(0xC002001C) +#define RPC_NT_PROTOCOL_ERROR __constant_cpu_to_le32(0xC002001D) +#define RPC_NT_UNSUPPORTED_TRANS_SYN __constant_cpu_to_le32(0xC002001F) +#define RPC_NT_UNSUPPORTED_TYPE __constant_cpu_to_le32(0xC0020021) +#define RPC_NT_INVALID_TAG __constant_cpu_to_le32(0xC0020022) +#define RPC_NT_INVALID_BOUND __constant_cpu_to_le32(0xC0020023) +#define RPC_NT_NO_ENTRY_NAME __constant_cpu_to_le32(0xC0020024) +#define RPC_NT_INVALID_NAME_SYNTAX __constant_cpu_to_le32(0xC0020025) +#define RPC_NT_UNSUPPORTED_NAME_SYNTAX __constant_cpu_to_le32(0xC0020026) +#define RPC_NT_UUID_NO_ADDRESS __constant_cpu_to_le32(0xC0020028) +#define RPC_NT_DUPLICATE_ENDPOINT __constant_cpu_to_le32(0xC0020029) +#define RPC_NT_UNKNOWN_AUTHN_TYPE __constant_cpu_to_le32(0xC002002A) +#define RPC_NT_MAX_CALLS_TOO_SMALL __constant_cpu_to_le32(0xC002002B) +#define RPC_NT_STRING_TOO_LONG __constant_cpu_to_le32(0xC002002C) +#define RPC_NT_PROTSEQ_NOT_FOUND __constant_cpu_to_le32(0xC002002D) +#define RPC_NT_PROCNUM_OUT_OF_RANGE __constant_cpu_to_le32(0xC002002E) +#define RPC_NT_BINDING_HAS_NO_AUTH __constant_cpu_to_le32(0xC002002F) +#define RPC_NT_UNKNOWN_AUTHN_SERVICE __constant_cpu_to_le32(0xC0020030) +#define RPC_NT_UNKNOWN_AUTHN_LEVEL __constant_cpu_to_le32(0xC0020031) +#define RPC_NT_INVALID_AUTH_IDENTITY __constant_cpu_to_le32(0xC0020032) +#define RPC_NT_UNKNOWN_AUTHZ_SERVICE __constant_cpu_to_le32(0xC0020033) +#define EPT_NT_INVALID_ENTRY __constant_cpu_to_le32(0xC0020034) +#define EPT_NT_CANT_PERFORM_OP __constant_cpu_to_le32(0xC0020035) +#define EPT_NT_NOT_REGISTERED __constant_cpu_to_le32(0xC0020036) +#define RPC_NT_NOTHING_TO_EXPORT __constant_cpu_to_le32(0xC0020037) +#define RPC_NT_INCOMPLETE_NAME __constant_cpu_to_le32(0xC0020038) +#define 
RPC_NT_INVALID_VERS_OPTION __constant_cpu_to_le32(0xC0020039) +#define RPC_NT_NO_MORE_MEMBERS __constant_cpu_to_le32(0xC002003A) +#define RPC_NT_NOT_ALL_OBJS_UNEXPORTED __constant_cpu_to_le32(0xC002003B) +#define RPC_NT_INTERFACE_NOT_FOUND __constant_cpu_to_le32(0xC002003C) +#define RPC_NT_ENTRY_ALREADY_EXISTS __constant_cpu_to_le32(0xC002003D) +#define RPC_NT_ENTRY_NOT_FOUND __constant_cpu_to_le32(0xC002003E) +#define RPC_NT_NAME_SERVICE_UNAVAILABLE __constant_cpu_to_le32(0xC002003F) +#define RPC_NT_INVALID_NAF_ID __constant_cpu_to_le32(0xC0020040) +#define RPC_NT_CANNOT_SUPPORT __constant_cpu_to_le32(0xC0020041) +#define RPC_NT_NO_CONTEXT_AVAILABLE __constant_cpu_to_le32(0xC0020042) +#define RPC_NT_INTERNAL_ERROR __constant_cpu_to_le32(0xC0020043) +#define RPC_NT_ZERO_DIVIDE __constant_cpu_to_le32(0xC0020044) +#define RPC_NT_ADDRESS_ERROR __constant_cpu_to_le32(0xC0020045) +#define RPC_NT_FP_DIV_ZERO __constant_cpu_to_le32(0xC0020046) +#define RPC_NT_FP_UNDERFLOW __constant_cpu_to_le32(0xC0020047) +#define RPC_NT_FP_OVERFLOW __constant_cpu_to_le32(0xC0020048) +#define RPC_NT_CALL_IN_PROGRESS __constant_cpu_to_le32(0xC0020049) +#define RPC_NT_NO_MORE_BINDINGS __constant_cpu_to_le32(0xC002004A) +#define RPC_NT_GROUP_MEMBER_NOT_FOUND __constant_cpu_to_le32(0xC002004B) +#define EPT_NT_CANT_CREATE __constant_cpu_to_le32(0xC002004C) +#define RPC_NT_INVALID_OBJECT __constant_cpu_to_le32(0xC002004D) +#define RPC_NT_NO_INTERFACES __constant_cpu_to_le32(0xC002004F) +#define RPC_NT_CALL_CANCELLED __constant_cpu_to_le32(0xC0020050) +#define RPC_NT_BINDING_INCOMPLETE __constant_cpu_to_le32(0xC0020051) +#define RPC_NT_COMM_FAILURE __constant_cpu_to_le32(0xC0020052) +#define RPC_NT_UNSUPPORTED_AUTHN_LEVEL __constant_cpu_to_le32(0xC0020053) +#define RPC_NT_NO_PRINC_NAME __constant_cpu_to_le32(0xC0020054) +#define RPC_NT_NOT_RPC_ERROR __constant_cpu_to_le32(0xC0020055) +#define RPC_NT_SEC_PKG_ERROR __constant_cpu_to_le32(0xC0020057) +#define RPC_NT_NOT_CANCELLED 
__constant_cpu_to_le32(0xC0020058) +#define RPC_NT_INVALID_ASYNC_HANDLE __constant_cpu_to_le32(0xC0020062) +#define RPC_NT_INVALID_ASYNC_CALL __constant_cpu_to_le32(0xC0020063) +#define RPC_NT_PROXY_ACCESS_DENIED __constant_cpu_to_le32(0xC0020064) +#define RPC_NT_NO_MORE_ENTRIES __constant_cpu_to_le32(0xC0030001) +#define RPC_NT_SS_CHAR_TRANS_OPEN_FAIL __constant_cpu_to_le32(0xC0030002) +#define RPC_NT_SS_CHAR_TRANS_SHORT_FILE __constant_cpu_to_le32(0xC0030003) +#define RPC_NT_SS_IN_NULL_CONTEXT __constant_cpu_to_le32(0xC0030004) +#define RPC_NT_SS_CONTEXT_MISMATCH __constant_cpu_to_le32(0xC0030005) +#define RPC_NT_SS_CONTEXT_DAMAGED __constant_cpu_to_le32(0xC0030006) +#define RPC_NT_SS_HANDLES_MISMATCH __constant_cpu_to_le32(0xC0030007) +#define RPC_NT_SS_CANNOT_GET_CALL_HANDLE __constant_cpu_to_le32(0xC0030008) +#define RPC_NT_NULL_REF_POINTER __constant_cpu_to_le32(0xC0030009) +#define RPC_NT_ENUM_VALUE_OUT_OF_RANGE __constant_cpu_to_le32(0xC003000A) +#define RPC_NT_BYTE_COUNT_TOO_SMALL __constant_cpu_to_le32(0xC003000B) +#define RPC_NT_BAD_STUB_DATA __constant_cpu_to_le32(0xC003000C) +#define RPC_NT_INVALID_ES_ACTION __constant_cpu_to_le32(0xC0030059) +#define RPC_NT_WRONG_ES_VERSION __constant_cpu_to_le32(0xC003005A) +#define RPC_NT_WRONG_STUB_VERSION __constant_cpu_to_le32(0xC003005B) +#define RPC_NT_INVALID_PIPE_OBJECT __constant_cpu_to_le32(0xC003005C) +#define RPC_NT_INVALID_PIPE_OPERATION __constant_cpu_to_le32(0xC003005D) +#define RPC_NT_WRONG_PIPE_VERSION __constant_cpu_to_le32(0xC003005E) +#define RPC_NT_PIPE_CLOSED __constant_cpu_to_le32(0xC003005F) +#define RPC_NT_PIPE_DISCIPLINE_ERROR __constant_cpu_to_le32(0xC0030060) +#define RPC_NT_PIPE_EMPTY __constant_cpu_to_le32(0xC0030061) +#define STATUS_PNP_BAD_MPS_TABLE __constant_cpu_to_le32(0xC0040035) +#define STATUS_PNP_TRANSLATION_FAILED __constant_cpu_to_le32(0xC0040036) +#define STATUS_PNP_IRQ_TRANSLATION_FAILED __constant_cpu_to_le32(0xC0040037) +#define STATUS_PNP_INVALID_ID 
__constant_cpu_to_le32(0xC0040038) +#define STATUS_IO_REISSUE_AS_CACHED __constant_cpu_to_le32(0xC0040039) +#define STATUS_CTX_WINSTATION_NAME_INVALID __constant_cpu_to_le32(0xC00A0001) +#define STATUS_CTX_INVALID_PD __constant_cpu_to_le32(0xC00A0002) +#define STATUS_CTX_PD_NOT_FOUND __constant_cpu_to_le32(0xC00A0003) +#define STATUS_CTX_CLOSE_PENDING __constant_cpu_to_le32(0xC00A0006) +#define STATUS_CTX_NO_OUTBUF __constant_cpu_to_le32(0xC00A0007) +#define STATUS_CTX_MODEM_INF_NOT_FOUND __constant_cpu_to_le32(0xC00A0008) +#define STATUS_CTX_INVALID_MODEMNAME __constant_cpu_to_le32(0xC00A0009) +#define STATUS_CTX_RESPONSE_ERROR __constant_cpu_to_le32(0xC00A000A) +#define STATUS_CTX_MODEM_RESPONSE_TIMEOUT __constant_cpu_to_le32(0xC00A000B) +#define STATUS_CTX_MODEM_RESPONSE_NO_CARRIER __constant_cpu_to_le32(0xC00A000C) +#define STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE __constant_cpu_to_le32(0xC00A000D) +#define STATUS_CTX_MODEM_RESPONSE_BUSY __constant_cpu_to_le32(0xC00A000E) +#define STATUS_CTX_MODEM_RESPONSE_VOICE __constant_cpu_to_le32(0xC00A000F) +#define STATUS_CTX_TD_ERROR __constant_cpu_to_le32(0xC00A0010) +#define STATUS_CTX_LICENSE_CLIENT_INVALID __constant_cpu_to_le32(0xC00A0012) +#define STATUS_CTX_LICENSE_NOT_AVAILABLE __constant_cpu_to_le32(0xC00A0013) +#define STATUS_CTX_LICENSE_EXPIRED __constant_cpu_to_le32(0xC00A0014) +#define STATUS_CTX_WINSTATION_NOT_FOUND __constant_cpu_to_le32(0xC00A0015) +#define STATUS_CTX_WINSTATION_NAME_COLLISION __constant_cpu_to_le32(0xC00A0016) +#define STATUS_CTX_WINSTATION_BUSY __constant_cpu_to_le32(0xC00A0017) +#define STATUS_CTX_BAD_VIDEO_MODE __constant_cpu_to_le32(0xC00A0018) +#define STATUS_CTX_GRAPHICS_INVALID __constant_cpu_to_le32(0xC00A0022) +#define STATUS_CTX_NOT_CONSOLE __constant_cpu_to_le32(0xC00A0024) +#define STATUS_CTX_CLIENT_QUERY_TIMEOUT __constant_cpu_to_le32(0xC00A0026) +#define STATUS_CTX_CONSOLE_DISCONNECT __constant_cpu_to_le32(0xC00A0027) +#define STATUS_CTX_CONSOLE_CONNECT 
__constant_cpu_to_le32(0xC00A0028) +#define STATUS_CTX_SHADOW_DENIED __constant_cpu_to_le32(0xC00A002A) +#define STATUS_CTX_WINSTATION_ACCESS_DENIED __constant_cpu_to_le32(0xC00A002B) +#define STATUS_CTX_INVALID_WD __constant_cpu_to_le32(0xC00A002E) +#define STATUS_CTX_WD_NOT_FOUND __constant_cpu_to_le32(0xC00A002F) +#define STATUS_CTX_SHADOW_INVALID __constant_cpu_to_le32(0xC00A0030) +#define STATUS_CTX_SHADOW_DISABLED __constant_cpu_to_le32(0xC00A0031) +#define STATUS_RDP_PROTOCOL_ERROR __constant_cpu_to_le32(0xC00A0032) +#define STATUS_CTX_CLIENT_LICENSE_NOT_SET __constant_cpu_to_le32(0xC00A0033) +#define STATUS_CTX_CLIENT_LICENSE_IN_USE __constant_cpu_to_le32(0xC00A0034) +#define STATUS_CTX_SHADOW_ENDED_BY_MODE_CHANGE __constant_cpu_to_le32(0xC00A0035) +#define STATUS_CTX_SHADOW_NOT_RUNNING __constant_cpu_to_le32(0xC00A0036) +#define STATUS_CTX_LOGON_DISABLED __constant_cpu_to_le32(0xC00A0037) +#define STATUS_CTX_SECURITY_LAYER_ERROR __constant_cpu_to_le32(0xC00A0038) +#define STATUS_TS_INCOMPATIBLE_SESSIONS __constant_cpu_to_le32(0xC00A0039) +#define STATUS_MUI_FILE_NOT_FOUND __constant_cpu_to_le32(0xC00B0001) +#define STATUS_MUI_INVALID_FILE __constant_cpu_to_le32(0xC00B0002) +#define STATUS_MUI_INVALID_RC_CONFIG __constant_cpu_to_le32(0xC00B0003) +#define STATUS_MUI_INVALID_LOCALE_NAME __constant_cpu_to_le32(0xC00B0004) +#define STATUS_MUI_INVALID_ULTIMATEFALLBACK_NAME __constant_cpu_to_le32(0xC00B0005) +#define STATUS_MUI_FILE_NOT_LOADED __constant_cpu_to_le32(0xC00B0006) +#define STATUS_RESOURCE_ENUM_USER_STOP __constant_cpu_to_le32(0xC00B0007) +#define STATUS_CLUSTER_INVALID_NODE __constant_cpu_to_le32(0xC0130001) +#define STATUS_CLUSTER_NODE_EXISTS __constant_cpu_to_le32(0xC0130002) +#define STATUS_CLUSTER_JOIN_IN_PROGRESS __constant_cpu_to_le32(0xC0130003) +#define STATUS_CLUSTER_NODE_NOT_FOUND __constant_cpu_to_le32(0xC0130004) +#define STATUS_CLUSTER_LOCAL_NODE_NOT_FOUND __constant_cpu_to_le32(0xC0130005) +#define STATUS_CLUSTER_NETWORK_EXISTS 
__constant_cpu_to_le32(0xC0130006) +#define STATUS_CLUSTER_NETWORK_NOT_FOUND __constant_cpu_to_le32(0xC0130007) +#define STATUS_CLUSTER_NETINTERFACE_EXISTS __constant_cpu_to_le32(0xC0130008) +#define STATUS_CLUSTER_NETINTERFACE_NOT_FOUND __constant_cpu_to_le32(0xC0130009) +#define STATUS_CLUSTER_INVALID_REQUEST __constant_cpu_to_le32(0xC013000A) +#define STATUS_CLUSTER_INVALID_NETWORK_PROVIDER __constant_cpu_to_le32(0xC013000B) +#define STATUS_CLUSTER_NODE_DOWN __constant_cpu_to_le32(0xC013000C) +#define STATUS_CLUSTER_NODE_UNREACHABLE __constant_cpu_to_le32(0xC013000D) +#define STATUS_CLUSTER_NODE_NOT_MEMBER __constant_cpu_to_le32(0xC013000E) +#define STATUS_CLUSTER_JOIN_NOT_IN_PROGRESS __constant_cpu_to_le32(0xC013000F) +#define STATUS_CLUSTER_INVALID_NETWORK __constant_cpu_to_le32(0xC0130010) +#define STATUS_CLUSTER_NO_NET_ADAPTERS __constant_cpu_to_le32(0xC0130011) +#define STATUS_CLUSTER_NODE_UP __constant_cpu_to_le32(0xC0130012) +#define STATUS_CLUSTER_NODE_PAUSED __constant_cpu_to_le32(0xC0130013) +#define STATUS_CLUSTER_NODE_NOT_PAUSED __constant_cpu_to_le32(0xC0130014) +#define STATUS_CLUSTER_NO_SECURITY_CONTEXT __constant_cpu_to_le32(0xC0130015) +#define STATUS_CLUSTER_NETWORK_NOT_INTERNAL __constant_cpu_to_le32(0xC0130016) +#define STATUS_CLUSTER_POISONED __constant_cpu_to_le32(0xC0130017) +#define STATUS_ACPI_INVALID_OPCODE __constant_cpu_to_le32(0xC0140001) +#define STATUS_ACPI_STACK_OVERFLOW __constant_cpu_to_le32(0xC0140002) +#define STATUS_ACPI_ASSERT_FAILED __constant_cpu_to_le32(0xC0140003) +#define STATUS_ACPI_INVALID_INDEX __constant_cpu_to_le32(0xC0140004) +#define STATUS_ACPI_INVALID_ARGUMENT __constant_cpu_to_le32(0xC0140005) +#define STATUS_ACPI_FATAL __constant_cpu_to_le32(0xC0140006) +#define STATUS_ACPI_INVALID_SUPERNAME __constant_cpu_to_le32(0xC0140007) +#define STATUS_ACPI_INVALID_ARGTYPE __constant_cpu_to_le32(0xC0140008) +#define STATUS_ACPI_INVALID_OBJTYPE __constant_cpu_to_le32(0xC0140009) +#define STATUS_ACPI_INVALID_TARGETTYPE 
__constant_cpu_to_le32(0xC014000A) +#define STATUS_ACPI_INCORRECT_ARGUMENT_COUNT __constant_cpu_to_le32(0xC014000B) +#define STATUS_ACPI_ADDRESS_NOT_MAPPED __constant_cpu_to_le32(0xC014000C) +#define STATUS_ACPI_INVALID_EVENTTYPE __constant_cpu_to_le32(0xC014000D) +#define STATUS_ACPI_HANDLER_COLLISION __constant_cpu_to_le32(0xC014000E) +#define STATUS_ACPI_INVALID_DATA __constant_cpu_to_le32(0xC014000F) +#define STATUS_ACPI_INVALID_REGION __constant_cpu_to_le32(0xC0140010) +#define STATUS_ACPI_INVALID_ACCESS_SIZE __constant_cpu_to_le32(0xC0140011) +#define STATUS_ACPI_ACQUIRE_GLOBAL_LOCK __constant_cpu_to_le32(0xC0140012) +#define STATUS_ACPI_ALREADY_INITIALIZED __constant_cpu_to_le32(0xC0140013) +#define STATUS_ACPI_NOT_INITIALIZED __constant_cpu_to_le32(0xC0140014) +#define STATUS_ACPI_INVALID_MUTEX_LEVEL __constant_cpu_to_le32(0xC0140015) +#define STATUS_ACPI_MUTEX_NOT_OWNED __constant_cpu_to_le32(0xC0140016) +#define STATUS_ACPI_MUTEX_NOT_OWNER __constant_cpu_to_le32(0xC0140017) +#define STATUS_ACPI_RS_ACCESS __constant_cpu_to_le32(0xC0140018) +#define STATUS_ACPI_INVALID_TABLE __constant_cpu_to_le32(0xC0140019) +#define STATUS_ACPI_REG_HANDLER_FAILED __constant_cpu_to_le32(0xC0140020) +#define STATUS_ACPI_POWER_REQUEST_FAILED __constant_cpu_to_le32(0xC0140021) +#define STATUS_SXS_SECTION_NOT_FOUND __constant_cpu_to_le32(0xC0150001) +#define STATUS_SXS_CANT_GEN_ACTCTX __constant_cpu_to_le32(0xC0150002) +#define STATUS_SXS_INVALID_ACTCTXDATA_FORMAT __constant_cpu_to_le32(0xC0150003) +#define STATUS_SXS_ASSEMBLY_NOT_FOUND __constant_cpu_to_le32(0xC0150004) +#define STATUS_SXS_MANIFEST_FORMAT_ERROR __constant_cpu_to_le32(0xC0150005) +#define STATUS_SXS_MANIFEST_PARSE_ERROR __constant_cpu_to_le32(0xC0150006) +#define STATUS_SXS_ACTIVATION_CONTEXT_DISABLED __constant_cpu_to_le32(0xC0150007) +#define STATUS_SXS_KEY_NOT_FOUND __constant_cpu_to_le32(0xC0150008) +#define STATUS_SXS_VERSION_CONFLICT __constant_cpu_to_le32(0xC0150009) +#define 
STATUS_SXS_WRONG_SECTION_TYPE __constant_cpu_to_le32(0xC015000A) +#define STATUS_SXS_THREAD_QUERIES_DISABLED __constant_cpu_to_le32(0xC015000B) +#define STATUS_SXS_ASSEMBLY_MISSING __constant_cpu_to_le32(0xC015000C) +#define STATUS_SXS_PROCESS_DEFAULT_ALREADY_SET __constant_cpu_to_le32(0xC015000E) +#define STATUS_SXS_EARLY_DEACTIVATION __constant_cpu_to_le32(0xC015000F) +#define STATUS_SXS_INVALID_DEACTIVATION __constant_cpu_to_le32(0xC0150010) +#define STATUS_SXS_MULTIPLE_DEACTIVATION __constant_cpu_to_le32(0xC0150011) +#define STATUS_SXS_SYSTEM_DEFAULT_ACTIVATION_CONTEXT_EMPTY __constant_cpu_to_le32(0xC0150012) +#define STATUS_SXS_PROCESS_TERMINATION_REQUESTED __constant_cpu_to_le32(0xC0150013) +#define STATUS_SXS_CORRUPT_ACTIVATION_STACK __constant_cpu_to_le32(0xC0150014) +#define STATUS_SXS_CORRUPTION __constant_cpu_to_le32(0xC0150015) +#define STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_VALUE __constant_cpu_to_le32(0xC0150016) +#define STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_NAME __constant_cpu_to_le32(0xC0150017) +#define STATUS_SXS_IDENTITY_DUPLICATE_ATTRIBUTE __constant_cpu_to_le32(0xC0150018) +#define STATUS_SXS_IDENTITY_PARSE_ERROR __constant_cpu_to_le32(0xC0150019) +#define STATUS_SXS_COMPONENT_STORE_CORRUPT __constant_cpu_to_le32(0xC015001A) +#define STATUS_SXS_FILE_HASH_MISMATCH __constant_cpu_to_le32(0xC015001B) +#define STATUS_SXS_MANIFEST_IDENTITY_SAME_BUT_CONTENTS_DIFFERENT __constant_cpu_to_le32(0xC015001C) +#define STATUS_SXS_IDENTITIES_DIFFERENT __constant_cpu_to_le32(0xC015001D) +#define STATUS_SXS_ASSEMBLY_IS_NOT_A_DEPLOYMENT __constant_cpu_to_le32(0xC015001E) +#define STATUS_SXS_FILE_NOT_PART_OF_ASSEMBLY __constant_cpu_to_le32(0xC015001F) +#define STATUS_ADVANCED_INSTALLER_FAILED __constant_cpu_to_le32(0xC0150020) +#define STATUS_XML_ENCODING_MISMATCH __constant_cpu_to_le32(0xC0150021) +#define STATUS_SXS_MANIFEST_TOO_BIG __constant_cpu_to_le32(0xC0150022) +#define STATUS_SXS_SETTING_NOT_REGISTERED __constant_cpu_to_le32(0xC0150023) +#define 
STATUS_SXS_TRANSACTION_CLOSURE_INCOMPLETE __constant_cpu_to_le32(0xC0150024) +#define STATUS_SMI_PRIMITIVE_INSTALLER_FAILED __constant_cpu_to_le32(0xC0150025) +#define STATUS_GENERIC_COMMAND_FAILED __constant_cpu_to_le32(0xC0150026) +#define STATUS_SXS_FILE_HASH_MISSING __constant_cpu_to_le32(0xC0150027) +#define STATUS_TRANSACTIONAL_CONFLICT __constant_cpu_to_le32(0xC0190001) +#define STATUS_INVALID_TRANSACTION __constant_cpu_to_le32(0xC0190002) +#define STATUS_TRANSACTION_NOT_ACTIVE __constant_cpu_to_le32(0xC0190003) +#define STATUS_TM_INITIALIZATION_FAILED __constant_cpu_to_le32(0xC0190004) +#define STATUS_RM_NOT_ACTIVE __constant_cpu_to_le32(0xC0190005) +#define STATUS_RM_METADATA_CORRUPT __constant_cpu_to_le32(0xC0190006) +#define STATUS_TRANSACTION_NOT_JOINED __constant_cpu_to_le32(0xC0190007) +#define STATUS_DIRECTORY_NOT_RM __constant_cpu_to_le32(0xC0190008) +#define STATUS_TRANSACTIONS_UNSUPPORTED_REMOTE __constant_cpu_to_le32(0xC019000A) +#define STATUS_LOG_RESIZE_INVALID_SIZE __constant_cpu_to_le32(0xC019000B) +#define STATUS_REMOTE_FILE_VERSION_MISMATCH __constant_cpu_to_le32(0xC019000C) +#define STATUS_CRM_PROTOCOL_ALREADY_EXISTS __constant_cpu_to_le32(0xC019000F) +#define STATUS_TRANSACTION_PROPAGATION_FAILED __constant_cpu_to_le32(0xC0190010) +#define STATUS_CRM_PROTOCOL_NOT_FOUND __constant_cpu_to_le32(0xC0190011) +#define STATUS_TRANSACTION_SUPERIOR_EXISTS __constant_cpu_to_le32(0xC0190012) +#define STATUS_TRANSACTION_REQUEST_NOT_VALID __constant_cpu_to_le32(0xC0190013) +#define STATUS_TRANSACTION_NOT_REQUESTED __constant_cpu_to_le32(0xC0190014) +#define STATUS_TRANSACTION_ALREADY_ABORTED __constant_cpu_to_le32(0xC0190015) +#define STATUS_TRANSACTION_ALREADY_COMMITTED __constant_cpu_to_le32(0xC0190016) +#define STATUS_TRANSACTION_INVALID_MARSHALL_BUFFER __constant_cpu_to_le32(0xC0190017) +#define STATUS_CURRENT_TRANSACTION_NOT_VALID __constant_cpu_to_le32(0xC0190018) +#define STATUS_LOG_GROWTH_FAILED __constant_cpu_to_le32(0xC0190019) +#define 
STATUS_OBJECT_NO_LONGER_EXISTS __constant_cpu_to_le32(0xC0190021) +#define STATUS_STREAM_MINIVERSION_NOT_FOUND __constant_cpu_to_le32(0xC0190022) +#define STATUS_STREAM_MINIVERSION_NOT_VALID __constant_cpu_to_le32(0xC0190023) +#define STATUS_MINIVERSION_INACCESSIBLE_FROM_SPECIFIED_TRANSACTION __constant_cpu_to_le32(0xC0190024) +#define STATUS_CANT_OPEN_MINIVERSION_WITH_MODIFY_INTENT __constant_cpu_to_le32(0xC0190025) +#define STATUS_CANT_CREATE_MORE_STREAM_MINIVERSIONS __constant_cpu_to_le32(0xC0190026) +#define STATUS_HANDLE_NO_LONGER_VALID __constant_cpu_to_le32(0xC0190028) +#define STATUS_LOG_CORRUPTION_DETECTED __constant_cpu_to_le32(0xC0190030) +#define STATUS_RM_DISCONNECTED __constant_cpu_to_le32(0xC0190032) +#define STATUS_ENLISTMENT_NOT_SUPERIOR __constant_cpu_to_le32(0xC0190033) +#define STATUS_FILE_IDENTITY_NOT_PERSISTENT __constant_cpu_to_le32(0xC0190036) +#define STATUS_CANT_BREAK_TRANSACTIONAL_DEPENDENCY __constant_cpu_to_le32(0xC0190037) +#define STATUS_CANT_CROSS_RM_BOUNDARY __constant_cpu_to_le32(0xC0190038) +#define STATUS_TXF_DIR_NOT_EMPTY __constant_cpu_to_le32(0xC0190039) +#define STATUS_INDOUBT_TRANSACTIONS_EXIST __constant_cpu_to_le32(0xC019003A) +#define STATUS_TM_VOLATILE __constant_cpu_to_le32(0xC019003B) +#define STATUS_ROLLBACK_TIMER_EXPIRED __constant_cpu_to_le32(0xC019003C) +#define STATUS_TXF_ATTRIBUTE_CORRUPT __constant_cpu_to_le32(0xC019003D) +#define STATUS_EFS_NOT_ALLOWED_IN_TRANSACTION __constant_cpu_to_le32(0xC019003E) +#define STATUS_TRANSACTIONAL_OPEN_NOT_ALLOWED __constant_cpu_to_le32(0xC019003F) +#define STATUS_TRANSACTED_MAPPING_UNSUPPORTED_REMOTE __constant_cpu_to_le32(0xC0190040) +#define STATUS_TRANSACTION_REQUIRED_PROMOTION __constant_cpu_to_le32(0xC0190043) +#define STATUS_CANNOT_EXECUTE_FILE_IN_TRANSACTION __constant_cpu_to_le32(0xC0190044) +#define STATUS_TRANSACTIONS_NOT_FROZEN __constant_cpu_to_le32(0xC0190045) +#define STATUS_TRANSACTION_FREEZE_IN_PROGRESS __constant_cpu_to_le32(0xC0190046) +#define 
STATUS_NOT_SNAPSHOT_VOLUME __constant_cpu_to_le32(0xC0190047) +#define STATUS_NO_SAVEPOINT_WITH_OPEN_FILES __constant_cpu_to_le32(0xC0190048) +#define STATUS_SPARSE_NOT_ALLOWED_IN_TRANSACTION __constant_cpu_to_le32(0xC0190049) +#define STATUS_TM_IDENTITY_MISMATCH __constant_cpu_to_le32(0xC019004A) +#define STATUS_FLOATED_SECTION __constant_cpu_to_le32(0xC019004B) +#define STATUS_CANNOT_ACCEPT_TRANSACTED_WORK __constant_cpu_to_le32(0xC019004C) +#define STATUS_CANNOT_ABORT_TRANSACTIONS __constant_cpu_to_le32(0xC019004D) +#define STATUS_TRANSACTION_NOT_FOUND __constant_cpu_to_le32(0xC019004E) +#define STATUS_RESOURCEMANAGER_NOT_FOUND __constant_cpu_to_le32(0xC019004F) +#define STATUS_ENLISTMENT_NOT_FOUND __constant_cpu_to_le32(0xC0190050) +#define STATUS_TRANSACTIONMANAGER_NOT_FOUND __constant_cpu_to_le32(0xC0190051) +#define STATUS_TRANSACTIONMANAGER_NOT_ONLINE __constant_cpu_to_le32(0xC0190052) +#define STATUS_TRANSACTIONMANAGER_RECOVERY_NAME_COLLISION __constant_cpu_to_le32(0xC0190053) +#define STATUS_TRANSACTION_NOT_ROOT __constant_cpu_to_le32(0xC0190054) +#define STATUS_TRANSACTION_OBJECT_EXPIRED __constant_cpu_to_le32(0xC0190055) +#define STATUS_COMPRESSION_NOT_ALLOWED_IN_TRANSACTION __constant_cpu_to_le32(0xC0190056) +#define STATUS_TRANSACTION_RESPONSE_NOT_ENLISTED __constant_cpu_to_le32(0xC0190057) +#define STATUS_TRANSACTION_RECORD_TOO_LONG __constant_cpu_to_le32(0xC0190058) +#define STATUS_NO_LINK_TRACKING_IN_TRANSACTION __constant_cpu_to_le32(0xC0190059) +#define STATUS_OPERATION_NOT_SUPPORTED_IN_TRANSACTION __constant_cpu_to_le32(0xC019005A) +#define STATUS_TRANSACTION_INTEGRITY_VIOLATED __constant_cpu_to_le32(0xC019005B) +#define STATUS_LOG_SECTOR_INVALID __constant_cpu_to_le32(0xC01A0001) +#define STATUS_LOG_SECTOR_PARITY_INVALID __constant_cpu_to_le32(0xC01A0002) +#define STATUS_LOG_SECTOR_REMAPPED __constant_cpu_to_le32(0xC01A0003) +#define STATUS_LOG_BLOCK_INCOMPLETE __constant_cpu_to_le32(0xC01A0004) +#define STATUS_LOG_INVALID_RANGE 
__constant_cpu_to_le32(0xC01A0005) +#define STATUS_LOG_BLOCKS_EXHAUSTED __constant_cpu_to_le32(0xC01A0006) +#define STATUS_LOG_READ_CONTEXT_INVALID __constant_cpu_to_le32(0xC01A0007) +#define STATUS_LOG_RESTART_INVALID __constant_cpu_to_le32(0xC01A0008) +#define STATUS_LOG_BLOCK_VERSION __constant_cpu_to_le32(0xC01A0009) +#define STATUS_LOG_BLOCK_INVALID __constant_cpu_to_le32(0xC01A000A) +#define STATUS_LOG_READ_MODE_INVALID __constant_cpu_to_le32(0xC01A000B) +#define STATUS_LOG_METADATA_CORRUPT __constant_cpu_to_le32(0xC01A000D) +#define STATUS_LOG_METADATA_INVALID __constant_cpu_to_le32(0xC01A000E) +#define STATUS_LOG_METADATA_INCONSISTENT __constant_cpu_to_le32(0xC01A000F) +#define STATUS_LOG_RESERVATION_INVALID __constant_cpu_to_le32(0xC01A0010) +#define STATUS_LOG_CANT_DELETE __constant_cpu_to_le32(0xC01A0011) +#define STATUS_LOG_CONTAINER_LIMIT_EXCEEDED __constant_cpu_to_le32(0xC01A0012) +#define STATUS_LOG_START_OF_LOG __constant_cpu_to_le32(0xC01A0013) +#define STATUS_LOG_POLICY_ALREADY_INSTALLED __constant_cpu_to_le32(0xC01A0014) +#define STATUS_LOG_POLICY_NOT_INSTALLED __constant_cpu_to_le32(0xC01A0015) +#define STATUS_LOG_POLICY_INVALID __constant_cpu_to_le32(0xC01A0016) +#define STATUS_LOG_POLICY_CONFLICT __constant_cpu_to_le32(0xC01A0017) +#define STATUS_LOG_PINNED_ARCHIVE_TAIL __constant_cpu_to_le32(0xC01A0018) +#define STATUS_LOG_RECORD_NONEXISTENT __constant_cpu_to_le32(0xC01A0019) +#define STATUS_LOG_RECORDS_RESERVED_INVALID __constant_cpu_to_le32(0xC01A001A) +#define STATUS_LOG_SPACE_RESERVED_INVALID __constant_cpu_to_le32(0xC01A001B) +#define STATUS_LOG_TAIL_INVALID __constant_cpu_to_le32(0xC01A001C) +#define STATUS_LOG_FULL __constant_cpu_to_le32(0xC01A001D) +#define STATUS_LOG_MULTIPLEXED __constant_cpu_to_le32(0xC01A001E) +#define STATUS_LOG_DEDICATED __constant_cpu_to_le32(0xC01A001F) +#define STATUS_LOG_ARCHIVE_NOT_IN_PROGRESS __constant_cpu_to_le32(0xC01A0020) +#define STATUS_LOG_ARCHIVE_IN_PROGRESS __constant_cpu_to_le32(0xC01A0021) 
+#define STATUS_LOG_EPHEMERAL __constant_cpu_to_le32(0xC01A0022) +#define STATUS_LOG_NOT_ENOUGH_CONTAINERS __constant_cpu_to_le32(0xC01A0023) +#define STATUS_LOG_CLIENT_ALREADY_REGISTERED __constant_cpu_to_le32(0xC01A0024) +#define STATUS_LOG_CLIENT_NOT_REGISTERED __constant_cpu_to_le32(0xC01A0025) +#define STATUS_LOG_FULL_HANDLER_IN_PROGRESS __constant_cpu_to_le32(0xC01A0026) +#define STATUS_LOG_CONTAINER_READ_FAILED __constant_cpu_to_le32(0xC01A0027) +#define STATUS_LOG_CONTAINER_WRITE_FAILED __constant_cpu_to_le32(0xC01A0028) +#define STATUS_LOG_CONTAINER_OPEN_FAILED __constant_cpu_to_le32(0xC01A0029) +#define STATUS_LOG_CONTAINER_STATE_INVALID __constant_cpu_to_le32(0xC01A002A) +#define STATUS_LOG_STATE_INVALID __constant_cpu_to_le32(0xC01A002B) +#define STATUS_LOG_PINNED __constant_cpu_to_le32(0xC01A002C) +#define STATUS_LOG_METADATA_FLUSH_FAILED __constant_cpu_to_le32(0xC01A002D) +#define STATUS_LOG_INCONSISTENT_SECURITY __constant_cpu_to_le32(0xC01A002E) +#define STATUS_LOG_APPENDED_FLUSH_FAILED __constant_cpu_to_le32(0xC01A002F) +#define STATUS_LOG_PINNED_RESERVATION __constant_cpu_to_le32(0xC01A0030) +#define STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD __constant_cpu_to_le32(0xC01B00EA) +#define STATUS_FLT_NO_HANDLER_DEFINED __constant_cpu_to_le32(0xC01C0001) +#define STATUS_FLT_CONTEXT_ALREADY_DEFINED __constant_cpu_to_le32(0xC01C0002) +#define STATUS_FLT_INVALID_ASYNCHRONOUS_REQUEST __constant_cpu_to_le32(0xC01C0003) +#define STATUS_FLT_DISALLOW_FAST_IO __constant_cpu_to_le32(0xC01C0004) +#define STATUS_FLT_INVALID_NAME_REQUEST __constant_cpu_to_le32(0xC01C0005) +#define STATUS_FLT_NOT_SAFE_TO_POST_OPERATION __constant_cpu_to_le32(0xC01C0006) +#define STATUS_FLT_NOT_INITIALIZED __constant_cpu_to_le32(0xC01C0007) +#define STATUS_FLT_FILTER_NOT_READY __constant_cpu_to_le32(0xC01C0008) +#define STATUS_FLT_POST_OPERATION_CLEANUP __constant_cpu_to_le32(0xC01C0009) +#define STATUS_FLT_INTERNAL_ERROR __constant_cpu_to_le32(0xC01C000A) +#define 
STATUS_FLT_DELETING_OBJECT __constant_cpu_to_le32(0xC01C000B) +#define STATUS_FLT_MUST_BE_NONPAGED_POOL __constant_cpu_to_le32(0xC01C000C) +#define STATUS_FLT_DUPLICATE_ENTRY __constant_cpu_to_le32(0xC01C000D) +#define STATUS_FLT_CBDQ_DISABLED __constant_cpu_to_le32(0xC01C000E) +#define STATUS_FLT_DO_NOT_ATTACH __constant_cpu_to_le32(0xC01C000F) +#define STATUS_FLT_DO_NOT_DETACH __constant_cpu_to_le32(0xC01C0010) +#define STATUS_FLT_INSTANCE_ALTITUDE_COLLISION __constant_cpu_to_le32(0xC01C0011) +#define STATUS_FLT_INSTANCE_NAME_COLLISION __constant_cpu_to_le32(0xC01C0012) +#define STATUS_FLT_FILTER_NOT_FOUND __constant_cpu_to_le32(0xC01C0013) +#define STATUS_FLT_VOLUME_NOT_FOUND __constant_cpu_to_le32(0xC01C0014) +#define STATUS_FLT_INSTANCE_NOT_FOUND __constant_cpu_to_le32(0xC01C0015) +#define STATUS_FLT_CONTEXT_ALLOCATION_NOT_FOUND __constant_cpu_to_le32(0xC01C0016) +#define STATUS_FLT_INVALID_CONTEXT_REGISTRATION __constant_cpu_to_le32(0xC01C0017) +#define STATUS_FLT_NAME_CACHE_MISS __constant_cpu_to_le32(0xC01C0018) +#define STATUS_FLT_NO_DEVICE_OBJECT __constant_cpu_to_le32(0xC01C0019) +#define STATUS_FLT_VOLUME_ALREADY_MOUNTED __constant_cpu_to_le32(0xC01C001A) +#define STATUS_FLT_ALREADY_ENLISTED __constant_cpu_to_le32(0xC01C001B) +#define STATUS_FLT_CONTEXT_ALREADY_LINKED __constant_cpu_to_le32(0xC01C001C) +#define STATUS_FLT_NO_WAITER_FOR_REPLY __constant_cpu_to_le32(0xC01C0020) +#define STATUS_MONITOR_NO_DESCRIPTOR __constant_cpu_to_le32(0xC01D0001) +#define STATUS_MONITOR_UNKNOWN_DESCRIPTOR_FORMAT __constant_cpu_to_le32(0xC01D0002) +#define STATUS_MONITOR_INVALID_DESCRIPTOR_CHECKSUM __constant_cpu_to_le32(0xC01D0003) +#define STATUS_MONITOR_INVALID_STANDARD_TIMING_BLOCK __constant_cpu_to_le32(0xC01D0004) +#define STATUS_MONITOR_WMI_DATABLOCK_REGISTRATION_FAILED __constant_cpu_to_le32(0xC01D0005) +#define STATUS_MONITOR_INVALID_SERIAL_NUMBER_MONDSC_BLOCK __constant_cpu_to_le32(0xC01D0006) +#define STATUS_MONITOR_INVALID_USER_FRIENDLY_MONDSC_BLOCK 
__constant_cpu_to_le32(0xC01D0007) +#define STATUS_MONITOR_NO_MORE_DESCRIPTOR_DATA __constant_cpu_to_le32(0xC01D0008) +#define STATUS_MONITOR_INVALID_DETAILED_TIMING_BLOCK __constant_cpu_to_le32(0xC01D0009) +#define STATUS_GRAPHICS_NOT_EXCLUSIVE_MODE_OWNER __constant_cpu_to_le32(0xC01E0000) +#define STATUS_GRAPHICS_INSUFFICIENT_DMA_BUFFER __constant_cpu_to_le32(0xC01E0001) +#define STATUS_GRAPHICS_INVALID_DISPLAY_ADAPTER __constant_cpu_to_le32(0xC01E0002) +#define STATUS_GRAPHICS_ADAPTER_WAS_RESET __constant_cpu_to_le32(0xC01E0003) +#define STATUS_GRAPHICS_INVALID_DRIVER_MODEL __constant_cpu_to_le32(0xC01E0004) +#define STATUS_GRAPHICS_PRESENT_MODE_CHANGED __constant_cpu_to_le32(0xC01E0005) +#define STATUS_GRAPHICS_PRESENT_OCCLUDED __constant_cpu_to_le32(0xC01E0006) +#define STATUS_GRAPHICS_PRESENT_DENIED __constant_cpu_to_le32(0xC01E0007) +#define STATUS_GRAPHICS_CANNOTCOLORCONVERT __constant_cpu_to_le32(0xC01E0008) +#define STATUS_GRAPHICS_NO_VIDEO_MEMORY __constant_cpu_to_le32(0xC01E0100) +#define STATUS_GRAPHICS_CANT_LOCK_MEMORY __constant_cpu_to_le32(0xC01E0101) +#define STATUS_GRAPHICS_ALLOCATION_BUSY __constant_cpu_to_le32(0xC01E0102) +#define STATUS_GRAPHICS_TOO_MANY_REFERENCES __constant_cpu_to_le32(0xC01E0103) +#define STATUS_GRAPHICS_TRY_AGAIN_LATER __constant_cpu_to_le32(0xC01E0104) +#define STATUS_GRAPHICS_TRY_AGAIN_NOW __constant_cpu_to_le32(0xC01E0105) +#define STATUS_GRAPHICS_ALLOCATION_INVALID __constant_cpu_to_le32(0xC01E0106) +#define STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNAVAILABLE __constant_cpu_to_le32(0xC01E0107) +#define STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNSUPPORTED __constant_cpu_to_le32(0xC01E0108) +#define STATUS_GRAPHICS_CANT_EVICT_PINNED_ALLOCATION __constant_cpu_to_le32(0xC01E0109) +#define STATUS_GRAPHICS_INVALID_ALLOCATION_USAGE __constant_cpu_to_le32(0xC01E0110) +#define STATUS_GRAPHICS_CANT_RENDER_LOCKED_ALLOCATION __constant_cpu_to_le32(0xC01E0111) +#define STATUS_GRAPHICS_ALLOCATION_CLOSED __constant_cpu_to_le32(0xC01E0112) 
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_INSTANCE __constant_cpu_to_le32(0xC01E0113) +#define STATUS_GRAPHICS_INVALID_ALLOCATION_HANDLE __constant_cpu_to_le32(0xC01E0114) +#define STATUS_GRAPHICS_WRONG_ALLOCATION_DEVICE __constant_cpu_to_le32(0xC01E0115) +#define STATUS_GRAPHICS_ALLOCATION_CONTENT_LOST __constant_cpu_to_le32(0xC01E0116) +#define STATUS_GRAPHICS_GPU_EXCEPTION_ON_DEVICE __constant_cpu_to_le32(0xC01E0200) +#define STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY __constant_cpu_to_le32(0xC01E0300) +#define STATUS_GRAPHICS_VIDPN_TOPOLOGY_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0301) +#define STATUS_GRAPHICS_VIDPN_TOPOLOGY_CURRENTLY_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0302) +#define STATUS_GRAPHICS_INVALID_VIDPN __constant_cpu_to_le32(0xC01E0303) +#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE __constant_cpu_to_le32(0xC01E0304) +#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET __constant_cpu_to_le32(0xC01E0305) +#define STATUS_GRAPHICS_VIDPN_MODALITY_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0306) +#define STATUS_GRAPHICS_INVALID_VIDPN_SOURCEMODESET __constant_cpu_to_le32(0xC01E0308) +#define STATUS_GRAPHICS_INVALID_VIDPN_TARGETMODESET __constant_cpu_to_le32(0xC01E0309) +#define STATUS_GRAPHICS_INVALID_FREQUENCY __constant_cpu_to_le32(0xC01E030A) +#define STATUS_GRAPHICS_INVALID_ACTIVE_REGION __constant_cpu_to_le32(0xC01E030B) +#define STATUS_GRAPHICS_INVALID_TOTAL_REGION __constant_cpu_to_le32(0xC01E030C) +#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE_MODE __constant_cpu_to_le32(0xC01E0310) +#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET_MODE __constant_cpu_to_le32(0xC01E0311) +#define STATUS_GRAPHICS_PINNED_MODE_MUST_REMAIN_IN_SET __constant_cpu_to_le32(0xC01E0312) +#define STATUS_GRAPHICS_PATH_ALREADY_IN_TOPOLOGY __constant_cpu_to_le32(0xC01E0313) +#define STATUS_GRAPHICS_MODE_ALREADY_IN_MODESET __constant_cpu_to_le32(0xC01E0314) +#define STATUS_GRAPHICS_INVALID_VIDEOPRESENTSOURCESET __constant_cpu_to_le32(0xC01E0315) 
+#define STATUS_GRAPHICS_INVALID_VIDEOPRESENTTARGETSET __constant_cpu_to_le32(0xC01E0316) +#define STATUS_GRAPHICS_SOURCE_ALREADY_IN_SET __constant_cpu_to_le32(0xC01E0317) +#define STATUS_GRAPHICS_TARGET_ALREADY_IN_SET __constant_cpu_to_le32(0xC01E0318) +#define STATUS_GRAPHICS_INVALID_VIDPN_PRESENT_PATH __constant_cpu_to_le32(0xC01E0319) +#define STATUS_GRAPHICS_NO_RECOMMENDED_VIDPN_TOPOLOGY __constant_cpu_to_le32(0xC01E031A) +#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGESET __constant_cpu_to_le32(0xC01E031B) +#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE __constant_cpu_to_le32(0xC01E031C) +#define STATUS_GRAPHICS_FREQUENCYRANGE_NOT_IN_SET __constant_cpu_to_le32(0xC01E031D) +#define STATUS_GRAPHICS_FREQUENCYRANGE_ALREADY_IN_SET __constant_cpu_to_le32(0xC01E031F) +#define STATUS_GRAPHICS_STALE_MODESET __constant_cpu_to_le32(0xC01E0320) +#define STATUS_GRAPHICS_INVALID_MONITOR_SOURCEMODESET __constant_cpu_to_le32(0xC01E0321) +#define STATUS_GRAPHICS_INVALID_MONITOR_SOURCE_MODE __constant_cpu_to_le32(0xC01E0322) +#define STATUS_GRAPHICS_NO_RECOMMENDED_FUNCTIONAL_VIDPN __constant_cpu_to_le32(0xC01E0323) +#define STATUS_GRAPHICS_MODE_ID_MUST_BE_UNIQUE __constant_cpu_to_le32(0xC01E0324) +#define STATUS_GRAPHICS_EMPTY_ADAPTER_MONITOR_MODE_SUPPORT_INTERSECTION __constant_cpu_to_le32(0xC01E0325) +#define STATUS_GRAPHICS_VIDEO_PRESENT_TARGETS_LESS_THAN_SOURCES __constant_cpu_to_le32(0xC01E0326) +#define STATUS_GRAPHICS_PATH_NOT_IN_TOPOLOGY __constant_cpu_to_le32(0xC01E0327) +#define STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_SOURCE __constant_cpu_to_le32(0xC01E0328) +#define STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_TARGET __constant_cpu_to_le32(0xC01E0329) +#define STATUS_GRAPHICS_INVALID_MONITORDESCRIPTORSET __constant_cpu_to_le32(0xC01E032A) +#define STATUS_GRAPHICS_INVALID_MONITORDESCRIPTOR __constant_cpu_to_le32(0xC01E032B) +#define STATUS_GRAPHICS_MONITORDESCRIPTOR_NOT_IN_SET __constant_cpu_to_le32(0xC01E032C) +#define 
STATUS_GRAPHICS_MONITORDESCRIPTOR_ALREADY_IN_SET __constant_cpu_to_le32(0xC01E032D) +#define STATUS_GRAPHICS_MONITORDESCRIPTOR_ID_MUST_BE_UNIQUE __constant_cpu_to_le32(0xC01E032E) +#define STATUS_GRAPHICS_INVALID_VIDPN_TARGET_SUBSET_TYPE __constant_cpu_to_le32(0xC01E032F) +#define STATUS_GRAPHICS_RESOURCES_NOT_RELATED __constant_cpu_to_le32(0xC01E0330) +#define STATUS_GRAPHICS_SOURCE_ID_MUST_BE_UNIQUE __constant_cpu_to_le32(0xC01E0331) +#define STATUS_GRAPHICS_TARGET_ID_MUST_BE_UNIQUE __constant_cpu_to_le32(0xC01E0332) +#define STATUS_GRAPHICS_NO_AVAILABLE_VIDPN_TARGET __constant_cpu_to_le32(0xC01E0333) +#define STATUS_GRAPHICS_MONITOR_COULD_NOT_BE_ASSOCIATED_WITH_ADAPTER __constant_cpu_to_le32(0xC01E0334) +#define STATUS_GRAPHICS_NO_VIDPNMGR __constant_cpu_to_le32(0xC01E0335) +#define STATUS_GRAPHICS_NO_ACTIVE_VIDPN __constant_cpu_to_le32(0xC01E0336) +#define STATUS_GRAPHICS_STALE_VIDPN_TOPOLOGY __constant_cpu_to_le32(0xC01E0337) +#define STATUS_GRAPHICS_MONITOR_NOT_CONNECTED __constant_cpu_to_le32(0xC01E0338) +#define STATUS_GRAPHICS_SOURCE_NOT_IN_TOPOLOGY __constant_cpu_to_le32(0xC01E0339) +#define STATUS_GRAPHICS_INVALID_PRIMARYSURFACE_SIZE __constant_cpu_to_le32(0xC01E033A) +#define STATUS_GRAPHICS_INVALID_VISIBLEREGION_SIZE __constant_cpu_to_le32(0xC01E033B) +#define STATUS_GRAPHICS_INVALID_STRIDE __constant_cpu_to_le32(0xC01E033C) +#define STATUS_GRAPHICS_INVALID_PIXELFORMAT __constant_cpu_to_le32(0xC01E033D) +#define STATUS_GRAPHICS_INVALID_COLORBASIS __constant_cpu_to_le32(0xC01E033E) +#define STATUS_GRAPHICS_INVALID_PIXELVALUEACCESSMODE __constant_cpu_to_le32(0xC01E033F) +#define STATUS_GRAPHICS_TARGET_NOT_IN_TOPOLOGY __constant_cpu_to_le32(0xC01E0340) +#define STATUS_GRAPHICS_NO_DISPLAY_MODE_MANAGEMENT_SUPPORT __constant_cpu_to_le32(0xC01E0341) +#define STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE __constant_cpu_to_le32(0xC01E0342) +#define STATUS_GRAPHICS_CANT_ACCESS_ACTIVE_VIDPN __constant_cpu_to_le32(0xC01E0343) +#define 
STATUS_GRAPHICS_INVALID_PATH_IMPORTANCE_ORDINAL __constant_cpu_to_le32(0xC01E0344) +#define STATUS_GRAPHICS_INVALID_PATH_CONTENT_GEOMETRY_TRANSFORMATION __constant_cpu_to_le32(0xC01E0345) +#define STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0346) +#define STATUS_GRAPHICS_INVALID_GAMMA_RAMP __constant_cpu_to_le32(0xC01E0347) +#define STATUS_GRAPHICS_GAMMA_RAMP_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0348) +#define STATUS_GRAPHICS_MULTISAMPLING_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0349) +#define STATUS_GRAPHICS_MODE_NOT_IN_MODESET __constant_cpu_to_le32(0xC01E034A) +#define STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY_RECOMMENDATION_REASON __constant_cpu_to_le32(0xC01E034D) +#define STATUS_GRAPHICS_INVALID_PATH_CONTENT_TYPE __constant_cpu_to_le32(0xC01E034E) +#define STATUS_GRAPHICS_INVALID_COPYPROTECTION_TYPE __constant_cpu_to_le32(0xC01E034F) +#define STATUS_GRAPHICS_UNASSIGNED_MODESET_ALREADY_EXISTS __constant_cpu_to_le32(0xC01E0350) +#define STATUS_GRAPHICS_INVALID_SCANLINE_ORDERING __constant_cpu_to_le32(0xC01E0352) +#define STATUS_GRAPHICS_TOPOLOGY_CHANGES_NOT_ALLOWED __constant_cpu_to_le32(0xC01E0353) +#define STATUS_GRAPHICS_NO_AVAILABLE_IMPORTANCE_ORDINALS __constant_cpu_to_le32(0xC01E0354) +#define STATUS_GRAPHICS_INCOMPATIBLE_PRIVATE_FORMAT __constant_cpu_to_le32(0xC01E0355) +#define STATUS_GRAPHICS_INVALID_MODE_PRUNING_ALGORITHM __constant_cpu_to_le32(0xC01E0356) +#define STATUS_GRAPHICS_INVALID_MONITOR_CAPABILITY_ORIGIN __constant_cpu_to_le32(0xC01E0357) +#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE_CONSTRAINT __constant_cpu_to_le32(0xC01E0358) +#define STATUS_GRAPHICS_MAX_NUM_PATHS_REACHED __constant_cpu_to_le32(0xC01E0359) +#define STATUS_GRAPHICS_CANCEL_VIDPN_TOPOLOGY_AUGMENTATION __constant_cpu_to_le32(0xC01E035A) +#define STATUS_GRAPHICS_INVALID_CLIENT_TYPE __constant_cpu_to_le32(0xC01E035B) +#define STATUS_GRAPHICS_CLIENTVIDPN_NOT_SET __constant_cpu_to_le32(0xC01E035C) +#define 
STATUS_GRAPHICS_SPECIFIED_CHILD_ALREADY_CONNECTED __constant_cpu_to_le32(0xC01E0400) +#define STATUS_GRAPHICS_CHILD_DESCRIPTOR_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0401) +#define STATUS_GRAPHICS_NOT_A_LINKED_ADAPTER __constant_cpu_to_le32(0xC01E0430) +#define STATUS_GRAPHICS_LEADLINK_NOT_ENUMERATED __constant_cpu_to_le32(0xC01E0431) +#define STATUS_GRAPHICS_CHAINLINKS_NOT_ENUMERATED __constant_cpu_to_le32(0xC01E0432) +#define STATUS_GRAPHICS_ADAPTER_CHAIN_NOT_READY __constant_cpu_to_le32(0xC01E0433) +#define STATUS_GRAPHICS_CHAINLINKS_NOT_STARTED __constant_cpu_to_le32(0xC01E0434) +#define STATUS_GRAPHICS_CHAINLINKS_NOT_POWERED_ON __constant_cpu_to_le32(0xC01E0435) +#define STATUS_GRAPHICS_INCONSISTENT_DEVICE_LINK_STATE __constant_cpu_to_le32(0xC01E0436) +#define STATUS_GRAPHICS_NOT_POST_DEVICE_DRIVER __constant_cpu_to_le32(0xC01E0438) +#define STATUS_GRAPHICS_ADAPTER_ACCESS_NOT_EXCLUDED __constant_cpu_to_le32(0xC01E043B) +#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_COPP_SEMANTICS __constant_cpu_to_le32(0xC01E051C) +#define STATUS_GRAPHICS_OPM_INVALID_INFORMATION_REQUEST __constant_cpu_to_le32(0xC01E051D) +#define STATUS_GRAPHICS_OPM_DRIVER_INTERNAL_ERROR __constant_cpu_to_le32(0xC01E051E) +#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_OPM_SEMANTICS __constant_cpu_to_le32(0xC01E051F) +#define STATUS_GRAPHICS_OPM_SIGNALING_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0520) +#define STATUS_GRAPHICS_OPM_INVALID_CONFIGURATION_REQUEST __constant_cpu_to_le32(0xC01E0521) +#define STATUS_GRAPHICS_OPM_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0500) +#define STATUS_GRAPHICS_COPP_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0501) +#define STATUS_GRAPHICS_UAB_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0502) +#define STATUS_GRAPHICS_OPM_INVALID_ENCRYPTED_PARAMETERS __constant_cpu_to_le32(0xC01E0503) +#define STATUS_GRAPHICS_OPM_PARAMETER_ARRAY_TOO_SMALL __constant_cpu_to_le32(0xC01E0504) +#define STATUS_GRAPHICS_OPM_NO_PROTECTED_OUTPUTS_EXIST 
__constant_cpu_to_le32(0xC01E0505) +#define STATUS_GRAPHICS_PVP_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME __constant_cpu_to_le32(0xC01E0506) +#define STATUS_GRAPHICS_PVP_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP __constant_cpu_to_le32(0xC01E0507) +#define STATUS_GRAPHICS_PVP_MIRRORING_DEVICES_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0508) +#define STATUS_GRAPHICS_OPM_INVALID_POINTER __constant_cpu_to_le32(0xC01E050A) +#define STATUS_GRAPHICS_OPM_INTERNAL_ERROR __constant_cpu_to_le32(0xC01E050B) +#define STATUS_GRAPHICS_OPM_INVALID_HANDLE __constant_cpu_to_le32(0xC01E050C) +#define STATUS_GRAPHICS_PVP_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE __constant_cpu_to_le32(0xC01E050D) +#define STATUS_GRAPHICS_PVP_INVALID_CERTIFICATE_LENGTH __constant_cpu_to_le32(0xC01E050E) +#define STATUS_GRAPHICS_OPM_SPANNING_MODE_ENABLED __constant_cpu_to_le32(0xC01E050F) +#define STATUS_GRAPHICS_OPM_THEATER_MODE_ENABLED __constant_cpu_to_le32(0xC01E0510) +#define STATUS_GRAPHICS_PVP_HFS_FAILED __constant_cpu_to_le32(0xC01E0511) +#define STATUS_GRAPHICS_OPM_INVALID_SRM __constant_cpu_to_le32(0xC01E0512) +#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_HDCP __constant_cpu_to_le32(0xC01E0513) +#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_ACP __constant_cpu_to_le32(0xC01E0514) +#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_CGMSA __constant_cpu_to_le32(0xC01E0515) +#define STATUS_GRAPHICS_OPM_HDCP_SRM_NEVER_SET __constant_cpu_to_le32(0xC01E0516) +#define STATUS_GRAPHICS_OPM_RESOLUTION_TOO_HIGH __constant_cpu_to_le32(0xC01E0517) +#define STATUS_GRAPHICS_OPM_ALL_HDCP_HARDWARE_ALREADY_IN_USE __constant_cpu_to_le32(0xC01E0518) +#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_NO_LONGER_EXISTS __constant_cpu_to_le32(0xC01E051A) +#define STATUS_GRAPHICS_OPM_SESSION_TYPE_CHANGE_IN_PROGRESS __constant_cpu_to_le32(0xC01E051B) +#define STATUS_GRAPHICS_I2C_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0580) +#define STATUS_GRAPHICS_I2C_DEVICE_DOES_NOT_EXIST __constant_cpu_to_le32(0xC01E0581) 
+#define STATUS_GRAPHICS_I2C_ERROR_TRANSMITTING_DATA __constant_cpu_to_le32(0xC01E0582) +#define STATUS_GRAPHICS_I2C_ERROR_RECEIVING_DATA __constant_cpu_to_le32(0xC01E0583) +#define STATUS_GRAPHICS_DDCCI_VCP_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E0584) +#define STATUS_GRAPHICS_DDCCI_INVALID_DATA __constant_cpu_to_le32(0xC01E0585) +#define STATUS_GRAPHICS_DDCCI_MONITOR_RETURNED_INVALID_TIMING_STATUS_BYTE __constant_cpu_to_le32(0xC01E0586) +#define STATUS_GRAPHICS_DDCCI_INVALID_CAPABILITIES_STRING __constant_cpu_to_le32(0xC01E0587) +#define STATUS_GRAPHICS_MCA_INTERNAL_ERROR __constant_cpu_to_le32(0xC01E0588) +#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_COMMAND __constant_cpu_to_le32(0xC01E0589) +#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_LENGTH __constant_cpu_to_le32(0xC01E058A) +#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_CHECKSUM __constant_cpu_to_le32(0xC01E058B) +#define STATUS_GRAPHICS_INVALID_PHYSICAL_MONITOR_HANDLE __constant_cpu_to_le32(0xC01E058C) +#define STATUS_GRAPHICS_MONITOR_NO_LONGER_EXISTS __constant_cpu_to_le32(0xC01E058D) +#define STATUS_GRAPHICS_ONLY_CONSOLE_SESSION_SUPPORTED __constant_cpu_to_le32(0xC01E05E0) +#define STATUS_GRAPHICS_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME __constant_cpu_to_le32(0xC01E05E1) +#define STATUS_GRAPHICS_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP __constant_cpu_to_le32(0xC01E05E2) +#define STATUS_GRAPHICS_MIRRORING_DEVICES_NOT_SUPPORTED __constant_cpu_to_le32(0xC01E05E3) +#define STATUS_GRAPHICS_INVALID_POINTER __constant_cpu_to_le32(0xC01E05E4) +#define STATUS_GRAPHICS_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE __constant_cpu_to_le32(0xC01E05E5) +#define STATUS_GRAPHICS_PARAMETER_ARRAY_TOO_SMALL __constant_cpu_to_le32(0xC01E05E6) +#define STATUS_GRAPHICS_INTERNAL_ERROR __constant_cpu_to_le32(0xC01E05E7) +#define STATUS_GRAPHICS_SESSION_TYPE_CHANGE_IN_PROGRESS __constant_cpu_to_le32(0xC01E05E8) +#define STATUS_FVE_LOCKED_VOLUME __constant_cpu_to_le32(0xC0210000) +#define STATUS_FVE_NOT_ENCRYPTED 
__constant_cpu_to_le32(0xC0210001) +#define STATUS_FVE_BAD_INFORMATION __constant_cpu_to_le32(0xC0210002) +#define STATUS_FVE_TOO_SMALL __constant_cpu_to_le32(0xC0210003) +#define STATUS_FVE_FAILED_WRONG_FS __constant_cpu_to_le32(0xC0210004) +#define STATUS_FVE_FAILED_BAD_FS __constant_cpu_to_le32(0xC0210005) +#define STATUS_FVE_FS_NOT_EXTENDED __constant_cpu_to_le32(0xC0210006) +#define STATUS_FVE_FS_MOUNTED __constant_cpu_to_le32(0xC0210007) +#define STATUS_FVE_NO_LICENSE __constant_cpu_to_le32(0xC0210008) +#define STATUS_FVE_ACTION_NOT_ALLOWED __constant_cpu_to_le32(0xC0210009) +#define STATUS_FVE_BAD_DATA __constant_cpu_to_le32(0xC021000A) +#define STATUS_FVE_VOLUME_NOT_BOUND __constant_cpu_to_le32(0xC021000B) +#define STATUS_FVE_NOT_DATA_VOLUME __constant_cpu_to_le32(0xC021000C) +#define STATUS_FVE_CONV_READ_ERROR __constant_cpu_to_le32(0xC021000D) +#define STATUS_FVE_CONV_WRITE_ERROR __constant_cpu_to_le32(0xC021000E) +#define STATUS_FVE_OVERLAPPED_UPDATE __constant_cpu_to_le32(0xC021000F) +#define STATUS_FVE_FAILED_SECTOR_SIZE __constant_cpu_to_le32(0xC0210010) +#define STATUS_FVE_FAILED_AUTHENTICATION __constant_cpu_to_le32(0xC0210011) +#define STATUS_FVE_NOT_OS_VOLUME __constant_cpu_to_le32(0xC0210012) +#define STATUS_FVE_KEYFILE_NOT_FOUND __constant_cpu_to_le32(0xC0210013) +#define STATUS_FVE_KEYFILE_INVALID __constant_cpu_to_le32(0xC0210014) +#define STATUS_FVE_KEYFILE_NO_VMK __constant_cpu_to_le32(0xC0210015) +#define STATUS_FVE_TPM_DISABLED __constant_cpu_to_le32(0xC0210016) +#define STATUS_FVE_TPM_SRK_AUTH_NOT_ZERO __constant_cpu_to_le32(0xC0210017) +#define STATUS_FVE_TPM_INVALID_PCR __constant_cpu_to_le32(0xC0210018) +#define STATUS_FVE_TPM_NO_VMK __constant_cpu_to_le32(0xC0210019) +#define STATUS_FVE_PIN_INVALID __constant_cpu_to_le32(0xC021001A) +#define STATUS_FVE_AUTH_INVALID_APPLICATION __constant_cpu_to_le32(0xC021001B) +#define STATUS_FVE_AUTH_INVALID_CONFIG __constant_cpu_to_le32(0xC021001C) +#define STATUS_FVE_DEBUGGER_ENABLED 
__constant_cpu_to_le32(0xC021001D) +#define STATUS_FVE_DRY_RUN_FAILED __constant_cpu_to_le32(0xC021001E) +#define STATUS_FVE_BAD_METADATA_POINTER __constant_cpu_to_le32(0xC021001F) +#define STATUS_FVE_OLD_METADATA_COPY __constant_cpu_to_le32(0xC0210020) +#define STATUS_FVE_REBOOT_REQUIRED __constant_cpu_to_le32(0xC0210021) +#define STATUS_FVE_RAW_ACCESS __constant_cpu_to_le32(0xC0210022) +#define STATUS_FVE_RAW_BLOCKED __constant_cpu_to_le32(0xC0210023) +#define STATUS_FWP_CALLOUT_NOT_FOUND __constant_cpu_to_le32(0xC0220001) +#define STATUS_FWP_CONDITION_NOT_FOUND __constant_cpu_to_le32(0xC0220002) +#define STATUS_FWP_FILTER_NOT_FOUND __constant_cpu_to_le32(0xC0220003) +#define STATUS_FWP_LAYER_NOT_FOUND __constant_cpu_to_le32(0xC0220004) +#define STATUS_FWP_PROVIDER_NOT_FOUND __constant_cpu_to_le32(0xC0220005) +#define STATUS_FWP_PROVIDER_CONTEXT_NOT_FOUND __constant_cpu_to_le32(0xC0220006) +#define STATUS_FWP_SUBLAYER_NOT_FOUND __constant_cpu_to_le32(0xC0220007) +#define STATUS_FWP_NOT_FOUND __constant_cpu_to_le32(0xC0220008) +#define STATUS_FWP_ALREADY_EXISTS __constant_cpu_to_le32(0xC0220009) +#define STATUS_FWP_IN_USE __constant_cpu_to_le32(0xC022000A) +#define STATUS_FWP_DYNAMIC_SESSION_IN_PROGRESS __constant_cpu_to_le32(0xC022000B) +#define STATUS_FWP_WRONG_SESSION __constant_cpu_to_le32(0xC022000C) +#define STATUS_FWP_NO_TXN_IN_PROGRESS __constant_cpu_to_le32(0xC022000D) +#define STATUS_FWP_TXN_IN_PROGRESS __constant_cpu_to_le32(0xC022000E) +#define STATUS_FWP_TXN_ABORTED __constant_cpu_to_le32(0xC022000F) +#define STATUS_FWP_SESSION_ABORTED __constant_cpu_to_le32(0xC0220010) +#define STATUS_FWP_INCOMPATIBLE_TXN __constant_cpu_to_le32(0xC0220011) +#define STATUS_FWP_TIMEOUT __constant_cpu_to_le32(0xC0220012) +#define STATUS_FWP_NET_EVENTS_DISABLED __constant_cpu_to_le32(0xC0220013) +#define STATUS_FWP_INCOMPATIBLE_LAYER __constant_cpu_to_le32(0xC0220014) +#define STATUS_FWP_KM_CLIENTS_ONLY __constant_cpu_to_le32(0xC0220015) +#define 
STATUS_FWP_LIFETIME_MISMATCH __constant_cpu_to_le32(0xC0220016) +#define STATUS_FWP_BUILTIN_OBJECT __constant_cpu_to_le32(0xC0220017) +#define STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS __constant_cpu_to_le32(0xC0220018) +#define STATUS_FWP_TOO_MANY_CALLOUTS __constant_cpu_to_le32(0xC0220018) +#define STATUS_FWP_NOTIFICATION_DROPPED __constant_cpu_to_le32(0xC0220019) +#define STATUS_FWP_TRAFFIC_MISMATCH __constant_cpu_to_le32(0xC022001A) +#define STATUS_FWP_INCOMPATIBLE_SA_STATE __constant_cpu_to_le32(0xC022001B) +#define STATUS_FWP_NULL_POINTER __constant_cpu_to_le32(0xC022001C) +#define STATUS_FWP_INVALID_ENUMERATOR __constant_cpu_to_le32(0xC022001D) +#define STATUS_FWP_INVALID_FLAGS __constant_cpu_to_le32(0xC022001E) +#define STATUS_FWP_INVALID_NET_MASK __constant_cpu_to_le32(0xC022001F) +#define STATUS_FWP_INVALID_RANGE __constant_cpu_to_le32(0xC0220020) +#define STATUS_FWP_INVALID_INTERVAL __constant_cpu_to_le32(0xC0220021) +#define STATUS_FWP_ZERO_LENGTH_ARRAY __constant_cpu_to_le32(0xC0220022) +#define STATUS_FWP_NULL_DISPLAY_NAME __constant_cpu_to_le32(0xC0220023) +#define STATUS_FWP_INVALID_ACTION_TYPE __constant_cpu_to_le32(0xC0220024) +#define STATUS_FWP_INVALID_WEIGHT __constant_cpu_to_le32(0xC0220025) +#define STATUS_FWP_MATCH_TYPE_MISMATCH __constant_cpu_to_le32(0xC0220026) +#define STATUS_FWP_TYPE_MISMATCH __constant_cpu_to_le32(0xC0220027) +#define STATUS_FWP_OUT_OF_BOUNDS __constant_cpu_to_le32(0xC0220028) +#define STATUS_FWP_RESERVED __constant_cpu_to_le32(0xC0220029) +#define STATUS_FWP_DUPLICATE_CONDITION __constant_cpu_to_le32(0xC022002A) +#define STATUS_FWP_DUPLICATE_KEYMOD __constant_cpu_to_le32(0xC022002B) +#define STATUS_FWP_ACTION_INCOMPATIBLE_WITH_LAYER __constant_cpu_to_le32(0xC022002C) +#define STATUS_FWP_ACTION_INCOMPATIBLE_WITH_SUBLAYER __constant_cpu_to_le32(0xC022002D) +#define STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_LAYER __constant_cpu_to_le32(0xC022002E) +#define STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_CALLOUT 
__constant_cpu_to_le32(0xC022002F) +#define STATUS_FWP_INCOMPATIBLE_AUTH_METHOD __constant_cpu_to_le32(0xC0220030) +#define STATUS_FWP_INCOMPATIBLE_DH_GROUP __constant_cpu_to_le32(0xC0220031) +#define STATUS_FWP_EM_NOT_SUPPORTED __constant_cpu_to_le32(0xC0220032) +#define STATUS_FWP_NEVER_MATCH __constant_cpu_to_le32(0xC0220033) +#define STATUS_FWP_PROVIDER_CONTEXT_MISMATCH __constant_cpu_to_le32(0xC0220034) +#define STATUS_FWP_INVALID_PARAMETER __constant_cpu_to_le32(0xC0220035) +#define STATUS_FWP_TOO_MANY_SUBLAYERS __constant_cpu_to_le32(0xC0220036) +#define STATUS_FWP_CALLOUT_NOTIFICATION_FAILED __constant_cpu_to_le32(0xC0220037) +#define STATUS_FWP_INCOMPATIBLE_AUTH_CONFIG __constant_cpu_to_le32(0xC0220038) +#define STATUS_FWP_INCOMPATIBLE_CIPHER_CONFIG __constant_cpu_to_le32(0xC0220039) +#define STATUS_FWP_TCPIP_NOT_READY __constant_cpu_to_le32(0xC0220100) +#define STATUS_FWP_INJECT_HANDLE_CLOSING __constant_cpu_to_le32(0xC0220101) +#define STATUS_FWP_INJECT_HANDLE_STALE __constant_cpu_to_le32(0xC0220102) +#define STATUS_FWP_CANNOT_PEND __constant_cpu_to_le32(0xC0220103) +#define STATUS_NDIS_CLOSING __constant_cpu_to_le32(0xC0230002) +#define STATUS_NDIS_BAD_VERSION __constant_cpu_to_le32(0xC0230004) +#define STATUS_NDIS_BAD_CHARACTERISTICS __constant_cpu_to_le32(0xC0230005) +#define STATUS_NDIS_ADAPTER_NOT_FOUND __constant_cpu_to_le32(0xC0230006) +#define STATUS_NDIS_OPEN_FAILED __constant_cpu_to_le32(0xC0230007) +#define STATUS_NDIS_DEVICE_FAILED __constant_cpu_to_le32(0xC0230008) +#define STATUS_NDIS_MULTICAST_FULL __constant_cpu_to_le32(0xC0230009) +#define STATUS_NDIS_MULTICAST_EXISTS __constant_cpu_to_le32(0xC023000A) +#define STATUS_NDIS_MULTICAST_NOT_FOUND __constant_cpu_to_le32(0xC023000B) +#define STATUS_NDIS_REQUEST_ABORTED __constant_cpu_to_le32(0xC023000C) +#define STATUS_NDIS_RESET_IN_PROGRESS __constant_cpu_to_le32(0xC023000D) +#define STATUS_NDIS_INVALID_PACKET __constant_cpu_to_le32(0xC023000F) +#define STATUS_NDIS_INVALID_DEVICE_REQUEST 
__constant_cpu_to_le32(0xC0230010) +#define STATUS_NDIS_ADAPTER_NOT_READY __constant_cpu_to_le32(0xC0230011) +#define STATUS_NDIS_INVALID_LENGTH __constant_cpu_to_le32(0xC0230014) +#define STATUS_NDIS_INVALID_DATA __constant_cpu_to_le32(0xC0230015) +#define STATUS_NDIS_BUFFER_TOO_SHORT __constant_cpu_to_le32(0xC0230016) +#define STATUS_NDIS_INVALID_OID __constant_cpu_to_le32(0xC0230017) +#define STATUS_NDIS_ADAPTER_REMOVED __constant_cpu_to_le32(0xC0230018) +#define STATUS_NDIS_UNSUPPORTED_MEDIA __constant_cpu_to_le32(0xC0230019) +#define STATUS_NDIS_GROUP_ADDRESS_IN_USE __constant_cpu_to_le32(0xC023001A) +#define STATUS_NDIS_FILE_NOT_FOUND __constant_cpu_to_le32(0xC023001B) +#define STATUS_NDIS_ERROR_READING_FILE __constant_cpu_to_le32(0xC023001C) +#define STATUS_NDIS_ALREADY_MAPPED __constant_cpu_to_le32(0xC023001D) +#define STATUS_NDIS_RESOURCE_CONFLICT __constant_cpu_to_le32(0xC023001E) +#define STATUS_NDIS_MEDIA_DISCONNECTED __constant_cpu_to_le32(0xC023001F) +#define STATUS_NDIS_INVALID_ADDRESS __constant_cpu_to_le32(0xC0230022) +#define STATUS_NDIS_PAUSED __constant_cpu_to_le32(0xC023002A) +#define STATUS_NDIS_INTERFACE_NOT_FOUND __constant_cpu_to_le32(0xC023002B) +#define STATUS_NDIS_UNSUPPORTED_REVISION __constant_cpu_to_le32(0xC023002C) +#define STATUS_NDIS_INVALID_PORT __constant_cpu_to_le32(0xC023002D) +#define STATUS_NDIS_INVALID_PORT_STATE __constant_cpu_to_le32(0xC023002E) +#define STATUS_NDIS_LOW_POWER_STATE __constant_cpu_to_le32(0xC023002F) +#define STATUS_NDIS_NOT_SUPPORTED __constant_cpu_to_le32(0xC02300BB) +#define STATUS_NDIS_DOT11_AUTO_CONFIG_ENABLED __constant_cpu_to_le32(0xC0232000) +#define STATUS_NDIS_DOT11_MEDIA_IN_USE __constant_cpu_to_le32(0xC0232001) +#define STATUS_NDIS_DOT11_POWER_STATE_INVALID __constant_cpu_to_le32(0xC0232002) +#define STATUS_IPSEC_BAD_SPI __constant_cpu_to_le32(0xC0360001) +#define STATUS_IPSEC_SA_LIFETIME_EXPIRED __constant_cpu_to_le32(0xC0360002) +#define STATUS_IPSEC_WRONG_SA __constant_cpu_to_le32(0xC0360003) 
+#define STATUS_IPSEC_REPLAY_CHECK_FAILED __constant_cpu_to_le32(0xC0360004) +#define STATUS_IPSEC_INVALID_PACKET __constant_cpu_to_le32(0xC0360005) +#define STATUS_IPSEC_INTEGRITY_CHECK_FAILED __constant_cpu_to_le32(0xC0360006) +#define STATUS_IPSEC_CLEAR_TEXT_DROP __constant_cpu_to_le32(0xC0360007) -- cgit v1.2.3 From ddfbefbd393fb1a935bdf27cba5ad2eb24a76e75 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 15 Mar 2011 02:08:48 +0000 Subject: CIFS: Map SMB2 status codes to POSIX errors Add mapping table for 32-bit SMB2 status codes to Linux errors. Note that SMB2 does not use DOS/OS2 errors (ever) so mapping to DOS/OS2 errors as a common network subset (as we do for cifs) doesn't help. And note that the set of status codes is much more complete here. Signed-off-by: Steve French Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/smb2maperror.c | 2477 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 56 ++ fs/cifs/smb2proto.h | 37 + 4 files changed, 2571 insertions(+), 1 deletion(-) create mode 100644 fs/cifs/smb2maperror.c create mode 100644 fs/cifs/smb2pdu.h create mode 100644 fs/cifs/smb2proto.h (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 4b4127544349..4a7727143721 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -16,4 +16,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o -cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o +cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c new file mode 100644 index 000000000000..be41478acc05 --- /dev/null +++ b/fs/cifs/smb2maperror.c @@ -0,0 +1,2477 @@ +/* + * fs/cifs/smb2maperror.c + * + * Functions which do error mapping of SMB2 status codes to POSIX errors + * + * Copyright (C) International Business Machines Corp., 2009 + * Author(s): Steve French (sfrench@us.ibm.com) + * + * This library is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/errno.h> +#include "cifsglob.h" +#include "cifs_debug.h" +#include "smb2pdu.h" +#include "smb2proto.h" +#include "smb2status.h" + +struct status_to_posix_error { /* one row of the SMB2 status -> errno map below */ + __le32 smb2_status; /* STATUS_* constant, kept little-endian (see __constant_cpu_to_le32 in smb2status.h) */ + int posix_error; /* 0 for success, otherwise a negative errno such as -EIO */ + char *status_string; /* the STATUS_* macro name spelled out as text; presumably for log output - confirm against users */ +}; + +static const struct status_to_posix_error smb2_error_map_table[] = { + {STATUS_SUCCESS, 0, "STATUS_SUCCESS"}, + {STATUS_WAIT_0, 0, "STATUS_WAIT_0"}, + {STATUS_WAIT_1, -EIO, "STATUS_WAIT_1"}, + {STATUS_WAIT_2, -EIO, "STATUS_WAIT_2"}, + {STATUS_WAIT_3, -EIO, "STATUS_WAIT_3"}, + {STATUS_WAIT_63, -EIO, "STATUS_WAIT_63"}, + {STATUS_ABANDONED, -EIO, "STATUS_ABANDONED"}, + {STATUS_ABANDONED_WAIT_0, -EIO, "STATUS_ABANDONED_WAIT_0"}, + {STATUS_ABANDONED_WAIT_63, -EIO, "STATUS_ABANDONED_WAIT_63"}, + {STATUS_USER_APC, -EIO, "STATUS_USER_APC"}, + {STATUS_KERNEL_APC, -EIO, "STATUS_KERNEL_APC"}, + {STATUS_ALERTED, -EIO, "STATUS_ALERTED"}, + {STATUS_TIMEOUT, -ETIMEDOUT, "STATUS_TIMEOUT"}, + {STATUS_PENDING, -EIO, "STATUS_PENDING"}, + {STATUS_REPARSE, -EIO, "STATUS_REPARSE"}, + {STATUS_MORE_ENTRIES, -EIO, "STATUS_MORE_ENTRIES"}, + {STATUS_NOT_ALL_ASSIGNED, -EIO, "STATUS_NOT_ALL_ASSIGNED"}, + {STATUS_SOME_NOT_MAPPED, -EIO, "STATUS_SOME_NOT_MAPPED"}, + {STATUS_OPLOCK_BREAK_IN_PROGRESS, -EIO, + "STATUS_OPLOCK_BREAK_IN_PROGRESS"}, + 
{STATUS_VOLUME_MOUNTED, -EIO, "STATUS_VOLUME_MOUNTED"}, + {STATUS_RXACT_COMMITTED, -EIO, "STATUS_RXACT_COMMITTED"}, + {STATUS_NOTIFY_CLEANUP, -EIO, "STATUS_NOTIFY_CLEANUP"}, + {STATUS_NOTIFY_ENUM_DIR, -EIO, "STATUS_NOTIFY_ENUM_DIR"}, + {STATUS_NO_QUOTAS_FOR_ACCOUNT, -EIO, "STATUS_NO_QUOTAS_FOR_ACCOUNT"}, + {STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED, -EIO, + "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED"}, + {STATUS_PAGE_FAULT_TRANSITION, -EIO, "STATUS_PAGE_FAULT_TRANSITION"}, + {STATUS_PAGE_FAULT_DEMAND_ZERO, -EIO, "STATUS_PAGE_FAULT_DEMAND_ZERO"}, + {STATUS_PAGE_FAULT_COPY_ON_WRITE, -EIO, + "STATUS_PAGE_FAULT_COPY_ON_WRITE"}, + {STATUS_PAGE_FAULT_GUARD_PAGE, -EIO, "STATUS_PAGE_FAULT_GUARD_PAGE"}, + {STATUS_PAGE_FAULT_PAGING_FILE, -EIO, "STATUS_PAGE_FAULT_PAGING_FILE"}, + {STATUS_CACHE_PAGE_LOCKED, -EIO, "STATUS_CACHE_PAGE_LOCKED"}, + {STATUS_CRASH_DUMP, -EIO, "STATUS_CRASH_DUMP"}, + {STATUS_BUFFER_ALL_ZEROS, -EIO, "STATUS_BUFFER_ALL_ZEROS"}, + {STATUS_REPARSE_OBJECT, -EIO, "STATUS_REPARSE_OBJECT"}, + {STATUS_RESOURCE_REQUIREMENTS_CHANGED, -EIO, + "STATUS_RESOURCE_REQUIREMENTS_CHANGED"}, + {STATUS_TRANSLATION_COMPLETE, -EIO, "STATUS_TRANSLATION_COMPLETE"}, + {STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY, -EIO, + "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY"}, + {STATUS_NOTHING_TO_TERMINATE, -EIO, "STATUS_NOTHING_TO_TERMINATE"}, + {STATUS_PROCESS_NOT_IN_JOB, -EIO, "STATUS_PROCESS_NOT_IN_JOB"}, + {STATUS_PROCESS_IN_JOB, -EIO, "STATUS_PROCESS_IN_JOB"}, + {STATUS_VOLSNAP_HIBERNATE_READY, -EIO, + "STATUS_VOLSNAP_HIBERNATE_READY"}, + {STATUS_FSFILTER_OP_COMPLETED_SUCCESSFULLY, -EIO, + "STATUS_FSFILTER_OP_COMPLETED_SUCCESSFULLY"}, + {STATUS_INTERRUPT_VECTOR_ALREADY_CONNECTED, -EIO, + "STATUS_INTERRUPT_VECTOR_ALREADY_CONNECTED"}, + {STATUS_INTERRUPT_STILL_CONNECTED, -EIO, + "STATUS_INTERRUPT_STILL_CONNECTED"}, + {STATUS_PROCESS_CLONED, -EIO, "STATUS_PROCESS_CLONED"}, + {STATUS_FILE_LOCKED_WITH_ONLY_READERS, -EIO, + "STATUS_FILE_LOCKED_WITH_ONLY_READERS"}, + {STATUS_FILE_LOCKED_WITH_WRITERS, 
-EIO, + "STATUS_FILE_LOCKED_WITH_WRITERS"}, + {STATUS_RESOURCEMANAGER_READ_ONLY, -EROFS, + "STATUS_RESOURCEMANAGER_READ_ONLY"}, + {STATUS_WAIT_FOR_OPLOCK, -EIO, "STATUS_WAIT_FOR_OPLOCK"}, + {DBG_EXCEPTION_HANDLED, -EIO, "DBG_EXCEPTION_HANDLED"}, + {DBG_CONTINUE, -EIO, "DBG_CONTINUE"}, + {STATUS_FLT_IO_COMPLETE, -EIO, "STATUS_FLT_IO_COMPLETE"}, + {STATUS_OBJECT_NAME_EXISTS, -EIO, "STATUS_OBJECT_NAME_EXISTS"}, + {STATUS_THREAD_WAS_SUSPENDED, -EIO, "STATUS_THREAD_WAS_SUSPENDED"}, + {STATUS_WORKING_SET_LIMIT_RANGE, -EIO, + "STATUS_WORKING_SET_LIMIT_RANGE"}, + {STATUS_IMAGE_NOT_AT_BASE, -EIO, "STATUS_IMAGE_NOT_AT_BASE"}, + {STATUS_RXACT_STATE_CREATED, -EIO, "STATUS_RXACT_STATE_CREATED"}, + {STATUS_SEGMENT_NOTIFICATION, -EIO, "STATUS_SEGMENT_NOTIFICATION"}, + {STATUS_LOCAL_USER_SESSION_KEY, -EIO, "STATUS_LOCAL_USER_SESSION_KEY"}, + {STATUS_BAD_CURRENT_DIRECTORY, -EIO, "STATUS_BAD_CURRENT_DIRECTORY"}, + {STATUS_SERIAL_MORE_WRITES, -EIO, "STATUS_SERIAL_MORE_WRITES"}, + {STATUS_REGISTRY_RECOVERED, -EIO, "STATUS_REGISTRY_RECOVERED"}, + {STATUS_FT_READ_RECOVERY_FROM_BACKUP, -EIO, + "STATUS_FT_READ_RECOVERY_FROM_BACKUP"}, + {STATUS_FT_WRITE_RECOVERY, -EIO, "STATUS_FT_WRITE_RECOVERY"}, + {STATUS_SERIAL_COUNTER_TIMEOUT, -ETIMEDOUT, + "STATUS_SERIAL_COUNTER_TIMEOUT"}, + {STATUS_NULL_LM_PASSWORD, -EIO, "STATUS_NULL_LM_PASSWORD"}, + {STATUS_IMAGE_MACHINE_TYPE_MISMATCH, -EIO, + "STATUS_IMAGE_MACHINE_TYPE_MISMATCH"}, + {STATUS_RECEIVE_PARTIAL, -EIO, "STATUS_RECEIVE_PARTIAL"}, + {STATUS_RECEIVE_EXPEDITED, -EIO, "STATUS_RECEIVE_EXPEDITED"}, + {STATUS_RECEIVE_PARTIAL_EXPEDITED, -EIO, + "STATUS_RECEIVE_PARTIAL_EXPEDITED"}, + {STATUS_EVENT_DONE, -EIO, "STATUS_EVENT_DONE"}, + {STATUS_EVENT_PENDING, -EIO, "STATUS_EVENT_PENDING"}, + {STATUS_CHECKING_FILE_SYSTEM, -EIO, "STATUS_CHECKING_FILE_SYSTEM"}, + {STATUS_FATAL_APP_EXIT, -EIO, "STATUS_FATAL_APP_EXIT"}, + {STATUS_PREDEFINED_HANDLE, -EIO, "STATUS_PREDEFINED_HANDLE"}, + {STATUS_WAS_UNLOCKED, -EIO, "STATUS_WAS_UNLOCKED"}, + 
{STATUS_SERVICE_NOTIFICATION, -EIO, "STATUS_SERVICE_NOTIFICATION"}, + {STATUS_WAS_LOCKED, -EIO, "STATUS_WAS_LOCKED"}, + {STATUS_LOG_HARD_ERROR, -EIO, "STATUS_LOG_HARD_ERROR"}, + {STATUS_ALREADY_WIN32, -EIO, "STATUS_ALREADY_WIN32"}, + {STATUS_WX86_UNSIMULATE, -EIO, "STATUS_WX86_UNSIMULATE"}, + {STATUS_WX86_CONTINUE, -EIO, "STATUS_WX86_CONTINUE"}, + {STATUS_WX86_SINGLE_STEP, -EIO, "STATUS_WX86_SINGLE_STEP"}, + {STATUS_WX86_BREAKPOINT, -EIO, "STATUS_WX86_BREAKPOINT"}, + {STATUS_WX86_EXCEPTION_CONTINUE, -EIO, + "STATUS_WX86_EXCEPTION_CONTINUE"}, + {STATUS_WX86_EXCEPTION_LASTCHANCE, -EIO, + "STATUS_WX86_EXCEPTION_LASTCHANCE"}, + {STATUS_WX86_EXCEPTION_CHAIN, -EIO, "STATUS_WX86_EXCEPTION_CHAIN"}, + {STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE, -EIO, + "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE"}, + {STATUS_NO_YIELD_PERFORMED, -EIO, "STATUS_NO_YIELD_PERFORMED"}, + {STATUS_TIMER_RESUME_IGNORED, -EIO, "STATUS_TIMER_RESUME_IGNORED"}, + {STATUS_ARBITRATION_UNHANDLED, -EIO, "STATUS_ARBITRATION_UNHANDLED"}, + {STATUS_CARDBUS_NOT_SUPPORTED, -ENOSYS, "STATUS_CARDBUS_NOT_SUPPORTED"}, + {STATUS_WX86_CREATEWX86TIB, -EIO, "STATUS_WX86_CREATEWX86TIB"}, + {STATUS_MP_PROCESSOR_MISMATCH, -EIO, "STATUS_MP_PROCESSOR_MISMATCH"}, + {STATUS_HIBERNATED, -EIO, "STATUS_HIBERNATED"}, + {STATUS_RESUME_HIBERNATION, -EIO, "STATUS_RESUME_HIBERNATION"}, + {STATUS_FIRMWARE_UPDATED, -EIO, "STATUS_FIRMWARE_UPDATED"}, + {STATUS_DRIVERS_LEAKING_LOCKED_PAGES, -EIO, + "STATUS_DRIVERS_LEAKING_LOCKED_PAGES"}, + {STATUS_MESSAGE_RETRIEVED, -EIO, "STATUS_MESSAGE_RETRIEVED"}, + {STATUS_SYSTEM_POWERSTATE_TRANSITION, -EIO, + "STATUS_SYSTEM_POWERSTATE_TRANSITION"}, + {STATUS_ALPC_CHECK_COMPLETION_LIST, -EIO, + "STATUS_ALPC_CHECK_COMPLETION_LIST"}, + {STATUS_SYSTEM_POWERSTATE_COMPLEX_TRANSITION, -EIO, + "STATUS_SYSTEM_POWERSTATE_COMPLEX_TRANSITION"}, + {STATUS_ACCESS_AUDIT_BY_POLICY, -EIO, "STATUS_ACCESS_AUDIT_BY_POLICY"}, + {STATUS_ABANDON_HIBERFILE, -EIO, "STATUS_ABANDON_HIBERFILE"}, + {STATUS_BIZRULES_NOT_ENABLED, -EIO, 
"STATUS_BIZRULES_NOT_ENABLED"}, + {STATUS_WAKE_SYSTEM, -EIO, "STATUS_WAKE_SYSTEM"}, + {STATUS_DS_SHUTTING_DOWN, -EIO, "STATUS_DS_SHUTTING_DOWN"}, + {DBG_REPLY_LATER, -EIO, "DBG_REPLY_LATER"}, + {DBG_UNABLE_TO_PROVIDE_HANDLE, -EIO, "DBG_UNABLE_TO_PROVIDE_HANDLE"}, + {DBG_TERMINATE_THREAD, -EIO, "DBG_TERMINATE_THREAD"}, + {DBG_TERMINATE_PROCESS, -EIO, "DBG_TERMINATE_PROCESS"}, + {DBG_CONTROL_C, -EIO, "DBG_CONTROL_C"}, + {DBG_PRINTEXCEPTION_C, -EIO, "DBG_PRINTEXCEPTION_C"}, + {DBG_RIPEXCEPTION, -EIO, "DBG_RIPEXCEPTION"}, + {DBG_CONTROL_BREAK, -EIO, "DBG_CONTROL_BREAK"}, + {DBG_COMMAND_EXCEPTION, -EIO, "DBG_COMMAND_EXCEPTION"}, + {RPC_NT_UUID_LOCAL_ONLY, -EIO, "RPC_NT_UUID_LOCAL_ONLY"}, + {RPC_NT_SEND_INCOMPLETE, -EIO, "RPC_NT_SEND_INCOMPLETE"}, + {STATUS_CTX_CDM_CONNECT, -EIO, "STATUS_CTX_CDM_CONNECT"}, + {STATUS_CTX_CDM_DISCONNECT, -EIO, "STATUS_CTX_CDM_DISCONNECT"}, + {STATUS_SXS_RELEASE_ACTIVATION_CONTEXT, -EIO, + "STATUS_SXS_RELEASE_ACTIVATION_CONTEXT"}, + {STATUS_RECOVERY_NOT_NEEDED, -EIO, "STATUS_RECOVERY_NOT_NEEDED"}, + {STATUS_RM_ALREADY_STARTED, -EIO, "STATUS_RM_ALREADY_STARTED"}, + {STATUS_LOG_NO_RESTART, -EIO, "STATUS_LOG_NO_RESTART"}, + {STATUS_VIDEO_DRIVER_DEBUG_REPORT_REQUEST, -EIO, + "STATUS_VIDEO_DRIVER_DEBUG_REPORT_REQUEST"}, + {STATUS_GRAPHICS_PARTIAL_DATA_POPULATED, -EIO, + "STATUS_GRAPHICS_PARTIAL_DATA_POPULATED"}, + {STATUS_GRAPHICS_DRIVER_MISMATCH, -EIO, + "STATUS_GRAPHICS_DRIVER_MISMATCH"}, + {STATUS_GRAPHICS_MODE_NOT_PINNED, -EIO, + "STATUS_GRAPHICS_MODE_NOT_PINNED"}, + {STATUS_GRAPHICS_NO_PREFERRED_MODE, -EIO, + "STATUS_GRAPHICS_NO_PREFERRED_MODE"}, + {STATUS_GRAPHICS_DATASET_IS_EMPTY, -EIO, + "STATUS_GRAPHICS_DATASET_IS_EMPTY"}, + {STATUS_GRAPHICS_NO_MORE_ELEMENTS_IN_DATASET, -EIO, + "STATUS_GRAPHICS_NO_MORE_ELEMENTS_IN_DATASET"}, + {STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_PINNED, -EIO, + "STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_PINNED"}, + {STATUS_GRAPHICS_UNKNOWN_CHILD_STATUS, -EIO, + 
"STATUS_GRAPHICS_UNKNOWN_CHILD_STATUS"}, + {STATUS_GRAPHICS_LEADLINK_START_DEFERRED, -EIO, + "STATUS_GRAPHICS_LEADLINK_START_DEFERRED"}, + {STATUS_GRAPHICS_POLLING_TOO_FREQUENTLY, -EIO, + "STATUS_GRAPHICS_POLLING_TOO_FREQUENTLY"}, + {STATUS_GRAPHICS_START_DEFERRED, -EIO, + "STATUS_GRAPHICS_START_DEFERRED"}, + {STATUS_NDIS_INDICATION_REQUIRED, -EIO, + "STATUS_NDIS_INDICATION_REQUIRED"}, + {STATUS_GUARD_PAGE_VIOLATION, -EIO, "STATUS_GUARD_PAGE_VIOLATION"}, + {STATUS_DATATYPE_MISALIGNMENT, -EIO, "STATUS_DATATYPE_MISALIGNMENT"}, + {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, + {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, + {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, + {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"}, + {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, + {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, + {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, + {STATUS_GUID_SUBSTITUTION_MADE, -EIO, "STATUS_GUID_SUBSTITUTION_MADE"}, + {STATUS_PARTIAL_COPY, -EIO, "STATUS_PARTIAL_COPY"}, + {STATUS_DEVICE_PAPER_EMPTY, -EIO, "STATUS_DEVICE_PAPER_EMPTY"}, + {STATUS_DEVICE_POWERED_OFF, -EIO, "STATUS_DEVICE_POWERED_OFF"}, + {STATUS_DEVICE_OFF_LINE, -EIO, "STATUS_DEVICE_OFF_LINE"}, + {STATUS_DEVICE_BUSY, -EBUSY, "STATUS_DEVICE_BUSY"}, + {STATUS_NO_MORE_EAS, -EIO, "STATUS_NO_MORE_EAS"}, + {STATUS_INVALID_EA_NAME, -EINVAL, "STATUS_INVALID_EA_NAME"}, + {STATUS_EA_LIST_INCONSISTENT, -EIO, "STATUS_EA_LIST_INCONSISTENT"}, + {STATUS_INVALID_EA_FLAG, -EINVAL, "STATUS_INVALID_EA_FLAG"}, + {STATUS_VERIFY_REQUIRED, -EIO, "STATUS_VERIFY_REQUIRED"}, + {STATUS_EXTRANEOUS_INFORMATION, -EIO, "STATUS_EXTRANEOUS_INFORMATION"}, + {STATUS_RXACT_COMMIT_NECESSARY, -EIO, "STATUS_RXACT_COMMIT_NECESSARY"}, + {STATUS_NO_MORE_ENTRIES, -EIO, "STATUS_NO_MORE_ENTRIES"}, + {STATUS_FILEMARK_DETECTED, -EIO, "STATUS_FILEMARK_DETECTED"}, + {STATUS_MEDIA_CHANGED, -EIO, "STATUS_MEDIA_CHANGED"}, + {STATUS_BUS_RESET, -EIO, "STATUS_BUS_RESET"}, + 
{STATUS_END_OF_MEDIA, -EIO, "STATUS_END_OF_MEDIA"}, + {STATUS_BEGINNING_OF_MEDIA, -EIO, "STATUS_BEGINNING_OF_MEDIA"}, + {STATUS_MEDIA_CHECK, -EIO, "STATUS_MEDIA_CHECK"}, + {STATUS_SETMARK_DETECTED, -EIO, "STATUS_SETMARK_DETECTED"}, + {STATUS_NO_DATA_DETECTED, -EIO, "STATUS_NO_DATA_DETECTED"}, + {STATUS_REDIRECTOR_HAS_OPEN_HANDLES, -EIO, + "STATUS_REDIRECTOR_HAS_OPEN_HANDLES"}, + {STATUS_SERVER_HAS_OPEN_HANDLES, -EIO, + "STATUS_SERVER_HAS_OPEN_HANDLES"}, + {STATUS_ALREADY_DISCONNECTED, -EIO, "STATUS_ALREADY_DISCONNECTED"}, + {STATUS_LONGJUMP, -EIO, "STATUS_LONGJUMP"}, + {STATUS_CLEANER_CARTRIDGE_INSTALLED, -EIO, + "STATUS_CLEANER_CARTRIDGE_INSTALLED"}, + {STATUS_PLUGPLAY_QUERY_VETOED, -EIO, "STATUS_PLUGPLAY_QUERY_VETOED"}, + {STATUS_UNWIND_CONSOLIDATE, -EIO, "STATUS_UNWIND_CONSOLIDATE"}, + {STATUS_REGISTRY_HIVE_RECOVERED, -EIO, + "STATUS_REGISTRY_HIVE_RECOVERED"}, + {STATUS_DLL_MIGHT_BE_INSECURE, -EIO, "STATUS_DLL_MIGHT_BE_INSECURE"}, + {STATUS_DLL_MIGHT_BE_INCOMPATIBLE, -EIO, + "STATUS_DLL_MIGHT_BE_INCOMPATIBLE"}, + {STATUS_STOPPED_ON_SYMLINK, -EOPNOTSUPP, "STATUS_STOPPED_ON_SYMLINK"}, + {STATUS_DEVICE_REQUIRES_CLEANING, -EIO, + "STATUS_DEVICE_REQUIRES_CLEANING"}, + {STATUS_DEVICE_DOOR_OPEN, -EIO, "STATUS_DEVICE_DOOR_OPEN"}, + {STATUS_DATA_LOST_REPAIR, -EIO, "STATUS_DATA_LOST_REPAIR"}, + {DBG_EXCEPTION_NOT_HANDLED, -EIO, "DBG_EXCEPTION_NOT_HANDLED"}, + {STATUS_CLUSTER_NODE_ALREADY_UP, -EIO, + "STATUS_CLUSTER_NODE_ALREADY_UP"}, + {STATUS_CLUSTER_NODE_ALREADY_DOWN, -EIO, + "STATUS_CLUSTER_NODE_ALREADY_DOWN"}, + {STATUS_CLUSTER_NETWORK_ALREADY_ONLINE, -EIO, + "STATUS_CLUSTER_NETWORK_ALREADY_ONLINE"}, + {STATUS_CLUSTER_NETWORK_ALREADY_OFFLINE, -EIO, + "STATUS_CLUSTER_NETWORK_ALREADY_OFFLINE"}, + {STATUS_CLUSTER_NODE_ALREADY_MEMBER, -EIO, + "STATUS_CLUSTER_NODE_ALREADY_MEMBER"}, + {STATUS_COULD_NOT_RESIZE_LOG, -EIO, "STATUS_COULD_NOT_RESIZE_LOG"}, + {STATUS_NO_TXF_METADATA, -EIO, "STATUS_NO_TXF_METADATA"}, + {STATUS_CANT_RECOVER_WITH_HANDLE_OPEN, -EIO, + 
"STATUS_CANT_RECOVER_WITH_HANDLE_OPEN"}, + {STATUS_TXF_METADATA_ALREADY_PRESENT, -EIO, + "STATUS_TXF_METADATA_ALREADY_PRESENT"}, + {STATUS_TRANSACTION_SCOPE_CALLBACKS_NOT_SET, -EIO, + "STATUS_TRANSACTION_SCOPE_CALLBACKS_NOT_SET"}, + {STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD_RECOVERED, -EIO, + "STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD_RECOVERED"}, + {STATUS_FLT_BUFFER_TOO_SMALL, -ENOBUFS, "STATUS_FLT_BUFFER_TOO_SMALL"}, + {STATUS_FVE_PARTIAL_METADATA, -EIO, "STATUS_FVE_PARTIAL_METADATA"}, + {STATUS_UNSUCCESSFUL, -EIO, "STATUS_UNSUCCESSFUL"}, + {STATUS_NOT_IMPLEMENTED, -ENOSYS, "STATUS_NOT_IMPLEMENTED"}, + {STATUS_INVALID_INFO_CLASS, -EIO, "STATUS_INVALID_INFO_CLASS"}, + {STATUS_INFO_LENGTH_MISMATCH, -EIO, "STATUS_INFO_LENGTH_MISMATCH"}, + {STATUS_ACCESS_VIOLATION, -EACCES, "STATUS_ACCESS_VIOLATION"}, + {STATUS_IN_PAGE_ERROR, -EFAULT, "STATUS_IN_PAGE_ERROR"}, + {STATUS_PAGEFILE_QUOTA, -EDQUOT, "STATUS_PAGEFILE_QUOTA"}, + {STATUS_INVALID_HANDLE, -EBADF, "STATUS_INVALID_HANDLE"}, + {STATUS_BAD_INITIAL_STACK, -EIO, "STATUS_BAD_INITIAL_STACK"}, + {STATUS_BAD_INITIAL_PC, -EIO, "STATUS_BAD_INITIAL_PC"}, + {STATUS_INVALID_CID, -EIO, "STATUS_INVALID_CID"}, + {STATUS_TIMER_NOT_CANCELED, -EIO, "STATUS_TIMER_NOT_CANCELED"}, + {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"}, + {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"}, + {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"}, + {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"}, + {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"}, + {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"}, + {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"}, + {STATUS_UNRECOGNIZED_MEDIA, -EIO, "STATUS_UNRECOGNIZED_MEDIA"}, + {STATUS_NONEXISTENT_SECTOR, -EIO, "STATUS_NONEXISTENT_SECTOR"}, + {STATUS_MORE_PROCESSING_REQUIRED, -EIO, + "STATUS_MORE_PROCESSING_REQUIRED"}, + {STATUS_NO_MEMORY, -ENOMEM, "STATUS_NO_MEMORY"}, + {STATUS_CONFLICTING_ADDRESSES, -EADDRINUSE, + 
"STATUS_CONFLICTING_ADDRESSES"}, + {STATUS_NOT_MAPPED_VIEW, -EIO, "STATUS_NOT_MAPPED_VIEW"}, + {STATUS_UNABLE_TO_FREE_VM, -EIO, "STATUS_UNABLE_TO_FREE_VM"}, + {STATUS_UNABLE_TO_DELETE_SECTION, -EIO, + "STATUS_UNABLE_TO_DELETE_SECTION"}, + {STATUS_INVALID_SYSTEM_SERVICE, -EIO, "STATUS_INVALID_SYSTEM_SERVICE"}, + {STATUS_ILLEGAL_INSTRUCTION, -EIO, "STATUS_ILLEGAL_INSTRUCTION"}, + {STATUS_INVALID_LOCK_SEQUENCE, -EIO, "STATUS_INVALID_LOCK_SEQUENCE"}, + {STATUS_INVALID_VIEW_SIZE, -EIO, "STATUS_INVALID_VIEW_SIZE"}, + {STATUS_INVALID_FILE_FOR_SECTION, -EIO, + "STATUS_INVALID_FILE_FOR_SECTION"}, + {STATUS_ALREADY_COMMITTED, -EIO, "STATUS_ALREADY_COMMITTED"}, + {STATUS_ACCESS_DENIED, -EACCES, "STATUS_ACCESS_DENIED"}, + {STATUS_BUFFER_TOO_SMALL, -EIO, "STATUS_BUFFER_TOO_SMALL"}, + {STATUS_OBJECT_TYPE_MISMATCH, -EIO, "STATUS_OBJECT_TYPE_MISMATCH"}, + {STATUS_NONCONTINUABLE_EXCEPTION, -EIO, + "STATUS_NONCONTINUABLE_EXCEPTION"}, + {STATUS_INVALID_DISPOSITION, -EIO, "STATUS_INVALID_DISPOSITION"}, + {STATUS_UNWIND, -EIO, "STATUS_UNWIND"}, + {STATUS_BAD_STACK, -EIO, "STATUS_BAD_STACK"}, + {STATUS_INVALID_UNWIND_TARGET, -EIO, "STATUS_INVALID_UNWIND_TARGET"}, + {STATUS_NOT_LOCKED, -EIO, "STATUS_NOT_LOCKED"}, + {STATUS_PARITY_ERROR, -EIO, "STATUS_PARITY_ERROR"}, + {STATUS_UNABLE_TO_DECOMMIT_VM, -EIO, "STATUS_UNABLE_TO_DECOMMIT_VM"}, + {STATUS_NOT_COMMITTED, -EIO, "STATUS_NOT_COMMITTED"}, + {STATUS_INVALID_PORT_ATTRIBUTES, -EIO, + "STATUS_INVALID_PORT_ATTRIBUTES"}, + {STATUS_PORT_MESSAGE_TOO_LONG, -EIO, "STATUS_PORT_MESSAGE_TOO_LONG"}, + {STATUS_INVALID_PARAMETER_MIX, -EINVAL, "STATUS_INVALID_PARAMETER_MIX"}, + {STATUS_INVALID_QUOTA_LOWER, -EIO, "STATUS_INVALID_QUOTA_LOWER"}, + {STATUS_DISK_CORRUPT_ERROR, -EIO, "STATUS_DISK_CORRUPT_ERROR"}, + {STATUS_OBJECT_NAME_INVALID, -ENOENT, "STATUS_OBJECT_NAME_INVALID"}, + {STATUS_OBJECT_NAME_NOT_FOUND, -ENOENT, "STATUS_OBJECT_NAME_NOT_FOUND"}, + {STATUS_OBJECT_NAME_COLLISION, -EEXIST, "STATUS_OBJECT_NAME_COLLISION"}, + 
{STATUS_PORT_DISCONNECTED, -EIO, "STATUS_PORT_DISCONNECTED"}, + {STATUS_DEVICE_ALREADY_ATTACHED, -EIO, + "STATUS_DEVICE_ALREADY_ATTACHED"}, + {STATUS_OBJECT_PATH_INVALID, -ENOTDIR, "STATUS_OBJECT_PATH_INVALID"}, + {STATUS_OBJECT_PATH_NOT_FOUND, -ENOENT, "STATUS_OBJECT_PATH_NOT_FOUND"}, + {STATUS_OBJECT_PATH_SYNTAX_BAD, -EIO, "STATUS_OBJECT_PATH_SYNTAX_BAD"}, + {STATUS_DATA_OVERRUN, -EIO, "STATUS_DATA_OVERRUN"}, + {STATUS_DATA_LATE_ERROR, -EIO, "STATUS_DATA_LATE_ERROR"}, + {STATUS_DATA_ERROR, -EIO, "STATUS_DATA_ERROR"}, + {STATUS_CRC_ERROR, -EIO, "STATUS_CRC_ERROR"}, + {STATUS_SECTION_TOO_BIG, -EIO, "STATUS_SECTION_TOO_BIG"}, + {STATUS_PORT_CONNECTION_REFUSED, -ECONNREFUSED, + "STATUS_PORT_CONNECTION_REFUSED"}, + {STATUS_INVALID_PORT_HANDLE, -EIO, "STATUS_INVALID_PORT_HANDLE"}, + {STATUS_SHARING_VIOLATION, -EBUSY, "STATUS_SHARING_VIOLATION"}, + {STATUS_QUOTA_EXCEEDED, -EDQUOT, "STATUS_QUOTA_EXCEEDED"}, + {STATUS_INVALID_PAGE_PROTECTION, -EIO, + "STATUS_INVALID_PAGE_PROTECTION"}, + {STATUS_MUTANT_NOT_OWNED, -EIO, "STATUS_MUTANT_NOT_OWNED"}, + {STATUS_SEMAPHORE_LIMIT_EXCEEDED, -EIO, + "STATUS_SEMAPHORE_LIMIT_EXCEEDED"}, + {STATUS_PORT_ALREADY_SET, -EIO, "STATUS_PORT_ALREADY_SET"}, + {STATUS_SECTION_NOT_IMAGE, -EIO, "STATUS_SECTION_NOT_IMAGE"}, + {STATUS_SUSPEND_COUNT_EXCEEDED, -EIO, "STATUS_SUSPEND_COUNT_EXCEEDED"}, + {STATUS_THREAD_IS_TERMINATING, -EIO, "STATUS_THREAD_IS_TERMINATING"}, + {STATUS_BAD_WORKING_SET_LIMIT, -EIO, "STATUS_BAD_WORKING_SET_LIMIT"}, + {STATUS_INCOMPATIBLE_FILE_MAP, -EIO, "STATUS_INCOMPATIBLE_FILE_MAP"}, + {STATUS_SECTION_PROTECTION, -EIO, "STATUS_SECTION_PROTECTION"}, + {STATUS_EAS_NOT_SUPPORTED, -EOPNOTSUPP, "STATUS_EAS_NOT_SUPPORTED"}, + {STATUS_EA_TOO_LARGE, -EIO, "STATUS_EA_TOO_LARGE"}, + {STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"}, + {STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"}, + {STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"}, + {STATUS_FILE_LOCK_CONFLICT, -EIO, 
"STATUS_FILE_LOCK_CONFLICT"}, + {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"}, + {STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"}, + {STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS, + "STATUS_CTL_FILE_NOT_SUPPORTED"}, + {STATUS_UNKNOWN_REVISION, -EIO, "STATUS_UNKNOWN_REVISION"}, + {STATUS_REVISION_MISMATCH, -EIO, "STATUS_REVISION_MISMATCH"}, + {STATUS_INVALID_OWNER, -EIO, "STATUS_INVALID_OWNER"}, + {STATUS_INVALID_PRIMARY_GROUP, -EIO, "STATUS_INVALID_PRIMARY_GROUP"}, + {STATUS_NO_IMPERSONATION_TOKEN, -EIO, "STATUS_NO_IMPERSONATION_TOKEN"}, + {STATUS_CANT_DISABLE_MANDATORY, -EIO, "STATUS_CANT_DISABLE_MANDATORY"}, + {STATUS_NO_LOGON_SERVERS, -EIO, "STATUS_NO_LOGON_SERVERS"}, + {STATUS_NO_SUCH_LOGON_SESSION, -EIO, "STATUS_NO_SUCH_LOGON_SESSION"}, + {STATUS_NO_SUCH_PRIVILEGE, -EIO, "STATUS_NO_SUCH_PRIVILEGE"}, + {STATUS_PRIVILEGE_NOT_HELD, -EIO, "STATUS_PRIVILEGE_NOT_HELD"}, + {STATUS_INVALID_ACCOUNT_NAME, -EIO, "STATUS_INVALID_ACCOUNT_NAME"}, + {STATUS_USER_EXISTS, -EIO, "STATUS_USER_EXISTS"}, + {STATUS_NO_SUCH_USER, -EIO, "STATUS_NO_SUCH_USER"}, + {STATUS_GROUP_EXISTS, -EIO, "STATUS_GROUP_EXISTS"}, + {STATUS_NO_SUCH_GROUP, -EIO, "STATUS_NO_SUCH_GROUP"}, + {STATUS_MEMBER_IN_GROUP, -EIO, "STATUS_MEMBER_IN_GROUP"}, + {STATUS_MEMBER_NOT_IN_GROUP, -EIO, "STATUS_MEMBER_NOT_IN_GROUP"}, + {STATUS_LAST_ADMIN, -EIO, "STATUS_LAST_ADMIN"}, + {STATUS_WRONG_PASSWORD, -EACCES, "STATUS_WRONG_PASSWORD"}, + {STATUS_ILL_FORMED_PASSWORD, -EINVAL, "STATUS_ILL_FORMED_PASSWORD"}, + {STATUS_PASSWORD_RESTRICTION, -EACCES, "STATUS_PASSWORD_RESTRICTION"}, + {STATUS_LOGON_FAILURE, -EACCES, "STATUS_LOGON_FAILURE"}, + {STATUS_ACCOUNT_RESTRICTION, -EACCES, "STATUS_ACCOUNT_RESTRICTION"}, + {STATUS_INVALID_LOGON_HOURS, -EACCES, "STATUS_INVALID_LOGON_HOURS"}, + {STATUS_INVALID_WORKSTATION, -EACCES, "STATUS_INVALID_WORKSTATION"}, + {STATUS_PASSWORD_EXPIRED, -EKEYEXPIRED, "STATUS_PASSWORD_EXPIRED"}, + {STATUS_ACCOUNT_DISABLED, -EKEYREVOKED, "STATUS_ACCOUNT_DISABLED"}, + 
{STATUS_NONE_MAPPED, -EIO, "STATUS_NONE_MAPPED"}, + {STATUS_TOO_MANY_LUIDS_REQUESTED, -EIO, + "STATUS_TOO_MANY_LUIDS_REQUESTED"}, + {STATUS_LUIDS_EXHAUSTED, -EIO, "STATUS_LUIDS_EXHAUSTED"}, + {STATUS_INVALID_SUB_AUTHORITY, -EIO, "STATUS_INVALID_SUB_AUTHORITY"}, + {STATUS_INVALID_ACL, -EIO, "STATUS_INVALID_ACL"}, + {STATUS_INVALID_SID, -EIO, "STATUS_INVALID_SID"}, + {STATUS_INVALID_SECURITY_DESCR, -EIO, "STATUS_INVALID_SECURITY_DESCR"}, + {STATUS_PROCEDURE_NOT_FOUND, -EIO, "STATUS_PROCEDURE_NOT_FOUND"}, + {STATUS_INVALID_IMAGE_FORMAT, -EIO, "STATUS_INVALID_IMAGE_FORMAT"}, + {STATUS_NO_TOKEN, -EIO, "STATUS_NO_TOKEN"}, + {STATUS_BAD_INHERITANCE_ACL, -EIO, "STATUS_BAD_INHERITANCE_ACL"}, + {STATUS_RANGE_NOT_LOCKED, -EIO, "STATUS_RANGE_NOT_LOCKED"}, + {STATUS_DISK_FULL, -ENOSPC, "STATUS_DISK_FULL"}, + {STATUS_SERVER_DISABLED, -EIO, "STATUS_SERVER_DISABLED"}, + {STATUS_SERVER_NOT_DISABLED, -EIO, "STATUS_SERVER_NOT_DISABLED"}, + {STATUS_TOO_MANY_GUIDS_REQUESTED, -EIO, + "STATUS_TOO_MANY_GUIDS_REQUESTED"}, + {STATUS_GUIDS_EXHAUSTED, -EIO, "STATUS_GUIDS_EXHAUSTED"}, + {STATUS_INVALID_ID_AUTHORITY, -EIO, "STATUS_INVALID_ID_AUTHORITY"}, + {STATUS_AGENTS_EXHAUSTED, -EIO, "STATUS_AGENTS_EXHAUSTED"}, + {STATUS_INVALID_VOLUME_LABEL, -EIO, "STATUS_INVALID_VOLUME_LABEL"}, + {STATUS_SECTION_NOT_EXTENDED, -EIO, "STATUS_SECTION_NOT_EXTENDED"}, + {STATUS_NOT_MAPPED_DATA, -EIO, "STATUS_NOT_MAPPED_DATA"}, + {STATUS_RESOURCE_DATA_NOT_FOUND, -EIO, + "STATUS_RESOURCE_DATA_NOT_FOUND"}, + {STATUS_RESOURCE_TYPE_NOT_FOUND, -EIO, + "STATUS_RESOURCE_TYPE_NOT_FOUND"}, + {STATUS_RESOURCE_NAME_NOT_FOUND, -EIO, + "STATUS_RESOURCE_NAME_NOT_FOUND"}, + {STATUS_ARRAY_BOUNDS_EXCEEDED, -EIO, "STATUS_ARRAY_BOUNDS_EXCEEDED"}, + {STATUS_FLOAT_DENORMAL_OPERAND, -EIO, "STATUS_FLOAT_DENORMAL_OPERAND"}, + {STATUS_FLOAT_DIVIDE_BY_ZERO, -EIO, "STATUS_FLOAT_DIVIDE_BY_ZERO"}, + {STATUS_FLOAT_INEXACT_RESULT, -EIO, "STATUS_FLOAT_INEXACT_RESULT"}, + {STATUS_FLOAT_INVALID_OPERATION, -EIO, + 
"STATUS_FLOAT_INVALID_OPERATION"}, + {STATUS_FLOAT_OVERFLOW, -EIO, "STATUS_FLOAT_OVERFLOW"}, + {STATUS_FLOAT_STACK_CHECK, -EIO, "STATUS_FLOAT_STACK_CHECK"}, + {STATUS_FLOAT_UNDERFLOW, -EIO, "STATUS_FLOAT_UNDERFLOW"}, + {STATUS_INTEGER_DIVIDE_BY_ZERO, -EIO, "STATUS_INTEGER_DIVIDE_BY_ZERO"}, + {STATUS_INTEGER_OVERFLOW, -EIO, "STATUS_INTEGER_OVERFLOW"}, + {STATUS_PRIVILEGED_INSTRUCTION, -EIO, "STATUS_PRIVILEGED_INSTRUCTION"}, + {STATUS_TOO_MANY_PAGING_FILES, -EIO, "STATUS_TOO_MANY_PAGING_FILES"}, + {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, + {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, + "STATUS_ALLOTTED_SPACE_EXCEEDED"}, + {STATUS_INSUFFICIENT_RESOURCES, -EIO, "STATUS_INSUFFICIENT_RESOURCES"}, + {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, + {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, + {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"}, + {STATUS_DEVICE_POWER_FAILURE, -EIO, "STATUS_DEVICE_POWER_FAILURE"}, + {STATUS_FREE_VM_NOT_AT_BASE, -EIO, "STATUS_FREE_VM_NOT_AT_BASE"}, + {STATUS_MEMORY_NOT_ALLOCATED, -EFAULT, "STATUS_MEMORY_NOT_ALLOCATED"}, + {STATUS_WORKING_SET_QUOTA, -EIO, "STATUS_WORKING_SET_QUOTA"}, + {STATUS_MEDIA_WRITE_PROTECTED, -EROFS, "STATUS_MEDIA_WRITE_PROTECTED"}, + {STATUS_DEVICE_NOT_READY, -EIO, "STATUS_DEVICE_NOT_READY"}, + {STATUS_INVALID_GROUP_ATTRIBUTES, -EIO, + "STATUS_INVALID_GROUP_ATTRIBUTES"}, + {STATUS_BAD_IMPERSONATION_LEVEL, -EIO, + "STATUS_BAD_IMPERSONATION_LEVEL"}, + {STATUS_CANT_OPEN_ANONYMOUS, -EIO, "STATUS_CANT_OPEN_ANONYMOUS"}, + {STATUS_BAD_VALIDATION_CLASS, -EIO, "STATUS_BAD_VALIDATION_CLASS"}, + {STATUS_BAD_TOKEN_TYPE, -EIO, "STATUS_BAD_TOKEN_TYPE"}, + {STATUS_BAD_MASTER_BOOT_RECORD, -EIO, "STATUS_BAD_MASTER_BOOT_RECORD"}, + {STATUS_INSTRUCTION_MISALIGNMENT, -EIO, + "STATUS_INSTRUCTION_MISALIGNMENT"}, + {STATUS_INSTANCE_NOT_AVAILABLE, -EIO, "STATUS_INSTANCE_NOT_AVAILABLE"}, + {STATUS_PIPE_NOT_AVAILABLE, -EIO, "STATUS_PIPE_NOT_AVAILABLE"}, + {STATUS_INVALID_PIPE_STATE, 
-EIO, "STATUS_INVALID_PIPE_STATE"}, + {STATUS_PIPE_BUSY, -EBUSY, "STATUS_PIPE_BUSY"}, + {STATUS_ILLEGAL_FUNCTION, -EIO, "STATUS_ILLEGAL_FUNCTION"}, + {STATUS_PIPE_DISCONNECTED, -EPIPE, "STATUS_PIPE_DISCONNECTED"}, + {STATUS_PIPE_CLOSING, -EIO, "STATUS_PIPE_CLOSING"}, + {STATUS_PIPE_CONNECTED, -EIO, "STATUS_PIPE_CONNECTED"}, + {STATUS_PIPE_LISTENING, -EIO, "STATUS_PIPE_LISTENING"}, + {STATUS_INVALID_READ_MODE, -EIO, "STATUS_INVALID_READ_MODE"}, + {STATUS_IO_TIMEOUT, -ETIMEDOUT, "STATUS_IO_TIMEOUT"}, + {STATUS_FILE_FORCED_CLOSED, -EIO, "STATUS_FILE_FORCED_CLOSED"}, + {STATUS_PROFILING_NOT_STARTED, -EIO, "STATUS_PROFILING_NOT_STARTED"}, + {STATUS_PROFILING_NOT_STOPPED, -EIO, "STATUS_PROFILING_NOT_STOPPED"}, + {STATUS_COULD_NOT_INTERPRET, -EIO, "STATUS_COULD_NOT_INTERPRET"}, + {STATUS_FILE_IS_A_DIRECTORY, -EISDIR, "STATUS_FILE_IS_A_DIRECTORY"}, + {STATUS_NOT_SUPPORTED, -EOPNOTSUPP, "STATUS_NOT_SUPPORTED"}, + {STATUS_REMOTE_NOT_LISTENING, -EHOSTDOWN, + "STATUS_REMOTE_NOT_LISTENING"}, + {STATUS_DUPLICATE_NAME, -ENOTUNIQ, "STATUS_DUPLICATE_NAME"}, + {STATUS_BAD_NETWORK_PATH, -EINVAL, "STATUS_BAD_NETWORK_PATH"}, + {STATUS_NETWORK_BUSY, -EBUSY, "STATUS_NETWORK_BUSY"}, + {STATUS_DEVICE_DOES_NOT_EXIST, -ENODEV, "STATUS_DEVICE_DOES_NOT_EXIST"}, + {STATUS_TOO_MANY_COMMANDS, -EIO, "STATUS_TOO_MANY_COMMANDS"}, + {STATUS_ADAPTER_HARDWARE_ERROR, -EIO, "STATUS_ADAPTER_HARDWARE_ERROR"}, + {STATUS_INVALID_NETWORK_RESPONSE, -EIO, + "STATUS_INVALID_NETWORK_RESPONSE"}, + {STATUS_UNEXPECTED_NETWORK_ERROR, -EIO, + "STATUS_UNEXPECTED_NETWORK_ERROR"}, + {STATUS_BAD_REMOTE_ADAPTER, -EIO, "STATUS_BAD_REMOTE_ADAPTER"}, + {STATUS_PRINT_QUEUE_FULL, -EIO, "STATUS_PRINT_QUEUE_FULL"}, + {STATUS_NO_SPOOL_SPACE, -EIO, "STATUS_NO_SPOOL_SPACE"}, + {STATUS_PRINT_CANCELLED, -EIO, "STATUS_PRINT_CANCELLED"}, + {STATUS_NETWORK_NAME_DELETED, -EIO, "STATUS_NETWORK_NAME_DELETED"}, + {STATUS_NETWORK_ACCESS_DENIED, -EACCES, "STATUS_NETWORK_ACCESS_DENIED"}, + {STATUS_BAD_DEVICE_TYPE, -EIO, 
"STATUS_BAD_DEVICE_TYPE"}, + {STATUS_BAD_NETWORK_NAME, -ENOENT, "STATUS_BAD_NETWORK_NAME"}, + {STATUS_TOO_MANY_NAMES, -EIO, "STATUS_TOO_MANY_NAMES"}, + {STATUS_TOO_MANY_SESSIONS, -EIO, "STATUS_TOO_MANY_SESSIONS"}, + {STATUS_SHARING_PAUSED, -EIO, "STATUS_SHARING_PAUSED"}, + {STATUS_REQUEST_NOT_ACCEPTED, -EIO, "STATUS_REQUEST_NOT_ACCEPTED"}, + {STATUS_REDIRECTOR_PAUSED, -EIO, "STATUS_REDIRECTOR_PAUSED"}, + {STATUS_NET_WRITE_FAULT, -EIO, "STATUS_NET_WRITE_FAULT"}, + {STATUS_PROFILING_AT_LIMIT, -EIO, "STATUS_PROFILING_AT_LIMIT"}, + {STATUS_NOT_SAME_DEVICE, -EXDEV, "STATUS_NOT_SAME_DEVICE"}, + {STATUS_FILE_RENAMED, -EIO, "STATUS_FILE_RENAMED"}, + {STATUS_VIRTUAL_CIRCUIT_CLOSED, -EIO, "STATUS_VIRTUAL_CIRCUIT_CLOSED"}, + {STATUS_NO_SECURITY_ON_OBJECT, -EIO, "STATUS_NO_SECURITY_ON_OBJECT"}, + {STATUS_CANT_WAIT, -EIO, "STATUS_CANT_WAIT"}, + {STATUS_PIPE_EMPTY, -EIO, "STATUS_PIPE_EMPTY"}, + {STATUS_CANT_ACCESS_DOMAIN_INFO, -EIO, + "STATUS_CANT_ACCESS_DOMAIN_INFO"}, + {STATUS_CANT_TERMINATE_SELF, -EIO, "STATUS_CANT_TERMINATE_SELF"}, + {STATUS_INVALID_SERVER_STATE, -EIO, "STATUS_INVALID_SERVER_STATE"}, + {STATUS_INVALID_DOMAIN_STATE, -EIO, "STATUS_INVALID_DOMAIN_STATE"}, + {STATUS_INVALID_DOMAIN_ROLE, -EIO, "STATUS_INVALID_DOMAIN_ROLE"}, + {STATUS_NO_SUCH_DOMAIN, -EIO, "STATUS_NO_SUCH_DOMAIN"}, + {STATUS_DOMAIN_EXISTS, -EIO, "STATUS_DOMAIN_EXISTS"}, + {STATUS_DOMAIN_LIMIT_EXCEEDED, -EIO, "STATUS_DOMAIN_LIMIT_EXCEEDED"}, + {STATUS_OPLOCK_NOT_GRANTED, -EIO, "STATUS_OPLOCK_NOT_GRANTED"}, + {STATUS_INVALID_OPLOCK_PROTOCOL, -EIO, + "STATUS_INVALID_OPLOCK_PROTOCOL"}, + {STATUS_INTERNAL_DB_CORRUPTION, -EIO, "STATUS_INTERNAL_DB_CORRUPTION"}, + {STATUS_INTERNAL_ERROR, -EIO, "STATUS_INTERNAL_ERROR"}, + {STATUS_GENERIC_NOT_MAPPED, -EIO, "STATUS_GENERIC_NOT_MAPPED"}, + {STATUS_BAD_DESCRIPTOR_FORMAT, -EIO, "STATUS_BAD_DESCRIPTOR_FORMAT"}, + {STATUS_INVALID_USER_BUFFER, -EIO, "STATUS_INVALID_USER_BUFFER"}, + {STATUS_UNEXPECTED_IO_ERROR, -EIO, "STATUS_UNEXPECTED_IO_ERROR"}, + 
{STATUS_UNEXPECTED_MM_CREATE_ERR, -EIO, + "STATUS_UNEXPECTED_MM_CREATE_ERR"}, + {STATUS_UNEXPECTED_MM_MAP_ERROR, -EIO, + "STATUS_UNEXPECTED_MM_MAP_ERROR"}, + {STATUS_UNEXPECTED_MM_EXTEND_ERR, -EIO, + "STATUS_UNEXPECTED_MM_EXTEND_ERR"}, + {STATUS_NOT_LOGON_PROCESS, -EIO, "STATUS_NOT_LOGON_PROCESS"}, + {STATUS_LOGON_SESSION_EXISTS, -EIO, "STATUS_LOGON_SESSION_EXISTS"}, + {STATUS_INVALID_PARAMETER_1, -EINVAL, "STATUS_INVALID_PARAMETER_1"}, + {STATUS_INVALID_PARAMETER_2, -EINVAL, "STATUS_INVALID_PARAMETER_2"}, + {STATUS_INVALID_PARAMETER_3, -EINVAL, "STATUS_INVALID_PARAMETER_3"}, + {STATUS_INVALID_PARAMETER_4, -EINVAL, "STATUS_INVALID_PARAMETER_4"}, + {STATUS_INVALID_PARAMETER_5, -EINVAL, "STATUS_INVALID_PARAMETER_5"}, + {STATUS_INVALID_PARAMETER_6, -EINVAL, "STATUS_INVALID_PARAMETER_6"}, + {STATUS_INVALID_PARAMETER_7, -EINVAL, "STATUS_INVALID_PARAMETER_7"}, + {STATUS_INVALID_PARAMETER_8, -EINVAL, "STATUS_INVALID_PARAMETER_8"}, + {STATUS_INVALID_PARAMETER_9, -EINVAL, "STATUS_INVALID_PARAMETER_9"}, + {STATUS_INVALID_PARAMETER_10, -EINVAL, "STATUS_INVALID_PARAMETER_10"}, + {STATUS_INVALID_PARAMETER_11, -EINVAL, "STATUS_INVALID_PARAMETER_11"}, + {STATUS_INVALID_PARAMETER_12, -EINVAL, "STATUS_INVALID_PARAMETER_12"}, + {STATUS_REDIRECTOR_NOT_STARTED, -EIO, "STATUS_REDIRECTOR_NOT_STARTED"}, + {STATUS_REDIRECTOR_STARTED, -EIO, "STATUS_REDIRECTOR_STARTED"}, + {STATUS_STACK_OVERFLOW, -EIO, "STATUS_STACK_OVERFLOW"}, + {STATUS_NO_SUCH_PACKAGE, -EIO, "STATUS_NO_SUCH_PACKAGE"}, + {STATUS_BAD_FUNCTION_TABLE, -EIO, "STATUS_BAD_FUNCTION_TABLE"}, + {STATUS_VARIABLE_NOT_FOUND, -EIO, "STATUS_VARIABLE_NOT_FOUND"}, + {STATUS_DIRECTORY_NOT_EMPTY, -ENOTEMPTY, "STATUS_DIRECTORY_NOT_EMPTY"}, + {STATUS_FILE_CORRUPT_ERROR, -EIO, "STATUS_FILE_CORRUPT_ERROR"}, + {STATUS_NOT_A_DIRECTORY, -ENOTDIR, "STATUS_NOT_A_DIRECTORY"}, + {STATUS_BAD_LOGON_SESSION_STATE, -EIO, + "STATUS_BAD_LOGON_SESSION_STATE"}, + {STATUS_LOGON_SESSION_COLLISION, -EIO, + "STATUS_LOGON_SESSION_COLLISION"}, + 
{STATUS_NAME_TOO_LONG, -ENAMETOOLONG, "STATUS_NAME_TOO_LONG"}, + {STATUS_FILES_OPEN, -EIO, "STATUS_FILES_OPEN"}, + {STATUS_CONNECTION_IN_USE, -EIO, "STATUS_CONNECTION_IN_USE"}, + {STATUS_MESSAGE_NOT_FOUND, -EIO, "STATUS_MESSAGE_NOT_FOUND"}, + {STATUS_PROCESS_IS_TERMINATING, -EIO, "STATUS_PROCESS_IS_TERMINATING"}, + {STATUS_INVALID_LOGON_TYPE, -EIO, "STATUS_INVALID_LOGON_TYPE"}, + {STATUS_NO_GUID_TRANSLATION, -EIO, "STATUS_NO_GUID_TRANSLATION"}, + {STATUS_CANNOT_IMPERSONATE, -EIO, "STATUS_CANNOT_IMPERSONATE"}, + {STATUS_IMAGE_ALREADY_LOADED, -EIO, "STATUS_IMAGE_ALREADY_LOADED"}, + {STATUS_ABIOS_NOT_PRESENT, -EIO, "STATUS_ABIOS_NOT_PRESENT"}, + {STATUS_ABIOS_LID_NOT_EXIST, -EIO, "STATUS_ABIOS_LID_NOT_EXIST"}, + {STATUS_ABIOS_LID_ALREADY_OWNED, -EIO, + "STATUS_ABIOS_LID_ALREADY_OWNED"}, + {STATUS_ABIOS_NOT_LID_OWNER, -EIO, "STATUS_ABIOS_NOT_LID_OWNER"}, + {STATUS_ABIOS_INVALID_COMMAND, -EIO, "STATUS_ABIOS_INVALID_COMMAND"}, + {STATUS_ABIOS_INVALID_LID, -EIO, "STATUS_ABIOS_INVALID_LID"}, + {STATUS_ABIOS_SELECTOR_NOT_AVAILABLE, -EIO, + "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE"}, + {STATUS_ABIOS_INVALID_SELECTOR, -EIO, "STATUS_ABIOS_INVALID_SELECTOR"}, + {STATUS_NO_LDT, -EIO, "STATUS_NO_LDT"}, + {STATUS_INVALID_LDT_SIZE, -EIO, "STATUS_INVALID_LDT_SIZE"}, + {STATUS_INVALID_LDT_OFFSET, -EIO, "STATUS_INVALID_LDT_OFFSET"}, + {STATUS_INVALID_LDT_DESCRIPTOR, -EIO, "STATUS_INVALID_LDT_DESCRIPTOR"}, + {STATUS_INVALID_IMAGE_NE_FORMAT, -EIO, + "STATUS_INVALID_IMAGE_NE_FORMAT"}, + {STATUS_RXACT_INVALID_STATE, -EIO, "STATUS_RXACT_INVALID_STATE"}, + {STATUS_RXACT_COMMIT_FAILURE, -EIO, "STATUS_RXACT_COMMIT_FAILURE"}, + {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, + {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, + {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, + {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, + {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"}, + {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, + 
{STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, + {STATUS_SPECIAL_GROUP, -EIO, "STATUS_SPECIAL_GROUP"}, + {STATUS_SPECIAL_USER, -EIO, "STATUS_SPECIAL_USER"}, + {STATUS_MEMBERS_PRIMARY_GROUP, -EIO, "STATUS_MEMBERS_PRIMARY_GROUP"}, + {STATUS_FILE_CLOSED, -EBADF, "STATUS_FILE_CLOSED"}, + {STATUS_TOO_MANY_THREADS, -EIO, "STATUS_TOO_MANY_THREADS"}, + {STATUS_THREAD_NOT_IN_PROCESS, -EIO, "STATUS_THREAD_NOT_IN_PROCESS"}, + {STATUS_TOKEN_ALREADY_IN_USE, -EIO, "STATUS_TOKEN_ALREADY_IN_USE"}, + {STATUS_PAGEFILE_QUOTA_EXCEEDED, -EDQUOT, + "STATUS_PAGEFILE_QUOTA_EXCEEDED"}, + {STATUS_COMMITMENT_LIMIT, -EIO, "STATUS_COMMITMENT_LIMIT"}, + {STATUS_INVALID_IMAGE_LE_FORMAT, -EIO, + "STATUS_INVALID_IMAGE_LE_FORMAT"}, + {STATUS_INVALID_IMAGE_NOT_MZ, -EIO, "STATUS_INVALID_IMAGE_NOT_MZ"}, + {STATUS_INVALID_IMAGE_PROTECT, -EIO, "STATUS_INVALID_IMAGE_PROTECT"}, + {STATUS_INVALID_IMAGE_WIN_16, -EIO, "STATUS_INVALID_IMAGE_WIN_16"}, + {STATUS_LOGON_SERVER_CONFLICT, -EIO, "STATUS_LOGON_SERVER_CONFLICT"}, + {STATUS_TIME_DIFFERENCE_AT_DC, -EIO, "STATUS_TIME_DIFFERENCE_AT_DC"}, + {STATUS_SYNCHRONIZATION_REQUIRED, -EIO, + "STATUS_SYNCHRONIZATION_REQUIRED"}, + {STATUS_DLL_NOT_FOUND, -ENOENT, "STATUS_DLL_NOT_FOUND"}, + {STATUS_OPEN_FAILED, -EIO, "STATUS_OPEN_FAILED"}, + {STATUS_IO_PRIVILEGE_FAILED, -EIO, "STATUS_IO_PRIVILEGE_FAILED"}, + {STATUS_ORDINAL_NOT_FOUND, -EIO, "STATUS_ORDINAL_NOT_FOUND"}, + {STATUS_ENTRYPOINT_NOT_FOUND, -EIO, "STATUS_ENTRYPOINT_NOT_FOUND"}, + {STATUS_CONTROL_C_EXIT, -EIO, "STATUS_CONTROL_C_EXIT"}, + {STATUS_LOCAL_DISCONNECT, -EIO, "STATUS_LOCAL_DISCONNECT"}, + {STATUS_REMOTE_DISCONNECT, -ESHUTDOWN, "STATUS_REMOTE_DISCONNECT"}, + {STATUS_REMOTE_RESOURCES, -EIO, "STATUS_REMOTE_RESOURCES"}, + {STATUS_LINK_FAILED, -EXDEV, "STATUS_LINK_FAILED"}, + {STATUS_LINK_TIMEOUT, -ETIMEDOUT, "STATUS_LINK_TIMEOUT"}, + {STATUS_INVALID_CONNECTION, -EIO, "STATUS_INVALID_CONNECTION"}, + {STATUS_INVALID_ADDRESS, -EIO, "STATUS_INVALID_ADDRESS"}, + {STATUS_DLL_INIT_FAILED, -EIO, 
"STATUS_DLL_INIT_FAILED"}, + {STATUS_MISSING_SYSTEMFILE, -EIO, "STATUS_MISSING_SYSTEMFILE"}, + {STATUS_UNHANDLED_EXCEPTION, -EIO, "STATUS_UNHANDLED_EXCEPTION"}, + {STATUS_APP_INIT_FAILURE, -EIO, "STATUS_APP_INIT_FAILURE"}, + {STATUS_PAGEFILE_CREATE_FAILED, -EIO, "STATUS_PAGEFILE_CREATE_FAILED"}, + {STATUS_NO_PAGEFILE, -EIO, "STATUS_NO_PAGEFILE"}, + {STATUS_INVALID_LEVEL, -EIO, "STATUS_INVALID_LEVEL"}, + {STATUS_WRONG_PASSWORD_CORE, -EIO, "STATUS_WRONG_PASSWORD_CORE"}, + {STATUS_ILLEGAL_FLOAT_CONTEXT, -EIO, "STATUS_ILLEGAL_FLOAT_CONTEXT"}, + {STATUS_PIPE_BROKEN, -EPIPE, "STATUS_PIPE_BROKEN"}, + {STATUS_REGISTRY_CORRUPT, -EIO, "STATUS_REGISTRY_CORRUPT"}, + {STATUS_REGISTRY_IO_FAILED, -EIO, "STATUS_REGISTRY_IO_FAILED"}, + {STATUS_NO_EVENT_PAIR, -EIO, "STATUS_NO_EVENT_PAIR"}, + {STATUS_UNRECOGNIZED_VOLUME, -EIO, "STATUS_UNRECOGNIZED_VOLUME"}, + {STATUS_SERIAL_NO_DEVICE_INITED, -EIO, + "STATUS_SERIAL_NO_DEVICE_INITED"}, + {STATUS_NO_SUCH_ALIAS, -EIO, "STATUS_NO_SUCH_ALIAS"}, + {STATUS_MEMBER_NOT_IN_ALIAS, -EIO, "STATUS_MEMBER_NOT_IN_ALIAS"}, + {STATUS_MEMBER_IN_ALIAS, -EIO, "STATUS_MEMBER_IN_ALIAS"}, + {STATUS_ALIAS_EXISTS, -EIO, "STATUS_ALIAS_EXISTS"}, + {STATUS_LOGON_NOT_GRANTED, -EIO, "STATUS_LOGON_NOT_GRANTED"}, + {STATUS_TOO_MANY_SECRETS, -EIO, "STATUS_TOO_MANY_SECRETS"}, + {STATUS_SECRET_TOO_LONG, -EIO, "STATUS_SECRET_TOO_LONG"}, + {STATUS_INTERNAL_DB_ERROR, -EIO, "STATUS_INTERNAL_DB_ERROR"}, + {STATUS_FULLSCREEN_MODE, -EIO, "STATUS_FULLSCREEN_MODE"}, + {STATUS_TOO_MANY_CONTEXT_IDS, -EIO, "STATUS_TOO_MANY_CONTEXT_IDS"}, + {STATUS_LOGON_TYPE_NOT_GRANTED, -EIO, "STATUS_LOGON_TYPE_NOT_GRANTED"}, + {STATUS_NOT_REGISTRY_FILE, -EIO, "STATUS_NOT_REGISTRY_FILE"}, + {STATUS_NT_CROSS_ENCRYPTION_REQUIRED, -EIO, + "STATUS_NT_CROSS_ENCRYPTION_REQUIRED"}, + {STATUS_DOMAIN_CTRLR_CONFIG_ERROR, -EIO, + "STATUS_DOMAIN_CTRLR_CONFIG_ERROR"}, + {STATUS_FT_MISSING_MEMBER, -EIO, "STATUS_FT_MISSING_MEMBER"}, + {STATUS_ILL_FORMED_SERVICE_ENTRY, -EIO, + "STATUS_ILL_FORMED_SERVICE_ENTRY"}, 
+ {STATUS_ILLEGAL_CHARACTER, -EIO, "STATUS_ILLEGAL_CHARACTER"}, + {STATUS_UNMAPPABLE_CHARACTER, -EIO, "STATUS_UNMAPPABLE_CHARACTER"}, + {STATUS_UNDEFINED_CHARACTER, -EIO, "STATUS_UNDEFINED_CHARACTER"}, + {STATUS_FLOPPY_VOLUME, -EIO, "STATUS_FLOPPY_VOLUME"}, + {STATUS_FLOPPY_ID_MARK_NOT_FOUND, -EIO, + "STATUS_FLOPPY_ID_MARK_NOT_FOUND"}, + {STATUS_FLOPPY_WRONG_CYLINDER, -EIO, "STATUS_FLOPPY_WRONG_CYLINDER"}, + {STATUS_FLOPPY_UNKNOWN_ERROR, -EIO, "STATUS_FLOPPY_UNKNOWN_ERROR"}, + {STATUS_FLOPPY_BAD_REGISTERS, -EIO, "STATUS_FLOPPY_BAD_REGISTERS"}, + {STATUS_DISK_RECALIBRATE_FAILED, -EIO, + "STATUS_DISK_RECALIBRATE_FAILED"}, + {STATUS_DISK_OPERATION_FAILED, -EIO, "STATUS_DISK_OPERATION_FAILED"}, + {STATUS_DISK_RESET_FAILED, -EIO, "STATUS_DISK_RESET_FAILED"}, + {STATUS_SHARED_IRQ_BUSY, -EBUSY, "STATUS_SHARED_IRQ_BUSY"}, + {STATUS_FT_ORPHANING, -EIO, "STATUS_FT_ORPHANING"}, + {STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT, -EIO, + "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT"}, + {STATUS_PARTITION_FAILURE, -EIO, "STATUS_PARTITION_FAILURE"}, + {STATUS_INVALID_BLOCK_LENGTH, -EIO, "STATUS_INVALID_BLOCK_LENGTH"}, + {STATUS_DEVICE_NOT_PARTITIONED, -EIO, "STATUS_DEVICE_NOT_PARTITIONED"}, + {STATUS_UNABLE_TO_LOCK_MEDIA, -EIO, "STATUS_UNABLE_TO_LOCK_MEDIA"}, + {STATUS_UNABLE_TO_UNLOAD_MEDIA, -EIO, "STATUS_UNABLE_TO_UNLOAD_MEDIA"}, + {STATUS_EOM_OVERFLOW, -EIO, "STATUS_EOM_OVERFLOW"}, + {STATUS_NO_MEDIA, -EIO, "STATUS_NO_MEDIA"}, + {STATUS_NO_SUCH_MEMBER, -EIO, "STATUS_NO_SUCH_MEMBER"}, + {STATUS_INVALID_MEMBER, -EIO, "STATUS_INVALID_MEMBER"}, + {STATUS_KEY_DELETED, -EIO, "STATUS_KEY_DELETED"}, + {STATUS_NO_LOG_SPACE, -EIO, "STATUS_NO_LOG_SPACE"}, + {STATUS_TOO_MANY_SIDS, -EIO, "STATUS_TOO_MANY_SIDS"}, + {STATUS_LM_CROSS_ENCRYPTION_REQUIRED, -EIO, + "STATUS_LM_CROSS_ENCRYPTION_REQUIRED"}, + {STATUS_KEY_HAS_CHILDREN, -EIO, "STATUS_KEY_HAS_CHILDREN"}, + {STATUS_CHILD_MUST_BE_VOLATILE, -EIO, "STATUS_CHILD_MUST_BE_VOLATILE"}, + {STATUS_DEVICE_CONFIGURATION_ERROR, -EIO, + 
"STATUS_DEVICE_CONFIGURATION_ERROR"}, + {STATUS_DRIVER_INTERNAL_ERROR, -EIO, "STATUS_DRIVER_INTERNAL_ERROR"}, + {STATUS_INVALID_DEVICE_STATE, -EIO, "STATUS_INVALID_DEVICE_STATE"}, + {STATUS_IO_DEVICE_ERROR, -EIO, "STATUS_IO_DEVICE_ERROR"}, + {STATUS_DEVICE_PROTOCOL_ERROR, -EIO, "STATUS_DEVICE_PROTOCOL_ERROR"}, + {STATUS_BACKUP_CONTROLLER, -EIO, "STATUS_BACKUP_CONTROLLER"}, + {STATUS_LOG_FILE_FULL, -EIO, "STATUS_LOG_FILE_FULL"}, + {STATUS_TOO_LATE, -EIO, "STATUS_TOO_LATE"}, + {STATUS_NO_TRUST_LSA_SECRET, -EIO, "STATUS_NO_TRUST_LSA_SECRET"}, + {STATUS_NO_TRUST_SAM_ACCOUNT, -EIO, "STATUS_NO_TRUST_SAM_ACCOUNT"}, + {STATUS_TRUSTED_DOMAIN_FAILURE, -EIO, "STATUS_TRUSTED_DOMAIN_FAILURE"}, + {STATUS_TRUSTED_RELATIONSHIP_FAILURE, -EIO, + "STATUS_TRUSTED_RELATIONSHIP_FAILURE"}, + {STATUS_EVENTLOG_FILE_CORRUPT, -EIO, "STATUS_EVENTLOG_FILE_CORRUPT"}, + {STATUS_EVENTLOG_CANT_START, -EIO, "STATUS_EVENTLOG_CANT_START"}, + {STATUS_TRUST_FAILURE, -EIO, "STATUS_TRUST_FAILURE"}, + {STATUS_MUTANT_LIMIT_EXCEEDED, -EIO, "STATUS_MUTANT_LIMIT_EXCEEDED"}, + {STATUS_NETLOGON_NOT_STARTED, -EIO, "STATUS_NETLOGON_NOT_STARTED"}, + {STATUS_ACCOUNT_EXPIRED, -EKEYEXPIRED, "STATUS_ACCOUNT_EXPIRED"}, + {STATUS_POSSIBLE_DEADLOCK, -EIO, "STATUS_POSSIBLE_DEADLOCK"}, + {STATUS_NETWORK_CREDENTIAL_CONFLICT, -EIO, + "STATUS_NETWORK_CREDENTIAL_CONFLICT"}, + {STATUS_REMOTE_SESSION_LIMIT, -EIO, "STATUS_REMOTE_SESSION_LIMIT"}, + {STATUS_EVENTLOG_FILE_CHANGED, -EIO, "STATUS_EVENTLOG_FILE_CHANGED"}, + {STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT, -EIO, + "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT"}, + {STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT, -EIO, + "STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT"}, + {STATUS_NOLOGON_SERVER_TRUST_ACCOUNT, -EIO, + "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT"}, + {STATUS_DOMAIN_TRUST_INCONSISTENT, -EIO, + "STATUS_DOMAIN_TRUST_INCONSISTENT"}, + {STATUS_FS_DRIVER_REQUIRED, -EIO, "STATUS_FS_DRIVER_REQUIRED"}, + {STATUS_IMAGE_ALREADY_LOADED_AS_DLL, -EIO, + "STATUS_IMAGE_ALREADY_LOADED_AS_DLL"}, + 
{STATUS_NETWORK_OPEN_RESTRICTION, -EIO, + "STATUS_NETWORK_OPEN_RESTRICTION"}, + {STATUS_NO_USER_SESSION_KEY, -EIO, "STATUS_NO_USER_SESSION_KEY"}, + {STATUS_USER_SESSION_DELETED, -EIO, "STATUS_USER_SESSION_DELETED"}, + {STATUS_RESOURCE_LANG_NOT_FOUND, -EIO, + "STATUS_RESOURCE_LANG_NOT_FOUND"}, + {STATUS_INSUFF_SERVER_RESOURCES, -EIO, + "STATUS_INSUFF_SERVER_RESOURCES"}, + {STATUS_INVALID_BUFFER_SIZE, -EIO, "STATUS_INVALID_BUFFER_SIZE"}, + {STATUS_INVALID_ADDRESS_COMPONENT, -EIO, + "STATUS_INVALID_ADDRESS_COMPONENT"}, + {STATUS_INVALID_ADDRESS_WILDCARD, -EIO, + "STATUS_INVALID_ADDRESS_WILDCARD"}, + {STATUS_TOO_MANY_ADDRESSES, -EIO, "STATUS_TOO_MANY_ADDRESSES"}, + {STATUS_ADDRESS_ALREADY_EXISTS, -EADDRINUSE, + "STATUS_ADDRESS_ALREADY_EXISTS"}, + {STATUS_ADDRESS_CLOSED, -EIO, "STATUS_ADDRESS_CLOSED"}, + {STATUS_CONNECTION_DISCONNECTED, -ECONNABORTED, + "STATUS_CONNECTION_DISCONNECTED"}, + {STATUS_CONNECTION_RESET, -ENETRESET, "STATUS_CONNECTION_RESET"}, + {STATUS_TOO_MANY_NODES, -EIO, "STATUS_TOO_MANY_NODES"}, + {STATUS_TRANSACTION_ABORTED, -EIO, "STATUS_TRANSACTION_ABORTED"}, + {STATUS_TRANSACTION_TIMED_OUT, -EIO, "STATUS_TRANSACTION_TIMED_OUT"}, + {STATUS_TRANSACTION_NO_RELEASE, -EIO, "STATUS_TRANSACTION_NO_RELEASE"}, + {STATUS_TRANSACTION_NO_MATCH, -EIO, "STATUS_TRANSACTION_NO_MATCH"}, + {STATUS_TRANSACTION_RESPONDED, -EIO, "STATUS_TRANSACTION_RESPONDED"}, + {STATUS_TRANSACTION_INVALID_ID, -EIO, "STATUS_TRANSACTION_INVALID_ID"}, + {STATUS_TRANSACTION_INVALID_TYPE, -EIO, + "STATUS_TRANSACTION_INVALID_TYPE"}, + {STATUS_NOT_SERVER_SESSION, -EIO, "STATUS_NOT_SERVER_SESSION"}, + {STATUS_NOT_CLIENT_SESSION, -EIO, "STATUS_NOT_CLIENT_SESSION"}, + {STATUS_CANNOT_LOAD_REGISTRY_FILE, -EIO, + "STATUS_CANNOT_LOAD_REGISTRY_FILE"}, + {STATUS_DEBUG_ATTACH_FAILED, -EIO, "STATUS_DEBUG_ATTACH_FAILED"}, + {STATUS_SYSTEM_PROCESS_TERMINATED, -EIO, + "STATUS_SYSTEM_PROCESS_TERMINATED"}, + {STATUS_DATA_NOT_ACCEPTED, -EIO, "STATUS_DATA_NOT_ACCEPTED"}, + {STATUS_NO_BROWSER_SERVERS_FOUND, 
-EIO, + "STATUS_NO_BROWSER_SERVERS_FOUND"}, + {STATUS_VDM_HARD_ERROR, -EIO, "STATUS_VDM_HARD_ERROR"}, + {STATUS_DRIVER_CANCEL_TIMEOUT, -EIO, "STATUS_DRIVER_CANCEL_TIMEOUT"}, + {STATUS_REPLY_MESSAGE_MISMATCH, -EIO, "STATUS_REPLY_MESSAGE_MISMATCH"}, + {STATUS_MAPPED_ALIGNMENT, -EIO, "STATUS_MAPPED_ALIGNMENT"}, + {STATUS_IMAGE_CHECKSUM_MISMATCH, -EIO, + "STATUS_IMAGE_CHECKSUM_MISMATCH"}, + {STATUS_LOST_WRITEBEHIND_DATA, -EIO, "STATUS_LOST_WRITEBEHIND_DATA"}, + {STATUS_CLIENT_SERVER_PARAMETERS_INVALID, -EIO, + "STATUS_CLIENT_SERVER_PARAMETERS_INVALID"}, + {STATUS_PASSWORD_MUST_CHANGE, -EIO, "STATUS_PASSWORD_MUST_CHANGE"}, + {STATUS_NOT_FOUND, -ENOENT, "STATUS_NOT_FOUND"}, + {STATUS_NOT_TINY_STREAM, -EIO, "STATUS_NOT_TINY_STREAM"}, + {STATUS_RECOVERY_FAILURE, -EIO, "STATUS_RECOVERY_FAILURE"}, + {STATUS_STACK_OVERFLOW_READ, -EIO, "STATUS_STACK_OVERFLOW_READ"}, + {STATUS_FAIL_CHECK, -EIO, "STATUS_FAIL_CHECK"}, + {STATUS_DUPLICATE_OBJECTID, -EIO, "STATUS_DUPLICATE_OBJECTID"}, + {STATUS_OBJECTID_EXISTS, -EIO, "STATUS_OBJECTID_EXISTS"}, + {STATUS_CONVERT_TO_LARGE, -EIO, "STATUS_CONVERT_TO_LARGE"}, + {STATUS_RETRY, -EAGAIN, "STATUS_RETRY"}, + {STATUS_FOUND_OUT_OF_SCOPE, -EIO, "STATUS_FOUND_OUT_OF_SCOPE"}, + {STATUS_ALLOCATE_BUCKET, -EIO, "STATUS_ALLOCATE_BUCKET"}, + {STATUS_PROPSET_NOT_FOUND, -EIO, "STATUS_PROPSET_NOT_FOUND"}, + {STATUS_MARSHALL_OVERFLOW, -EIO, "STATUS_MARSHALL_OVERFLOW"}, + {STATUS_INVALID_VARIANT, -EIO, "STATUS_INVALID_VARIANT"}, + {STATUS_DOMAIN_CONTROLLER_NOT_FOUND, -EIO, + "STATUS_DOMAIN_CONTROLLER_NOT_FOUND"}, + {STATUS_ACCOUNT_LOCKED_OUT, -EIO, "STATUS_ACCOUNT_LOCKED_OUT"}, + {STATUS_HANDLE_NOT_CLOSABLE, -EIO, "STATUS_HANDLE_NOT_CLOSABLE"}, + {STATUS_CONNECTION_REFUSED, -EIO, "STATUS_CONNECTION_REFUSED"}, + {STATUS_GRACEFUL_DISCONNECT, -EIO, "STATUS_GRACEFUL_DISCONNECT"}, + {STATUS_ADDRESS_ALREADY_ASSOCIATED, -EIO, + "STATUS_ADDRESS_ALREADY_ASSOCIATED"}, + {STATUS_ADDRESS_NOT_ASSOCIATED, -EIO, "STATUS_ADDRESS_NOT_ASSOCIATED"}, + 
{STATUS_CONNECTION_INVALID, -EIO, "STATUS_CONNECTION_INVALID"}, + {STATUS_CONNECTION_ACTIVE, -EIO, "STATUS_CONNECTION_ACTIVE"}, + {STATUS_NETWORK_UNREACHABLE, -ENETUNREACH, + "STATUS_NETWORK_UNREACHABLE"}, + {STATUS_HOST_UNREACHABLE, -EHOSTDOWN, "STATUS_HOST_UNREACHABLE"}, + {STATUS_PROTOCOL_UNREACHABLE, -ENETUNREACH, + "STATUS_PROTOCOL_UNREACHABLE"}, + {STATUS_PORT_UNREACHABLE, -ENETUNREACH, "STATUS_PORT_UNREACHABLE"}, + {STATUS_REQUEST_ABORTED, -EIO, "STATUS_REQUEST_ABORTED"}, + {STATUS_CONNECTION_ABORTED, -ECONNABORTED, "STATUS_CONNECTION_ABORTED"}, + {STATUS_BAD_COMPRESSION_BUFFER, -EIO, "STATUS_BAD_COMPRESSION_BUFFER"}, + {STATUS_USER_MAPPED_FILE, -EIO, "STATUS_USER_MAPPED_FILE"}, + {STATUS_AUDIT_FAILED, -EIO, "STATUS_AUDIT_FAILED"}, + {STATUS_TIMER_RESOLUTION_NOT_SET, -EIO, + "STATUS_TIMER_RESOLUTION_NOT_SET"}, + {STATUS_CONNECTION_COUNT_LIMIT, -EIO, "STATUS_CONNECTION_COUNT_LIMIT"}, + {STATUS_LOGIN_TIME_RESTRICTION, -EACCES, + "STATUS_LOGIN_TIME_RESTRICTION"}, + {STATUS_LOGIN_WKSTA_RESTRICTION, -EACCES, + "STATUS_LOGIN_WKSTA_RESTRICTION"}, + {STATUS_IMAGE_MP_UP_MISMATCH, -EIO, "STATUS_IMAGE_MP_UP_MISMATCH"}, + {STATUS_INSUFFICIENT_LOGON_INFO, -EIO, + "STATUS_INSUFFICIENT_LOGON_INFO"}, + {STATUS_BAD_DLL_ENTRYPOINT, -EIO, "STATUS_BAD_DLL_ENTRYPOINT"}, + {STATUS_BAD_SERVICE_ENTRYPOINT, -EIO, "STATUS_BAD_SERVICE_ENTRYPOINT"}, + {STATUS_LPC_REPLY_LOST, -EIO, "STATUS_LPC_REPLY_LOST"}, + {STATUS_IP_ADDRESS_CONFLICT1, -EIO, "STATUS_IP_ADDRESS_CONFLICT1"}, + {STATUS_IP_ADDRESS_CONFLICT2, -EIO, "STATUS_IP_ADDRESS_CONFLICT2"}, + {STATUS_REGISTRY_QUOTA_LIMIT, -EDQUOT, "STATUS_REGISTRY_QUOTA_LIMIT"}, + {STATUS_PATH_NOT_COVERED, -EREMOTE, "STATUS_PATH_NOT_COVERED"}, + {STATUS_NO_CALLBACK_ACTIVE, -EIO, "STATUS_NO_CALLBACK_ACTIVE"}, + {STATUS_LICENSE_QUOTA_EXCEEDED, -EACCES, + "STATUS_LICENSE_QUOTA_EXCEEDED"}, + {STATUS_PWD_TOO_SHORT, -EIO, "STATUS_PWD_TOO_SHORT"}, + {STATUS_PWD_TOO_RECENT, -EIO, "STATUS_PWD_TOO_RECENT"}, + {STATUS_PWD_HISTORY_CONFLICT, -EIO, 
"STATUS_PWD_HISTORY_CONFLICT"}, + {STATUS_PLUGPLAY_NO_DEVICE, -EIO, "STATUS_PLUGPLAY_NO_DEVICE"}, + {STATUS_UNSUPPORTED_COMPRESSION, -EIO, + "STATUS_UNSUPPORTED_COMPRESSION"}, + {STATUS_INVALID_HW_PROFILE, -EIO, "STATUS_INVALID_HW_PROFILE"}, + {STATUS_INVALID_PLUGPLAY_DEVICE_PATH, -EIO, + "STATUS_INVALID_PLUGPLAY_DEVICE_PATH"}, + {STATUS_DRIVER_ORDINAL_NOT_FOUND, -EIO, + "STATUS_DRIVER_ORDINAL_NOT_FOUND"}, + {STATUS_DRIVER_ENTRYPOINT_NOT_FOUND, -EIO, + "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND"}, + {STATUS_RESOURCE_NOT_OWNED, -EIO, "STATUS_RESOURCE_NOT_OWNED"}, + {STATUS_TOO_MANY_LINKS, -EMLINK, "STATUS_TOO_MANY_LINKS"}, + {STATUS_QUOTA_LIST_INCONSISTENT, -EIO, + "STATUS_QUOTA_LIST_INCONSISTENT"}, + {STATUS_FILE_IS_OFFLINE, -EIO, "STATUS_FILE_IS_OFFLINE"}, + {STATUS_EVALUATION_EXPIRATION, -EIO, "STATUS_EVALUATION_EXPIRATION"}, + {STATUS_ILLEGAL_DLL_RELOCATION, -EIO, "STATUS_ILLEGAL_DLL_RELOCATION"}, + {STATUS_LICENSE_VIOLATION, -EIO, "STATUS_LICENSE_VIOLATION"}, + {STATUS_DLL_INIT_FAILED_LOGOFF, -EIO, "STATUS_DLL_INIT_FAILED_LOGOFF"}, + {STATUS_DRIVER_UNABLE_TO_LOAD, -EIO, "STATUS_DRIVER_UNABLE_TO_LOAD"}, + {STATUS_DFS_UNAVAILABLE, -EIO, "STATUS_DFS_UNAVAILABLE"}, + {STATUS_VOLUME_DISMOUNTED, -EIO, "STATUS_VOLUME_DISMOUNTED"}, + {STATUS_WX86_INTERNAL_ERROR, -EIO, "STATUS_WX86_INTERNAL_ERROR"}, + {STATUS_WX86_FLOAT_STACK_CHECK, -EIO, "STATUS_WX86_FLOAT_STACK_CHECK"}, + {STATUS_VALIDATE_CONTINUE, -EIO, "STATUS_VALIDATE_CONTINUE"}, + {STATUS_NO_MATCH, -EIO, "STATUS_NO_MATCH"}, + {STATUS_NO_MORE_MATCHES, -EIO, "STATUS_NO_MORE_MATCHES"}, + {STATUS_NOT_A_REPARSE_POINT, -EIO, "STATUS_NOT_A_REPARSE_POINT"}, + {STATUS_IO_REPARSE_TAG_INVALID, -EIO, "STATUS_IO_REPARSE_TAG_INVALID"}, + {STATUS_IO_REPARSE_TAG_MISMATCH, -EIO, + "STATUS_IO_REPARSE_TAG_MISMATCH"}, + {STATUS_IO_REPARSE_DATA_INVALID, -EIO, + "STATUS_IO_REPARSE_DATA_INVALID"}, + {STATUS_IO_REPARSE_TAG_NOT_HANDLED, -EIO, + "STATUS_IO_REPARSE_TAG_NOT_HANDLED"}, + {STATUS_REPARSE_POINT_NOT_RESOLVED, -EIO, + 
"STATUS_REPARSE_POINT_NOT_RESOLVED"}, + {STATUS_DIRECTORY_IS_A_REPARSE_POINT, -EIO, + "STATUS_DIRECTORY_IS_A_REPARSE_POINT"}, + {STATUS_RANGE_LIST_CONFLICT, -EIO, "STATUS_RANGE_LIST_CONFLICT"}, + {STATUS_SOURCE_ELEMENT_EMPTY, -EIO, "STATUS_SOURCE_ELEMENT_EMPTY"}, + {STATUS_DESTINATION_ELEMENT_FULL, -EIO, + "STATUS_DESTINATION_ELEMENT_FULL"}, + {STATUS_ILLEGAL_ELEMENT_ADDRESS, -EIO, + "STATUS_ILLEGAL_ELEMENT_ADDRESS"}, + {STATUS_MAGAZINE_NOT_PRESENT, -EIO, "STATUS_MAGAZINE_NOT_PRESENT"}, + {STATUS_REINITIALIZATION_NEEDED, -EIO, + "STATUS_REINITIALIZATION_NEEDED"}, + {STATUS_ENCRYPTION_FAILED, -EIO, "STATUS_ENCRYPTION_FAILED"}, + {STATUS_DECRYPTION_FAILED, -EIO, "STATUS_DECRYPTION_FAILED"}, + {STATUS_RANGE_NOT_FOUND, -EIO, "STATUS_RANGE_NOT_FOUND"}, + {STATUS_NO_RECOVERY_POLICY, -EIO, "STATUS_NO_RECOVERY_POLICY"}, + {STATUS_NO_EFS, -EIO, "STATUS_NO_EFS"}, + {STATUS_WRONG_EFS, -EIO, "STATUS_WRONG_EFS"}, + {STATUS_NO_USER_KEYS, -EIO, "STATUS_NO_USER_KEYS"}, + {STATUS_FILE_NOT_ENCRYPTED, -EIO, "STATUS_FILE_NOT_ENCRYPTED"}, + {STATUS_NOT_EXPORT_FORMAT, -EIO, "STATUS_NOT_EXPORT_FORMAT"}, + {STATUS_FILE_ENCRYPTED, -EIO, "STATUS_FILE_ENCRYPTED"}, + {STATUS_WMI_GUID_NOT_FOUND, -EIO, "STATUS_WMI_GUID_NOT_FOUND"}, + {STATUS_WMI_INSTANCE_NOT_FOUND, -EIO, "STATUS_WMI_INSTANCE_NOT_FOUND"}, + {STATUS_WMI_ITEMID_NOT_FOUND, -EIO, "STATUS_WMI_ITEMID_NOT_FOUND"}, + {STATUS_WMI_TRY_AGAIN, -EIO, "STATUS_WMI_TRY_AGAIN"}, + {STATUS_SHARED_POLICY, -EIO, "STATUS_SHARED_POLICY"}, + {STATUS_POLICY_OBJECT_NOT_FOUND, -EIO, + "STATUS_POLICY_OBJECT_NOT_FOUND"}, + {STATUS_POLICY_ONLY_IN_DS, -EIO, "STATUS_POLICY_ONLY_IN_DS"}, + {STATUS_VOLUME_NOT_UPGRADED, -EIO, "STATUS_VOLUME_NOT_UPGRADED"}, + {STATUS_REMOTE_STORAGE_NOT_ACTIVE, -EIO, + "STATUS_REMOTE_STORAGE_NOT_ACTIVE"}, + {STATUS_REMOTE_STORAGE_MEDIA_ERROR, -EIO, + "STATUS_REMOTE_STORAGE_MEDIA_ERROR"}, + {STATUS_NO_TRACKING_SERVICE, -EIO, "STATUS_NO_TRACKING_SERVICE"}, + {STATUS_SERVER_SID_MISMATCH, -EIO, "STATUS_SERVER_SID_MISMATCH"}, + 
{STATUS_DS_NO_ATTRIBUTE_OR_VALUE, -EIO, + "STATUS_DS_NO_ATTRIBUTE_OR_VALUE"}, + {STATUS_DS_INVALID_ATTRIBUTE_SYNTAX, -EIO, + "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX"}, + {STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED, -EIO, + "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED"}, + {STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS, -EIO, + "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS"}, + {STATUS_DS_BUSY, -EBUSY, "STATUS_DS_BUSY"}, + {STATUS_DS_UNAVAILABLE, -EIO, "STATUS_DS_UNAVAILABLE"}, + {STATUS_DS_NO_RIDS_ALLOCATED, -EIO, "STATUS_DS_NO_RIDS_ALLOCATED"}, + {STATUS_DS_NO_MORE_RIDS, -EIO, "STATUS_DS_NO_MORE_RIDS"}, + {STATUS_DS_INCORRECT_ROLE_OWNER, -EIO, + "STATUS_DS_INCORRECT_ROLE_OWNER"}, + {STATUS_DS_RIDMGR_INIT_ERROR, -EIO, "STATUS_DS_RIDMGR_INIT_ERROR"}, + {STATUS_DS_OBJ_CLASS_VIOLATION, -EIO, "STATUS_DS_OBJ_CLASS_VIOLATION"}, + {STATUS_DS_CANT_ON_NON_LEAF, -EIO, "STATUS_DS_CANT_ON_NON_LEAF"}, + {STATUS_DS_CANT_ON_RDN, -EIO, "STATUS_DS_CANT_ON_RDN"}, + {STATUS_DS_CANT_MOD_OBJ_CLASS, -EIO, "STATUS_DS_CANT_MOD_OBJ_CLASS"}, + {STATUS_DS_CROSS_DOM_MOVE_FAILED, -EIO, + "STATUS_DS_CROSS_DOM_MOVE_FAILED"}, + {STATUS_DS_GC_NOT_AVAILABLE, -EIO, "STATUS_DS_GC_NOT_AVAILABLE"}, + {STATUS_DIRECTORY_SERVICE_REQUIRED, -EIO, + "STATUS_DIRECTORY_SERVICE_REQUIRED"}, + {STATUS_REPARSE_ATTRIBUTE_CONFLICT, -EIO, + "STATUS_REPARSE_ATTRIBUTE_CONFLICT"}, + {STATUS_CANT_ENABLE_DENY_ONLY, -EIO, "STATUS_CANT_ENABLE_DENY_ONLY"}, + {STATUS_FLOAT_MULTIPLE_FAULTS, -EIO, "STATUS_FLOAT_MULTIPLE_FAULTS"}, + {STATUS_FLOAT_MULTIPLE_TRAPS, -EIO, "STATUS_FLOAT_MULTIPLE_TRAPS"}, + {STATUS_DEVICE_REMOVED, -EIO, "STATUS_DEVICE_REMOVED"}, + {STATUS_JOURNAL_DELETE_IN_PROGRESS, -EIO, + "STATUS_JOURNAL_DELETE_IN_PROGRESS"}, + {STATUS_JOURNAL_NOT_ACTIVE, -EIO, "STATUS_JOURNAL_NOT_ACTIVE"}, + {STATUS_NOINTERFACE, -EIO, "STATUS_NOINTERFACE"}, + {STATUS_DS_ADMIN_LIMIT_EXCEEDED, -EIO, + "STATUS_DS_ADMIN_LIMIT_EXCEEDED"}, + {STATUS_DRIVER_FAILED_SLEEP, -EIO, "STATUS_DRIVER_FAILED_SLEEP"}, + {STATUS_MUTUAL_AUTHENTICATION_FAILED, -EIO, + 
"STATUS_MUTUAL_AUTHENTICATION_FAILED"}, + {STATUS_CORRUPT_SYSTEM_FILE, -EIO, "STATUS_CORRUPT_SYSTEM_FILE"}, + {STATUS_DATATYPE_MISALIGNMENT_ERROR, -EIO, + "STATUS_DATATYPE_MISALIGNMENT_ERROR"}, + {STATUS_WMI_READ_ONLY, -EROFS, "STATUS_WMI_READ_ONLY"}, + {STATUS_WMI_SET_FAILURE, -EIO, "STATUS_WMI_SET_FAILURE"}, + {STATUS_COMMITMENT_MINIMUM, -EIO, "STATUS_COMMITMENT_MINIMUM"}, + {STATUS_REG_NAT_CONSUMPTION, -EIO, "STATUS_REG_NAT_CONSUMPTION"}, + {STATUS_TRANSPORT_FULL, -EIO, "STATUS_TRANSPORT_FULL"}, + {STATUS_DS_SAM_INIT_FAILURE, -EIO, "STATUS_DS_SAM_INIT_FAILURE"}, + {STATUS_ONLY_IF_CONNECTED, -EIO, "STATUS_ONLY_IF_CONNECTED"}, + {STATUS_DS_SENSITIVE_GROUP_VIOLATION, -EIO, + "STATUS_DS_SENSITIVE_GROUP_VIOLATION"}, + {STATUS_PNP_RESTART_ENUMERATION, -EIO, + "STATUS_PNP_RESTART_ENUMERATION"}, + {STATUS_JOURNAL_ENTRY_DELETED, -EIO, "STATUS_JOURNAL_ENTRY_DELETED"}, + {STATUS_DS_CANT_MOD_PRIMARYGROUPID, -EIO, + "STATUS_DS_CANT_MOD_PRIMARYGROUPID"}, + {STATUS_SYSTEM_IMAGE_BAD_SIGNATURE, -EIO, + "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE"}, + {STATUS_PNP_REBOOT_REQUIRED, -EIO, "STATUS_PNP_REBOOT_REQUIRED"}, + {STATUS_POWER_STATE_INVALID, -EIO, "STATUS_POWER_STATE_INVALID"}, + {STATUS_DS_INVALID_GROUP_TYPE, -EIO, "STATUS_DS_INVALID_GROUP_TYPE"}, + {STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN, -EIO, + "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN"}, + {STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN, -EIO, + "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN"}, + {STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER, -EIO, + "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER"}, + {STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER, -EIO, + "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER"}, + {STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER, -EIO, + "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER"}, + {STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER, -EIO, + "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER"}, + {STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER, -EIO, + "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER"}, + 
{STATUS_DS_HAVE_PRIMARY_MEMBERS, -EIO, + "STATUS_DS_HAVE_PRIMARY_MEMBERS"}, + {STATUS_WMI_NOT_SUPPORTED, -EOPNOTSUPP, "STATUS_WMI_NOT_SUPPORTED"}, + {STATUS_INSUFFICIENT_POWER, -EIO, "STATUS_INSUFFICIENT_POWER"}, + {STATUS_SAM_NEED_BOOTKEY_PASSWORD, -EIO, + "STATUS_SAM_NEED_BOOTKEY_PASSWORD"}, + {STATUS_SAM_NEED_BOOTKEY_FLOPPY, -EIO, + "STATUS_SAM_NEED_BOOTKEY_FLOPPY"}, + {STATUS_DS_CANT_START, -EIO, "STATUS_DS_CANT_START"}, + {STATUS_DS_INIT_FAILURE, -EIO, "STATUS_DS_INIT_FAILURE"}, + {STATUS_SAM_INIT_FAILURE, -EIO, "STATUS_SAM_INIT_FAILURE"}, + {STATUS_DS_GC_REQUIRED, -EIO, "STATUS_DS_GC_REQUIRED"}, + {STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY, -EIO, + "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY"}, + {STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS, -EIO, + "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS"}, + {STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED, -EDQUOT, + "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED"}, + {STATUS_MULTIPLE_FAULT_VIOLATION, -EIO, + "STATUS_MULTIPLE_FAULT_VIOLATION"}, + {STATUS_CURRENT_DOMAIN_NOT_ALLOWED, -EIO, + "STATUS_CURRENT_DOMAIN_NOT_ALLOWED"}, + {STATUS_CANNOT_MAKE, -EIO, "STATUS_CANNOT_MAKE"}, + {STATUS_SYSTEM_SHUTDOWN, -EIO, "STATUS_SYSTEM_SHUTDOWN"}, + {STATUS_DS_INIT_FAILURE_CONSOLE, -EIO, + "STATUS_DS_INIT_FAILURE_CONSOLE"}, + {STATUS_DS_SAM_INIT_FAILURE_CONSOLE, -EIO, + "STATUS_DS_SAM_INIT_FAILURE_CONSOLE"}, + {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, + "STATUS_UNFINISHED_CONTEXT_DELETED"}, + {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, + {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, + {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, + "STATUS_WRONG_CREDENTIAL_HANDLE"}, + {STATUS_CRYPTO_SYSTEM_INVALID, -EIO, "STATUS_CRYPTO_SYSTEM_INVALID"}, + {STATUS_MAX_REFERRALS_EXCEEDED, -EIO, "STATUS_MAX_REFERRALS_EXCEEDED"}, + {STATUS_MUST_BE_KDC, -EIO, "STATUS_MUST_BE_KDC"}, + {STATUS_STRONG_CRYPTO_NOT_SUPPORTED, -EIO, + "STATUS_STRONG_CRYPTO_NOT_SUPPORTED"}, + {STATUS_TOO_MANY_PRINCIPALS, -EIO, 
"STATUS_TOO_MANY_PRINCIPALS"}, + {STATUS_NO_PA_DATA, -EIO, "STATUS_NO_PA_DATA"}, + {STATUS_PKINIT_NAME_MISMATCH, -EIO, "STATUS_PKINIT_NAME_MISMATCH"}, + {STATUS_SMARTCARD_LOGON_REQUIRED, -EIO, + "STATUS_SMARTCARD_LOGON_REQUIRED"}, + {STATUS_KDC_INVALID_REQUEST, -EIO, "STATUS_KDC_INVALID_REQUEST"}, + {STATUS_KDC_UNABLE_TO_REFER, -EIO, "STATUS_KDC_UNABLE_TO_REFER"}, + {STATUS_KDC_UNKNOWN_ETYPE, -EIO, "STATUS_KDC_UNKNOWN_ETYPE"}, + {STATUS_SHUTDOWN_IN_PROGRESS, -EIO, "STATUS_SHUTDOWN_IN_PROGRESS"}, + {STATUS_SERVER_SHUTDOWN_IN_PROGRESS, -EIO, + "STATUS_SERVER_SHUTDOWN_IN_PROGRESS"}, + {STATUS_NOT_SUPPORTED_ON_SBS, -EOPNOTSUPP, + "STATUS_NOT_SUPPORTED_ON_SBS"}, + {STATUS_WMI_GUID_DISCONNECTED, -EIO, "STATUS_WMI_GUID_DISCONNECTED"}, + {STATUS_WMI_ALREADY_DISABLED, -EIO, "STATUS_WMI_ALREADY_DISABLED"}, + {STATUS_WMI_ALREADY_ENABLED, -EIO, "STATUS_WMI_ALREADY_ENABLED"}, + {STATUS_MFT_TOO_FRAGMENTED, -EIO, "STATUS_MFT_TOO_FRAGMENTED"}, + {STATUS_COPY_PROTECTION_FAILURE, -EIO, + "STATUS_COPY_PROTECTION_FAILURE"}, + {STATUS_CSS_AUTHENTICATION_FAILURE, -EIO, + "STATUS_CSS_AUTHENTICATION_FAILURE"}, + {STATUS_CSS_KEY_NOT_PRESENT, -EIO, "STATUS_CSS_KEY_NOT_PRESENT"}, + {STATUS_CSS_KEY_NOT_ESTABLISHED, -EIO, + "STATUS_CSS_KEY_NOT_ESTABLISHED"}, + {STATUS_CSS_SCRAMBLED_SECTOR, -EIO, "STATUS_CSS_SCRAMBLED_SECTOR"}, + {STATUS_CSS_REGION_MISMATCH, -EIO, "STATUS_CSS_REGION_MISMATCH"}, + {STATUS_CSS_RESETS_EXHAUSTED, -EIO, "STATUS_CSS_RESETS_EXHAUSTED"}, + {STATUS_PKINIT_FAILURE, -EIO, "STATUS_PKINIT_FAILURE"}, + {STATUS_SMARTCARD_SUBSYSTEM_FAILURE, -EIO, + "STATUS_SMARTCARD_SUBSYSTEM_FAILURE"}, + {STATUS_NO_KERB_KEY, -EIO, "STATUS_NO_KERB_KEY"}, + {STATUS_HOST_DOWN, -EIO, "STATUS_HOST_DOWN"}, + {STATUS_UNSUPPORTED_PREAUTH, -EIO, "STATUS_UNSUPPORTED_PREAUTH"}, + {STATUS_EFS_ALG_BLOB_TOO_BIG, -EIO, "STATUS_EFS_ALG_BLOB_TOO_BIG"}, + {STATUS_PORT_NOT_SET, -EIO, "STATUS_PORT_NOT_SET"}, + {STATUS_DEBUGGER_INACTIVE, -EIO, "STATUS_DEBUGGER_INACTIVE"}, + {STATUS_DS_VERSION_CHECK_FAILURE, -EIO, 
+ "STATUS_DS_VERSION_CHECK_FAILURE"}, + {STATUS_AUDITING_DISABLED, -EIO, "STATUS_AUDITING_DISABLED"}, + {STATUS_PRENT4_MACHINE_ACCOUNT, -EIO, "STATUS_PRENT4_MACHINE_ACCOUNT"}, + {STATUS_DS_AG_CANT_HAVE_UNIVERSAL_MEMBER, -EIO, + "STATUS_DS_AG_CANT_HAVE_UNIVERSAL_MEMBER"}, + {STATUS_INVALID_IMAGE_WIN_32, -EIO, "STATUS_INVALID_IMAGE_WIN_32"}, + {STATUS_INVALID_IMAGE_WIN_64, -EIO, "STATUS_INVALID_IMAGE_WIN_64"}, + {STATUS_BAD_BINDINGS, -EIO, "STATUS_BAD_BINDINGS"}, + {STATUS_NETWORK_SESSION_EXPIRED, -EIO, + "STATUS_NETWORK_SESSION_EXPIRED"}, + {STATUS_APPHELP_BLOCK, -EIO, "STATUS_APPHELP_BLOCK"}, + {STATUS_ALL_SIDS_FILTERED, -EIO, "STATUS_ALL_SIDS_FILTERED"}, + {STATUS_NOT_SAFE_MODE_DRIVER, -EIO, "STATUS_NOT_SAFE_MODE_DRIVER"}, + {STATUS_ACCESS_DISABLED_BY_POLICY_DEFAULT, -EACCES, + "STATUS_ACCESS_DISABLED_BY_POLICY_DEFAULT"}, + {STATUS_ACCESS_DISABLED_BY_POLICY_PATH, -EACCES, + "STATUS_ACCESS_DISABLED_BY_POLICY_PATH"}, + {STATUS_ACCESS_DISABLED_BY_POLICY_PUBLISHER, -EACCES, + "STATUS_ACCESS_DISABLED_BY_POLICY_PUBLISHER"}, + {STATUS_ACCESS_DISABLED_BY_POLICY_OTHER, -EACCES, + "STATUS_ACCESS_DISABLED_BY_POLICY_OTHER"}, + {STATUS_FAILED_DRIVER_ENTRY, -EIO, "STATUS_FAILED_DRIVER_ENTRY"}, + {STATUS_DEVICE_ENUMERATION_ERROR, -EIO, + "STATUS_DEVICE_ENUMERATION_ERROR"}, + {STATUS_MOUNT_POINT_NOT_RESOLVED, -EIO, + "STATUS_MOUNT_POINT_NOT_RESOLVED"}, + {STATUS_INVALID_DEVICE_OBJECT_PARAMETER, -EIO, + "STATUS_INVALID_DEVICE_OBJECT_PARAMETER"}, + {STATUS_MCA_OCCURED, -EIO, "STATUS_MCA_OCCURED"}, + {STATUS_DRIVER_BLOCKED_CRITICAL, -EIO, + "STATUS_DRIVER_BLOCKED_CRITICAL"}, + {STATUS_DRIVER_BLOCKED, -EIO, "STATUS_DRIVER_BLOCKED"}, + {STATUS_DRIVER_DATABASE_ERROR, -EIO, "STATUS_DRIVER_DATABASE_ERROR"}, + {STATUS_SYSTEM_HIVE_TOO_LARGE, -EIO, "STATUS_SYSTEM_HIVE_TOO_LARGE"}, + {STATUS_INVALID_IMPORT_OF_NON_DLL, -EIO, + "STATUS_INVALID_IMPORT_OF_NON_DLL"}, + {STATUS_NO_SECRETS, -EIO, "STATUS_NO_SECRETS"}, + {STATUS_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY, -EACCES, + 
"STATUS_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY"}, + {STATUS_FAILED_STACK_SWITCH, -EIO, "STATUS_FAILED_STACK_SWITCH"}, + {STATUS_HEAP_CORRUPTION, -EIO, "STATUS_HEAP_CORRUPTION"}, + {STATUS_SMARTCARD_WRONG_PIN, -EIO, "STATUS_SMARTCARD_WRONG_PIN"}, + {STATUS_SMARTCARD_CARD_BLOCKED, -EIO, "STATUS_SMARTCARD_CARD_BLOCKED"}, + {STATUS_SMARTCARD_CARD_NOT_AUTHENTICATED, -EIO, + "STATUS_SMARTCARD_CARD_NOT_AUTHENTICATED"}, + {STATUS_SMARTCARD_NO_CARD, -EIO, "STATUS_SMARTCARD_NO_CARD"}, + {STATUS_SMARTCARD_NO_KEY_CONTAINER, -EIO, + "STATUS_SMARTCARD_NO_KEY_CONTAINER"}, + {STATUS_SMARTCARD_NO_CERTIFICATE, -EIO, + "STATUS_SMARTCARD_NO_CERTIFICATE"}, + {STATUS_SMARTCARD_NO_KEYSET, -EIO, "STATUS_SMARTCARD_NO_KEYSET"}, + {STATUS_SMARTCARD_IO_ERROR, -EIO, "STATUS_SMARTCARD_IO_ERROR"}, + {STATUS_DOWNGRADE_DETECTED, -EIO, "STATUS_DOWNGRADE_DETECTED"}, + {STATUS_SMARTCARD_CERT_REVOKED, -EIO, "STATUS_SMARTCARD_CERT_REVOKED"}, + {STATUS_ISSUING_CA_UNTRUSTED, -EIO, "STATUS_ISSUING_CA_UNTRUSTED"}, + {STATUS_REVOCATION_OFFLINE_C, -EIO, "STATUS_REVOCATION_OFFLINE_C"}, + {STATUS_PKINIT_CLIENT_FAILURE, -EIO, "STATUS_PKINIT_CLIENT_FAILURE"}, + {STATUS_SMARTCARD_CERT_EXPIRED, -EIO, "STATUS_SMARTCARD_CERT_EXPIRED"}, + {STATUS_DRIVER_FAILED_PRIOR_UNLOAD, -EIO, + "STATUS_DRIVER_FAILED_PRIOR_UNLOAD"}, + {STATUS_SMARTCARD_SILENT_CONTEXT, -EIO, + "STATUS_SMARTCARD_SILENT_CONTEXT"}, + {STATUS_PER_USER_TRUST_QUOTA_EXCEEDED, -EDQUOT, + "STATUS_PER_USER_TRUST_QUOTA_EXCEEDED"}, + {STATUS_ALL_USER_TRUST_QUOTA_EXCEEDED, -EDQUOT, + "STATUS_ALL_USER_TRUST_QUOTA_EXCEEDED"}, + {STATUS_USER_DELETE_TRUST_QUOTA_EXCEEDED, -EDQUOT, + "STATUS_USER_DELETE_TRUST_QUOTA_EXCEEDED"}, + {STATUS_DS_NAME_NOT_UNIQUE, -EIO, "STATUS_DS_NAME_NOT_UNIQUE"}, + {STATUS_DS_DUPLICATE_ID_FOUND, -EIO, "STATUS_DS_DUPLICATE_ID_FOUND"}, + {STATUS_DS_GROUP_CONVERSION_ERROR, -EIO, + "STATUS_DS_GROUP_CONVERSION_ERROR"}, + {STATUS_VOLSNAP_PREPARE_HIBERNATE, -EIO, + "STATUS_VOLSNAP_PREPARE_HIBERNATE"}, + {STATUS_USER2USER_REQUIRED, -EIO, 
"STATUS_USER2USER_REQUIRED"}, + {STATUS_STACK_BUFFER_OVERRUN, -EIO, "STATUS_STACK_BUFFER_OVERRUN"}, + {STATUS_NO_S4U_PROT_SUPPORT, -EIO, "STATUS_NO_S4U_PROT_SUPPORT"}, + {STATUS_CROSSREALM_DELEGATION_FAILURE, -EIO, + "STATUS_CROSSREALM_DELEGATION_FAILURE"}, + {STATUS_REVOCATION_OFFLINE_KDC, -EIO, "STATUS_REVOCATION_OFFLINE_KDC"}, + {STATUS_ISSUING_CA_UNTRUSTED_KDC, -EIO, + "STATUS_ISSUING_CA_UNTRUSTED_KDC"}, + {STATUS_KDC_CERT_EXPIRED, -EIO, "STATUS_KDC_CERT_EXPIRED"}, + {STATUS_KDC_CERT_REVOKED, -EIO, "STATUS_KDC_CERT_REVOKED"}, + {STATUS_PARAMETER_QUOTA_EXCEEDED, -EDQUOT, + "STATUS_PARAMETER_QUOTA_EXCEEDED"}, + {STATUS_HIBERNATION_FAILURE, -EIO, "STATUS_HIBERNATION_FAILURE"}, + {STATUS_DELAY_LOAD_FAILED, -EIO, "STATUS_DELAY_LOAD_FAILED"}, + {STATUS_AUTHENTICATION_FIREWALL_FAILED, -EIO, + "STATUS_AUTHENTICATION_FIREWALL_FAILED"}, + {STATUS_VDM_DISALLOWED, -EIO, "STATUS_VDM_DISALLOWED"}, + {STATUS_HUNG_DISPLAY_DRIVER_THREAD, -EIO, + "STATUS_HUNG_DISPLAY_DRIVER_THREAD"}, + {STATUS_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE, -EIO, + "STATUS_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE"}, + {STATUS_INVALID_CRUNTIME_PARAMETER, -EIO, + "STATUS_INVALID_CRUNTIME_PARAMETER"}, + {STATUS_NTLM_BLOCKED, -EIO, "STATUS_NTLM_BLOCKED"}, + {STATUS_ASSERTION_FAILURE, -EIO, "STATUS_ASSERTION_FAILURE"}, + {STATUS_VERIFIER_STOP, -EIO, "STATUS_VERIFIER_STOP"}, + {STATUS_CALLBACK_POP_STACK, -EIO, "STATUS_CALLBACK_POP_STACK"}, + {STATUS_INCOMPATIBLE_DRIVER_BLOCKED, -EIO, + "STATUS_INCOMPATIBLE_DRIVER_BLOCKED"}, + {STATUS_HIVE_UNLOADED, -EIO, "STATUS_HIVE_UNLOADED"}, + {STATUS_COMPRESSION_DISABLED, -EIO, "STATUS_COMPRESSION_DISABLED"}, + {STATUS_FILE_SYSTEM_LIMITATION, -EIO, "STATUS_FILE_SYSTEM_LIMITATION"}, + {STATUS_INVALID_IMAGE_HASH, -EIO, "STATUS_INVALID_IMAGE_HASH"}, + {STATUS_NOT_CAPABLE, -EIO, "STATUS_NOT_CAPABLE"}, + {STATUS_REQUEST_OUT_OF_SEQUENCE, -EIO, + "STATUS_REQUEST_OUT_OF_SEQUENCE"}, + {STATUS_IMPLEMENTATION_LIMIT, -EIO, 
"STATUS_IMPLEMENTATION_LIMIT"}, + {STATUS_ELEVATION_REQUIRED, -EIO, "STATUS_ELEVATION_REQUIRED"}, + {STATUS_BEYOND_VDL, -EIO, "STATUS_BEYOND_VDL"}, + {STATUS_ENCOUNTERED_WRITE_IN_PROGRESS, -EIO, + "STATUS_ENCOUNTERED_WRITE_IN_PROGRESS"}, + {STATUS_PTE_CHANGED, -EIO, "STATUS_PTE_CHANGED"}, + {STATUS_PURGE_FAILED, -EIO, "STATUS_PURGE_FAILED"}, + {STATUS_CRED_REQUIRES_CONFIRMATION, -EIO, + "STATUS_CRED_REQUIRES_CONFIRMATION"}, + {STATUS_CS_ENCRYPTION_INVALID_SERVER_RESPONSE, -EIO, + "STATUS_CS_ENCRYPTION_INVALID_SERVER_RESPONSE"}, + {STATUS_CS_ENCRYPTION_UNSUPPORTED_SERVER, -EIO, + "STATUS_CS_ENCRYPTION_UNSUPPORTED_SERVER"}, + {STATUS_CS_ENCRYPTION_EXISTING_ENCRYPTED_FILE, -EIO, + "STATUS_CS_ENCRYPTION_EXISTING_ENCRYPTED_FILE"}, + {STATUS_CS_ENCRYPTION_NEW_ENCRYPTED_FILE, -EIO, + "STATUS_CS_ENCRYPTION_NEW_ENCRYPTED_FILE"}, + {STATUS_CS_ENCRYPTION_FILE_NOT_CSE, -EIO, + "STATUS_CS_ENCRYPTION_FILE_NOT_CSE"}, + {STATUS_INVALID_LABEL, -EIO, "STATUS_INVALID_LABEL"}, + {STATUS_DRIVER_PROCESS_TERMINATED, -EIO, + "STATUS_DRIVER_PROCESS_TERMINATED"}, + {STATUS_AMBIGUOUS_SYSTEM_DEVICE, -EIO, + "STATUS_AMBIGUOUS_SYSTEM_DEVICE"}, + {STATUS_SYSTEM_DEVICE_NOT_FOUND, -EIO, + "STATUS_SYSTEM_DEVICE_NOT_FOUND"}, + {STATUS_RESTART_BOOT_APPLICATION, -EIO, + "STATUS_RESTART_BOOT_APPLICATION"}, + {STATUS_INVALID_TASK_NAME, -EIO, "STATUS_INVALID_TASK_NAME"}, + {STATUS_INVALID_TASK_INDEX, -EIO, "STATUS_INVALID_TASK_INDEX"}, + {STATUS_THREAD_ALREADY_IN_TASK, -EIO, "STATUS_THREAD_ALREADY_IN_TASK"}, + {STATUS_CALLBACK_BYPASS, -EIO, "STATUS_CALLBACK_BYPASS"}, + {STATUS_PORT_CLOSED, -EIO, "STATUS_PORT_CLOSED"}, + {STATUS_MESSAGE_LOST, -EIO, "STATUS_MESSAGE_LOST"}, + {STATUS_INVALID_MESSAGE, -EIO, "STATUS_INVALID_MESSAGE"}, + {STATUS_REQUEST_CANCELED, -EIO, "STATUS_REQUEST_CANCELED"}, + {STATUS_RECURSIVE_DISPATCH, -EIO, "STATUS_RECURSIVE_DISPATCH"}, + {STATUS_LPC_RECEIVE_BUFFER_EXPECTED, -EIO, + "STATUS_LPC_RECEIVE_BUFFER_EXPECTED"}, + {STATUS_LPC_INVALID_CONNECTION_USAGE, -EIO, + 
"STATUS_LPC_INVALID_CONNECTION_USAGE"}, + {STATUS_LPC_REQUESTS_NOT_ALLOWED, -EIO, + "STATUS_LPC_REQUESTS_NOT_ALLOWED"}, + {STATUS_RESOURCE_IN_USE, -EIO, "STATUS_RESOURCE_IN_USE"}, + {STATUS_HARDWARE_MEMORY_ERROR, -EIO, "STATUS_HARDWARE_MEMORY_ERROR"}, + {STATUS_THREADPOOL_HANDLE_EXCEPTION, -EIO, + "STATUS_THREADPOOL_HANDLE_EXCEPTION"}, + {STATUS_THREADPOOL_SET_EVENT_ON_COMPLETION_FAILED, -EIO, + "STATUS_THREADPOOL_SET_EVENT_ON_COMPLETION_FAILED"}, + {STATUS_THREADPOOL_RELEASE_SEMAPHORE_ON_COMPLETION_FAILED, -EIO, + "STATUS_THREADPOOL_RELEASE_SEMAPHORE_ON_COMPLETION_FAILED"}, + {STATUS_THREADPOOL_RELEASE_MUTEX_ON_COMPLETION_FAILED, -EIO, + "STATUS_THREADPOOL_RELEASE_MUTEX_ON_COMPLETION_FAILED"}, + {STATUS_THREADPOOL_FREE_LIBRARY_ON_COMPLETION_FAILED, -EIO, + "STATUS_THREADPOOL_FREE_LIBRARY_ON_COMPLETION_FAILED"}, + {STATUS_THREADPOOL_RELEASED_DURING_OPERATION, -EIO, + "STATUS_THREADPOOL_RELEASED_DURING_OPERATION"}, + {STATUS_CALLBACK_RETURNED_WHILE_IMPERSONATING, -EIO, + "STATUS_CALLBACK_RETURNED_WHILE_IMPERSONATING"}, + {STATUS_APC_RETURNED_WHILE_IMPERSONATING, -EIO, + "STATUS_APC_RETURNED_WHILE_IMPERSONATING"}, + {STATUS_PROCESS_IS_PROTECTED, -EIO, "STATUS_PROCESS_IS_PROTECTED"}, + {STATUS_MCA_EXCEPTION, -EIO, "STATUS_MCA_EXCEPTION"}, + {STATUS_CERTIFICATE_MAPPING_NOT_UNIQUE, -EIO, + "STATUS_CERTIFICATE_MAPPING_NOT_UNIQUE"}, + {STATUS_SYMLINK_CLASS_DISABLED, -EIO, "STATUS_SYMLINK_CLASS_DISABLED"}, + {STATUS_INVALID_IDN_NORMALIZATION, -EIO, + "STATUS_INVALID_IDN_NORMALIZATION"}, + {STATUS_NO_UNICODE_TRANSLATION, -EIO, "STATUS_NO_UNICODE_TRANSLATION"}, + {STATUS_ALREADY_REGISTERED, -EIO, "STATUS_ALREADY_REGISTERED"}, + {STATUS_CONTEXT_MISMATCH, -EIO, "STATUS_CONTEXT_MISMATCH"}, + {STATUS_PORT_ALREADY_HAS_COMPLETION_LIST, -EIO, + "STATUS_PORT_ALREADY_HAS_COMPLETION_LIST"}, + {STATUS_CALLBACK_RETURNED_THREAD_PRIORITY, -EIO, + "STATUS_CALLBACK_RETURNED_THREAD_PRIORITY"}, + {STATUS_INVALID_THREAD, -EIO, "STATUS_INVALID_THREAD"}, + {STATUS_CALLBACK_RETURNED_TRANSACTION, 
-EIO, + "STATUS_CALLBACK_RETURNED_TRANSACTION"}, + {STATUS_CALLBACK_RETURNED_LDR_LOCK, -EIO, + "STATUS_CALLBACK_RETURNED_LDR_LOCK"}, + {STATUS_CALLBACK_RETURNED_LANG, -EIO, "STATUS_CALLBACK_RETURNED_LANG"}, + {STATUS_CALLBACK_RETURNED_PRI_BACK, -EIO, + "STATUS_CALLBACK_RETURNED_PRI_BACK"}, + {STATUS_CALLBACK_RETURNED_THREAD_AFFINITY, -EIO, + "STATUS_CALLBACK_RETURNED_THREAD_AFFINITY"}, + {STATUS_DISK_REPAIR_DISABLED, -EIO, "STATUS_DISK_REPAIR_DISABLED"}, + {STATUS_DS_DOMAIN_RENAME_IN_PROGRESS, -EIO, + "STATUS_DS_DOMAIN_RENAME_IN_PROGRESS"}, + {STATUS_DISK_QUOTA_EXCEEDED, -EDQUOT, "STATUS_DISK_QUOTA_EXCEEDED"}, + {STATUS_CONTENT_BLOCKED, -EIO, "STATUS_CONTENT_BLOCKED"}, + {STATUS_BAD_CLUSTERS, -EIO, "STATUS_BAD_CLUSTERS"}, + {STATUS_VOLUME_DIRTY, -EIO, "STATUS_VOLUME_DIRTY"}, + {STATUS_FILE_CHECKED_OUT, -EIO, "STATUS_FILE_CHECKED_OUT"}, + {STATUS_CHECKOUT_REQUIRED, -EIO, "STATUS_CHECKOUT_REQUIRED"}, + {STATUS_BAD_FILE_TYPE, -EIO, "STATUS_BAD_FILE_TYPE"}, + {STATUS_FILE_TOO_LARGE, -EIO, "STATUS_FILE_TOO_LARGE"}, + {STATUS_FORMS_AUTH_REQUIRED, -EIO, "STATUS_FORMS_AUTH_REQUIRED"}, + {STATUS_VIRUS_INFECTED, -EIO, "STATUS_VIRUS_INFECTED"}, + {STATUS_VIRUS_DELETED, -EIO, "STATUS_VIRUS_DELETED"}, + {STATUS_BAD_MCFG_TABLE, -EIO, "STATUS_BAD_MCFG_TABLE"}, + {STATUS_WOW_ASSERTION, -EIO, "STATUS_WOW_ASSERTION"}, + {STATUS_INVALID_SIGNATURE, -EIO, "STATUS_INVALID_SIGNATURE"}, + {STATUS_HMAC_NOT_SUPPORTED, -EIO, "STATUS_HMAC_NOT_SUPPORTED"}, + {STATUS_IPSEC_QUEUE_OVERFLOW, -EIO, "STATUS_IPSEC_QUEUE_OVERFLOW"}, + {STATUS_ND_QUEUE_OVERFLOW, -EIO, "STATUS_ND_QUEUE_OVERFLOW"}, + {STATUS_HOPLIMIT_EXCEEDED, -EIO, "STATUS_HOPLIMIT_EXCEEDED"}, + {STATUS_PROTOCOL_NOT_SUPPORTED, -EOPNOTSUPP, + "STATUS_PROTOCOL_NOT_SUPPORTED"}, + {STATUS_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED, -EIO, + "STATUS_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED"}, + {STATUS_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR, -EIO, + "STATUS_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR"}, + 
{STATUS_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR, -EIO, + "STATUS_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR"}, + {STATUS_XML_PARSE_ERROR, -EIO, "STATUS_XML_PARSE_ERROR"}, + {STATUS_XMLDSIG_ERROR, -EIO, "STATUS_XMLDSIG_ERROR"}, + {STATUS_WRONG_COMPARTMENT, -EIO, "STATUS_WRONG_COMPARTMENT"}, + {STATUS_AUTHIP_FAILURE, -EIO, "STATUS_AUTHIP_FAILURE"}, + {DBG_NO_STATE_CHANGE, -EIO, "DBG_NO_STATE_CHANGE"}, + {DBG_APP_NOT_IDLE, -EIO, "DBG_APP_NOT_IDLE"}, + {RPC_NT_INVALID_STRING_BINDING, -EIO, "RPC_NT_INVALID_STRING_BINDING"}, + {RPC_NT_WRONG_KIND_OF_BINDING, -EIO, "RPC_NT_WRONG_KIND_OF_BINDING"}, + {RPC_NT_INVALID_BINDING, -EIO, "RPC_NT_INVALID_BINDING"}, + {RPC_NT_PROTSEQ_NOT_SUPPORTED, -EOPNOTSUPP, + "RPC_NT_PROTSEQ_NOT_SUPPORTED"}, + {RPC_NT_INVALID_RPC_PROTSEQ, -EIO, "RPC_NT_INVALID_RPC_PROTSEQ"}, + {RPC_NT_INVALID_STRING_UUID, -EIO, "RPC_NT_INVALID_STRING_UUID"}, + {RPC_NT_INVALID_ENDPOINT_FORMAT, -EIO, + "RPC_NT_INVALID_ENDPOINT_FORMAT"}, + {RPC_NT_INVALID_NET_ADDR, -EIO, "RPC_NT_INVALID_NET_ADDR"}, + {RPC_NT_NO_ENDPOINT_FOUND, -EIO, "RPC_NT_NO_ENDPOINT_FOUND"}, + {RPC_NT_INVALID_TIMEOUT, -EINVAL, "RPC_NT_INVALID_TIMEOUT"}, + {RPC_NT_OBJECT_NOT_FOUND, -ENOENT, "RPC_NT_OBJECT_NOT_FOUND"}, + {RPC_NT_ALREADY_REGISTERED, -EIO, "RPC_NT_ALREADY_REGISTERED"}, + {RPC_NT_TYPE_ALREADY_REGISTERED, -EIO, + "RPC_NT_TYPE_ALREADY_REGISTERED"}, + {RPC_NT_ALREADY_LISTENING, -EIO, "RPC_NT_ALREADY_LISTENING"}, + {RPC_NT_NO_PROTSEQS_REGISTERED, -EIO, "RPC_NT_NO_PROTSEQS_REGISTERED"}, + {RPC_NT_NOT_LISTENING, -EIO, "RPC_NT_NOT_LISTENING"}, + {RPC_NT_UNKNOWN_MGR_TYPE, -EIO, "RPC_NT_UNKNOWN_MGR_TYPE"}, + {RPC_NT_UNKNOWN_IF, -EIO, "RPC_NT_UNKNOWN_IF"}, + {RPC_NT_NO_BINDINGS, -EIO, "RPC_NT_NO_BINDINGS"}, + {RPC_NT_NO_PROTSEQS, -EIO, "RPC_NT_NO_PROTSEQS"}, + {RPC_NT_CANT_CREATE_ENDPOINT, -EIO, "RPC_NT_CANT_CREATE_ENDPOINT"}, + {RPC_NT_OUT_OF_RESOURCES, -EIO, "RPC_NT_OUT_OF_RESOURCES"}, + {RPC_NT_SERVER_UNAVAILABLE, -EIO, "RPC_NT_SERVER_UNAVAILABLE"}, + {RPC_NT_SERVER_TOO_BUSY, -EBUSY, 
"RPC_NT_SERVER_TOO_BUSY"}, + {RPC_NT_INVALID_NETWORK_OPTIONS, -EIO, + "RPC_NT_INVALID_NETWORK_OPTIONS"}, + {RPC_NT_NO_CALL_ACTIVE, -EIO, "RPC_NT_NO_CALL_ACTIVE"}, + {RPC_NT_CALL_FAILED, -EIO, "RPC_NT_CALL_FAILED"}, + {RPC_NT_CALL_FAILED_DNE, -EIO, "RPC_NT_CALL_FAILED_DNE"}, + {RPC_NT_PROTOCOL_ERROR, -EIO, "RPC_NT_PROTOCOL_ERROR"}, + {RPC_NT_UNSUPPORTED_TRANS_SYN, -EIO, "RPC_NT_UNSUPPORTED_TRANS_SYN"}, + {RPC_NT_UNSUPPORTED_TYPE, -EIO, "RPC_NT_UNSUPPORTED_TYPE"}, + {RPC_NT_INVALID_TAG, -EIO, "RPC_NT_INVALID_TAG"}, + {RPC_NT_INVALID_BOUND, -EIO, "RPC_NT_INVALID_BOUND"}, + {RPC_NT_NO_ENTRY_NAME, -EIO, "RPC_NT_NO_ENTRY_NAME"}, + {RPC_NT_INVALID_NAME_SYNTAX, -EIO, "RPC_NT_INVALID_NAME_SYNTAX"}, + {RPC_NT_UNSUPPORTED_NAME_SYNTAX, -EIO, + "RPC_NT_UNSUPPORTED_NAME_SYNTAX"}, + {RPC_NT_UUID_NO_ADDRESS, -EIO, "RPC_NT_UUID_NO_ADDRESS"}, + {RPC_NT_DUPLICATE_ENDPOINT, -ENOTUNIQ, "RPC_NT_DUPLICATE_ENDPOINT"}, + {RPC_NT_UNKNOWN_AUTHN_TYPE, -EIO, "RPC_NT_UNKNOWN_AUTHN_TYPE"}, + {RPC_NT_MAX_CALLS_TOO_SMALL, -EIO, "RPC_NT_MAX_CALLS_TOO_SMALL"}, + {RPC_NT_STRING_TOO_LONG, -EIO, "RPC_NT_STRING_TOO_LONG"}, + {RPC_NT_PROTSEQ_NOT_FOUND, -EIO, "RPC_NT_PROTSEQ_NOT_FOUND"}, + {RPC_NT_PROCNUM_OUT_OF_RANGE, -EIO, "RPC_NT_PROCNUM_OUT_OF_RANGE"}, + {RPC_NT_BINDING_HAS_NO_AUTH, -EIO, "RPC_NT_BINDING_HAS_NO_AUTH"}, + {RPC_NT_UNKNOWN_AUTHN_SERVICE, -EIO, "RPC_NT_UNKNOWN_AUTHN_SERVICE"}, + {RPC_NT_UNKNOWN_AUTHN_LEVEL, -EIO, "RPC_NT_UNKNOWN_AUTHN_LEVEL"}, + {RPC_NT_INVALID_AUTH_IDENTITY, -EIO, "RPC_NT_INVALID_AUTH_IDENTITY"}, + {RPC_NT_UNKNOWN_AUTHZ_SERVICE, -EIO, "RPC_NT_UNKNOWN_AUTHZ_SERVICE"}, + {EPT_NT_INVALID_ENTRY, -EIO, "EPT_NT_INVALID_ENTRY"}, + {EPT_NT_CANT_PERFORM_OP, -EIO, "EPT_NT_CANT_PERFORM_OP"}, + {EPT_NT_NOT_REGISTERED, -EIO, "EPT_NT_NOT_REGISTERED"}, + {RPC_NT_NOTHING_TO_EXPORT, -EIO, "RPC_NT_NOTHING_TO_EXPORT"}, + {RPC_NT_INCOMPLETE_NAME, -EIO, "RPC_NT_INCOMPLETE_NAME"}, + {RPC_NT_INVALID_VERS_OPTION, -EIO, "RPC_NT_INVALID_VERS_OPTION"}, + {RPC_NT_NO_MORE_MEMBERS, -EIO, 
"RPC_NT_NO_MORE_MEMBERS"}, + {RPC_NT_NOT_ALL_OBJS_UNEXPORTED, -EIO, + "RPC_NT_NOT_ALL_OBJS_UNEXPORTED"}, + {RPC_NT_INTERFACE_NOT_FOUND, -EIO, "RPC_NT_INTERFACE_NOT_FOUND"}, + {RPC_NT_ENTRY_ALREADY_EXISTS, -EIO, "RPC_NT_ENTRY_ALREADY_EXISTS"}, + {RPC_NT_ENTRY_NOT_FOUND, -EIO, "RPC_NT_ENTRY_NOT_FOUND"}, + {RPC_NT_NAME_SERVICE_UNAVAILABLE, -EIO, + "RPC_NT_NAME_SERVICE_UNAVAILABLE"}, + {RPC_NT_INVALID_NAF_ID, -EIO, "RPC_NT_INVALID_NAF_ID"}, + {RPC_NT_CANNOT_SUPPORT, -EOPNOTSUPP, "RPC_NT_CANNOT_SUPPORT"}, + {RPC_NT_NO_CONTEXT_AVAILABLE, -EIO, "RPC_NT_NO_CONTEXT_AVAILABLE"}, + {RPC_NT_INTERNAL_ERROR, -EIO, "RPC_NT_INTERNAL_ERROR"}, + {RPC_NT_ZERO_DIVIDE, -EIO, "RPC_NT_ZERO_DIVIDE"}, + {RPC_NT_ADDRESS_ERROR, -EIO, "RPC_NT_ADDRESS_ERROR"}, + {RPC_NT_FP_DIV_ZERO, -EIO, "RPC_NT_FP_DIV_ZERO"}, + {RPC_NT_FP_UNDERFLOW, -EIO, "RPC_NT_FP_UNDERFLOW"}, + {RPC_NT_FP_OVERFLOW, -EIO, "RPC_NT_FP_OVERFLOW"}, + {RPC_NT_CALL_IN_PROGRESS, -EIO, "RPC_NT_CALL_IN_PROGRESS"}, + {RPC_NT_NO_MORE_BINDINGS, -EIO, "RPC_NT_NO_MORE_BINDINGS"}, + {RPC_NT_GROUP_MEMBER_NOT_FOUND, -EIO, "RPC_NT_GROUP_MEMBER_NOT_FOUND"}, + {EPT_NT_CANT_CREATE, -EIO, "EPT_NT_CANT_CREATE"}, + {RPC_NT_INVALID_OBJECT, -EIO, "RPC_NT_INVALID_OBJECT"}, + {RPC_NT_NO_INTERFACES, -EIO, "RPC_NT_NO_INTERFACES"}, + {RPC_NT_CALL_CANCELLED, -EIO, "RPC_NT_CALL_CANCELLED"}, + {RPC_NT_BINDING_INCOMPLETE, -EIO, "RPC_NT_BINDING_INCOMPLETE"}, + {RPC_NT_COMM_FAILURE, -EIO, "RPC_NT_COMM_FAILURE"}, + {RPC_NT_UNSUPPORTED_AUTHN_LEVEL, -EIO, + "RPC_NT_UNSUPPORTED_AUTHN_LEVEL"}, + {RPC_NT_NO_PRINC_NAME, -EIO, "RPC_NT_NO_PRINC_NAME"}, + {RPC_NT_NOT_RPC_ERROR, -EIO, "RPC_NT_NOT_RPC_ERROR"}, + {RPC_NT_SEC_PKG_ERROR, -EIO, "RPC_NT_SEC_PKG_ERROR"}, + {RPC_NT_NOT_CANCELLED, -EIO, "RPC_NT_NOT_CANCELLED"}, + {RPC_NT_INVALID_ASYNC_HANDLE, -EIO, "RPC_NT_INVALID_ASYNC_HANDLE"}, + {RPC_NT_INVALID_ASYNC_CALL, -EIO, "RPC_NT_INVALID_ASYNC_CALL"}, + {RPC_NT_PROXY_ACCESS_DENIED, -EACCES, "RPC_NT_PROXY_ACCESS_DENIED"}, + {RPC_NT_NO_MORE_ENTRIES, -EIO, 
"RPC_NT_NO_MORE_ENTRIES"}, + {RPC_NT_SS_CHAR_TRANS_OPEN_FAIL, -EIO, + "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL"}, + {RPC_NT_SS_CHAR_TRANS_SHORT_FILE, -EIO, + "RPC_NT_SS_CHAR_TRANS_SHORT_FILE"}, + {RPC_NT_SS_IN_NULL_CONTEXT, -EIO, "RPC_NT_SS_IN_NULL_CONTEXT"}, + {RPC_NT_SS_CONTEXT_MISMATCH, -EIO, "RPC_NT_SS_CONTEXT_MISMATCH"}, + {RPC_NT_SS_CONTEXT_DAMAGED, -EIO, "RPC_NT_SS_CONTEXT_DAMAGED"}, + {RPC_NT_SS_HANDLES_MISMATCH, -EIO, "RPC_NT_SS_HANDLES_MISMATCH"}, + {RPC_NT_SS_CANNOT_GET_CALL_HANDLE, -EIO, + "RPC_NT_SS_CANNOT_GET_CALL_HANDLE"}, + {RPC_NT_NULL_REF_POINTER, -EIO, "RPC_NT_NULL_REF_POINTER"}, + {RPC_NT_ENUM_VALUE_OUT_OF_RANGE, -EIO, + "RPC_NT_ENUM_VALUE_OUT_OF_RANGE"}, + {RPC_NT_BYTE_COUNT_TOO_SMALL, -EIO, "RPC_NT_BYTE_COUNT_TOO_SMALL"}, + {RPC_NT_BAD_STUB_DATA, -EIO, "RPC_NT_BAD_STUB_DATA"}, + {RPC_NT_INVALID_ES_ACTION, -EIO, "RPC_NT_INVALID_ES_ACTION"}, + {RPC_NT_WRONG_ES_VERSION, -EIO, "RPC_NT_WRONG_ES_VERSION"}, + {RPC_NT_WRONG_STUB_VERSION, -EIO, "RPC_NT_WRONG_STUB_VERSION"}, + {RPC_NT_INVALID_PIPE_OBJECT, -EIO, "RPC_NT_INVALID_PIPE_OBJECT"}, + {RPC_NT_INVALID_PIPE_OPERATION, -EIO, "RPC_NT_INVALID_PIPE_OPERATION"}, + {RPC_NT_WRONG_PIPE_VERSION, -EIO, "RPC_NT_WRONG_PIPE_VERSION"}, + {RPC_NT_PIPE_CLOSED, -EIO, "RPC_NT_PIPE_CLOSED"}, + {RPC_NT_PIPE_DISCIPLINE_ERROR, -EIO, "RPC_NT_PIPE_DISCIPLINE_ERROR"}, + {RPC_NT_PIPE_EMPTY, -EIO, "RPC_NT_PIPE_EMPTY"}, + {STATUS_PNP_BAD_MPS_TABLE, -EIO, "STATUS_PNP_BAD_MPS_TABLE"}, + {STATUS_PNP_TRANSLATION_FAILED, -EIO, "STATUS_PNP_TRANSLATION_FAILED"}, + {STATUS_PNP_IRQ_TRANSLATION_FAILED, -EIO, + "STATUS_PNP_IRQ_TRANSLATION_FAILED"}, + {STATUS_PNP_INVALID_ID, -EIO, "STATUS_PNP_INVALID_ID"}, + {STATUS_IO_REISSUE_AS_CACHED, -EIO, "STATUS_IO_REISSUE_AS_CACHED"}, + {STATUS_CTX_WINSTATION_NAME_INVALID, -EIO, + "STATUS_CTX_WINSTATION_NAME_INVALID"}, + {STATUS_CTX_INVALID_PD, -EIO, "STATUS_CTX_INVALID_PD"}, + {STATUS_CTX_PD_NOT_FOUND, -EIO, "STATUS_CTX_PD_NOT_FOUND"}, + {STATUS_CTX_CLOSE_PENDING, -EIO, "STATUS_CTX_CLOSE_PENDING"}, + 
{STATUS_CTX_NO_OUTBUF, -EIO, "STATUS_CTX_NO_OUTBUF"}, + {STATUS_CTX_MODEM_INF_NOT_FOUND, -EIO, + "STATUS_CTX_MODEM_INF_NOT_FOUND"}, + {STATUS_CTX_INVALID_MODEMNAME, -EIO, "STATUS_CTX_INVALID_MODEMNAME"}, + {STATUS_CTX_RESPONSE_ERROR, -EIO, "STATUS_CTX_RESPONSE_ERROR"}, + {STATUS_CTX_MODEM_RESPONSE_TIMEOUT, -ETIMEDOUT, + "STATUS_CTX_MODEM_RESPONSE_TIMEOUT"}, + {STATUS_CTX_MODEM_RESPONSE_NO_CARRIER, -EIO, + "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER"}, + {STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE, -EIO, + "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE"}, + {STATUS_CTX_MODEM_RESPONSE_BUSY, -EBUSY, + "STATUS_CTX_MODEM_RESPONSE_BUSY"}, + {STATUS_CTX_MODEM_RESPONSE_VOICE, -EIO, + "STATUS_CTX_MODEM_RESPONSE_VOICE"}, + {STATUS_CTX_TD_ERROR, -EIO, "STATUS_CTX_TD_ERROR"}, + {STATUS_CTX_LICENSE_CLIENT_INVALID, -EIO, + "STATUS_CTX_LICENSE_CLIENT_INVALID"}, + {STATUS_CTX_LICENSE_NOT_AVAILABLE, -EIO, + "STATUS_CTX_LICENSE_NOT_AVAILABLE"}, + {STATUS_CTX_LICENSE_EXPIRED, -EIO, "STATUS_CTX_LICENSE_EXPIRED"}, + {STATUS_CTX_WINSTATION_NOT_FOUND, -EIO, + "STATUS_CTX_WINSTATION_NOT_FOUND"}, + {STATUS_CTX_WINSTATION_NAME_COLLISION, -EIO, + "STATUS_CTX_WINSTATION_NAME_COLLISION"}, + {STATUS_CTX_WINSTATION_BUSY, -EBUSY, "STATUS_CTX_WINSTATION_BUSY"}, + {STATUS_CTX_BAD_VIDEO_MODE, -EIO, "STATUS_CTX_BAD_VIDEO_MODE"}, + {STATUS_CTX_GRAPHICS_INVALID, -EIO, "STATUS_CTX_GRAPHICS_INVALID"}, + {STATUS_CTX_NOT_CONSOLE, -EIO, "STATUS_CTX_NOT_CONSOLE"}, + {STATUS_CTX_CLIENT_QUERY_TIMEOUT, -EIO, + "STATUS_CTX_CLIENT_QUERY_TIMEOUT"}, + {STATUS_CTX_CONSOLE_DISCONNECT, -EIO, "STATUS_CTX_CONSOLE_DISCONNECT"}, + {STATUS_CTX_CONSOLE_CONNECT, -EIO, "STATUS_CTX_CONSOLE_CONNECT"}, + {STATUS_CTX_SHADOW_DENIED, -EIO, "STATUS_CTX_SHADOW_DENIED"}, + {STATUS_CTX_WINSTATION_ACCESS_DENIED, -EACCES, + "STATUS_CTX_WINSTATION_ACCESS_DENIED"}, + {STATUS_CTX_INVALID_WD, -EIO, "STATUS_CTX_INVALID_WD"}, + {STATUS_CTX_WD_NOT_FOUND, -EIO, "STATUS_CTX_WD_NOT_FOUND"}, + {STATUS_CTX_SHADOW_INVALID, -EIO, "STATUS_CTX_SHADOW_INVALID"}, + 
{STATUS_CTX_SHADOW_DISABLED, -EIO, "STATUS_CTX_SHADOW_DISABLED"}, + {STATUS_RDP_PROTOCOL_ERROR, -EIO, "STATUS_RDP_PROTOCOL_ERROR"}, + {STATUS_CTX_CLIENT_LICENSE_NOT_SET, -EIO, + "STATUS_CTX_CLIENT_LICENSE_NOT_SET"}, + {STATUS_CTX_CLIENT_LICENSE_IN_USE, -EIO, + "STATUS_CTX_CLIENT_LICENSE_IN_USE"}, + {STATUS_CTX_SHADOW_ENDED_BY_MODE_CHANGE, -EIO, + "STATUS_CTX_SHADOW_ENDED_BY_MODE_CHANGE"}, + {STATUS_CTX_SHADOW_NOT_RUNNING, -EIO, "STATUS_CTX_SHADOW_NOT_RUNNING"}, + {STATUS_CTX_LOGON_DISABLED, -EIO, "STATUS_CTX_LOGON_DISABLED"}, + {STATUS_CTX_SECURITY_LAYER_ERROR, -EIO, + "STATUS_CTX_SECURITY_LAYER_ERROR"}, + {STATUS_TS_INCOMPATIBLE_SESSIONS, -EIO, + "STATUS_TS_INCOMPATIBLE_SESSIONS"}, + {STATUS_MUI_FILE_NOT_FOUND, -EIO, "STATUS_MUI_FILE_NOT_FOUND"}, + {STATUS_MUI_INVALID_FILE, -EIO, "STATUS_MUI_INVALID_FILE"}, + {STATUS_MUI_INVALID_RC_CONFIG, -EIO, "STATUS_MUI_INVALID_RC_CONFIG"}, + {STATUS_MUI_INVALID_LOCALE_NAME, -EIO, + "STATUS_MUI_INVALID_LOCALE_NAME"}, + {STATUS_MUI_INVALID_ULTIMATEFALLBACK_NAME, -EIO, + "STATUS_MUI_INVALID_ULTIMATEFALLBACK_NAME"}, + {STATUS_MUI_FILE_NOT_LOADED, -EIO, "STATUS_MUI_FILE_NOT_LOADED"}, + {STATUS_RESOURCE_ENUM_USER_STOP, -EIO, + "STATUS_RESOURCE_ENUM_USER_STOP"}, + {STATUS_CLUSTER_INVALID_NODE, -EIO, "STATUS_CLUSTER_INVALID_NODE"}, + {STATUS_CLUSTER_NODE_EXISTS, -EIO, "STATUS_CLUSTER_NODE_EXISTS"}, + {STATUS_CLUSTER_JOIN_IN_PROGRESS, -EIO, + "STATUS_CLUSTER_JOIN_IN_PROGRESS"}, + {STATUS_CLUSTER_NODE_NOT_FOUND, -EIO, "STATUS_CLUSTER_NODE_NOT_FOUND"}, + {STATUS_CLUSTER_LOCAL_NODE_NOT_FOUND, -EIO, + "STATUS_CLUSTER_LOCAL_NODE_NOT_FOUND"}, + {STATUS_CLUSTER_NETWORK_EXISTS, -EIO, "STATUS_CLUSTER_NETWORK_EXISTS"}, + {STATUS_CLUSTER_NETWORK_NOT_FOUND, -EIO, + "STATUS_CLUSTER_NETWORK_NOT_FOUND"}, + {STATUS_CLUSTER_NETINTERFACE_EXISTS, -EIO, + "STATUS_CLUSTER_NETINTERFACE_EXISTS"}, + {STATUS_CLUSTER_NETINTERFACE_NOT_FOUND, -EIO, + "STATUS_CLUSTER_NETINTERFACE_NOT_FOUND"}, + {STATUS_CLUSTER_INVALID_REQUEST, -EIO, + 
"STATUS_CLUSTER_INVALID_REQUEST"}, + {STATUS_CLUSTER_INVALID_NETWORK_PROVIDER, -EIO, + "STATUS_CLUSTER_INVALID_NETWORK_PROVIDER"}, + {STATUS_CLUSTER_NODE_DOWN, -EIO, "STATUS_CLUSTER_NODE_DOWN"}, + {STATUS_CLUSTER_NODE_UNREACHABLE, -EIO, + "STATUS_CLUSTER_NODE_UNREACHABLE"}, + {STATUS_CLUSTER_NODE_NOT_MEMBER, -EIO, + "STATUS_CLUSTER_NODE_NOT_MEMBER"}, + {STATUS_CLUSTER_JOIN_NOT_IN_PROGRESS, -EIO, + "STATUS_CLUSTER_JOIN_NOT_IN_PROGRESS"}, + {STATUS_CLUSTER_INVALID_NETWORK, -EIO, + "STATUS_CLUSTER_INVALID_NETWORK"}, + {STATUS_CLUSTER_NO_NET_ADAPTERS, -EIO, + "STATUS_CLUSTER_NO_NET_ADAPTERS"}, + {STATUS_CLUSTER_NODE_UP, -EIO, "STATUS_CLUSTER_NODE_UP"}, + {STATUS_CLUSTER_NODE_PAUSED, -EIO, "STATUS_CLUSTER_NODE_PAUSED"}, + {STATUS_CLUSTER_NODE_NOT_PAUSED, -EIO, + "STATUS_CLUSTER_NODE_NOT_PAUSED"}, + {STATUS_CLUSTER_NO_SECURITY_CONTEXT, -EIO, + "STATUS_CLUSTER_NO_SECURITY_CONTEXT"}, + {STATUS_CLUSTER_NETWORK_NOT_INTERNAL, -EIO, + "STATUS_CLUSTER_NETWORK_NOT_INTERNAL"}, + {STATUS_CLUSTER_POISONED, -EIO, "STATUS_CLUSTER_POISONED"}, + {STATUS_ACPI_INVALID_OPCODE, -EIO, "STATUS_ACPI_INVALID_OPCODE"}, + {STATUS_ACPI_STACK_OVERFLOW, -EIO, "STATUS_ACPI_STACK_OVERFLOW"}, + {STATUS_ACPI_ASSERT_FAILED, -EIO, "STATUS_ACPI_ASSERT_FAILED"}, + {STATUS_ACPI_INVALID_INDEX, -EIO, "STATUS_ACPI_INVALID_INDEX"}, + {STATUS_ACPI_INVALID_ARGUMENT, -EIO, "STATUS_ACPI_INVALID_ARGUMENT"}, + {STATUS_ACPI_FATAL, -EIO, "STATUS_ACPI_FATAL"}, + {STATUS_ACPI_INVALID_SUPERNAME, -EIO, "STATUS_ACPI_INVALID_SUPERNAME"}, + {STATUS_ACPI_INVALID_ARGTYPE, -EIO, "STATUS_ACPI_INVALID_ARGTYPE"}, + {STATUS_ACPI_INVALID_OBJTYPE, -EIO, "STATUS_ACPI_INVALID_OBJTYPE"}, + {STATUS_ACPI_INVALID_TARGETTYPE, -EIO, + "STATUS_ACPI_INVALID_TARGETTYPE"}, + {STATUS_ACPI_INCORRECT_ARGUMENT_COUNT, -EIO, + "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT"}, + {STATUS_ACPI_ADDRESS_NOT_MAPPED, -EIO, + "STATUS_ACPI_ADDRESS_NOT_MAPPED"}, + {STATUS_ACPI_INVALID_EVENTTYPE, -EIO, "STATUS_ACPI_INVALID_EVENTTYPE"}, + {STATUS_ACPI_HANDLER_COLLISION, 
-EIO, "STATUS_ACPI_HANDLER_COLLISION"}, + {STATUS_ACPI_INVALID_DATA, -EIO, "STATUS_ACPI_INVALID_DATA"}, + {STATUS_ACPI_INVALID_REGION, -EIO, "STATUS_ACPI_INVALID_REGION"}, + {STATUS_ACPI_INVALID_ACCESS_SIZE, -EIO, + "STATUS_ACPI_INVALID_ACCESS_SIZE"}, + {STATUS_ACPI_ACQUIRE_GLOBAL_LOCK, -EIO, + "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK"}, + {STATUS_ACPI_ALREADY_INITIALIZED, -EIO, + "STATUS_ACPI_ALREADY_INITIALIZED"}, + {STATUS_ACPI_NOT_INITIALIZED, -EIO, "STATUS_ACPI_NOT_INITIALIZED"}, + {STATUS_ACPI_INVALID_MUTEX_LEVEL, -EIO, + "STATUS_ACPI_INVALID_MUTEX_LEVEL"}, + {STATUS_ACPI_MUTEX_NOT_OWNED, -EIO, "STATUS_ACPI_MUTEX_NOT_OWNED"}, + {STATUS_ACPI_MUTEX_NOT_OWNER, -EIO, "STATUS_ACPI_MUTEX_NOT_OWNER"}, + {STATUS_ACPI_RS_ACCESS, -EIO, "STATUS_ACPI_RS_ACCESS"}, + {STATUS_ACPI_INVALID_TABLE, -EIO, "STATUS_ACPI_INVALID_TABLE"}, + {STATUS_ACPI_REG_HANDLER_FAILED, -EIO, + "STATUS_ACPI_REG_HANDLER_FAILED"}, + {STATUS_ACPI_POWER_REQUEST_FAILED, -EIO, + "STATUS_ACPI_POWER_REQUEST_FAILED"}, + {STATUS_SXS_SECTION_NOT_FOUND, -EIO, "STATUS_SXS_SECTION_NOT_FOUND"}, + {STATUS_SXS_CANT_GEN_ACTCTX, -EIO, "STATUS_SXS_CANT_GEN_ACTCTX"}, + {STATUS_SXS_INVALID_ACTCTXDATA_FORMAT, -EIO, + "STATUS_SXS_INVALID_ACTCTXDATA_FORMAT"}, + {STATUS_SXS_ASSEMBLY_NOT_FOUND, -EIO, "STATUS_SXS_ASSEMBLY_NOT_FOUND"}, + {STATUS_SXS_MANIFEST_FORMAT_ERROR, -EIO, + "STATUS_SXS_MANIFEST_FORMAT_ERROR"}, + {STATUS_SXS_MANIFEST_PARSE_ERROR, -EIO, + "STATUS_SXS_MANIFEST_PARSE_ERROR"}, + {STATUS_SXS_ACTIVATION_CONTEXT_DISABLED, -EIO, + "STATUS_SXS_ACTIVATION_CONTEXT_DISABLED"}, + {STATUS_SXS_KEY_NOT_FOUND, -EIO, "STATUS_SXS_KEY_NOT_FOUND"}, + {STATUS_SXS_VERSION_CONFLICT, -EIO, "STATUS_SXS_VERSION_CONFLICT"}, + {STATUS_SXS_WRONG_SECTION_TYPE, -EIO, "STATUS_SXS_WRONG_SECTION_TYPE"}, + {STATUS_SXS_THREAD_QUERIES_DISABLED, -EIO, + "STATUS_SXS_THREAD_QUERIES_DISABLED"}, + {STATUS_SXS_ASSEMBLY_MISSING, -EIO, "STATUS_SXS_ASSEMBLY_MISSING"}, + {STATUS_SXS_PROCESS_DEFAULT_ALREADY_SET, -EIO, + 
"STATUS_SXS_PROCESS_DEFAULT_ALREADY_SET"}, + {STATUS_SXS_EARLY_DEACTIVATION, -EIO, "STATUS_SXS_EARLY_DEACTIVATION"}, + {STATUS_SXS_INVALID_DEACTIVATION, -EIO, + "STATUS_SXS_INVALID_DEACTIVATION"}, + {STATUS_SXS_MULTIPLE_DEACTIVATION, -EIO, + "STATUS_SXS_MULTIPLE_DEACTIVATION"}, + {STATUS_SXS_SYSTEM_DEFAULT_ACTIVATION_CONTEXT_EMPTY, -EIO, + "STATUS_SXS_SYSTEM_DEFAULT_ACTIVATION_CONTEXT_EMPTY"}, + {STATUS_SXS_PROCESS_TERMINATION_REQUESTED, -EIO, + "STATUS_SXS_PROCESS_TERMINATION_REQUESTED"}, + {STATUS_SXS_CORRUPT_ACTIVATION_STACK, -EIO, + "STATUS_SXS_CORRUPT_ACTIVATION_STACK"}, + {STATUS_SXS_CORRUPTION, -EIO, "STATUS_SXS_CORRUPTION"}, + {STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_VALUE, -EIO, + "STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_VALUE"}, + {STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_NAME, -EIO, + "STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_NAME"}, + {STATUS_SXS_IDENTITY_DUPLICATE_ATTRIBUTE, -EIO, + "STATUS_SXS_IDENTITY_DUPLICATE_ATTRIBUTE"}, + {STATUS_SXS_IDENTITY_PARSE_ERROR, -EIO, + "STATUS_SXS_IDENTITY_PARSE_ERROR"}, + {STATUS_SXS_COMPONENT_STORE_CORRUPT, -EIO, + "STATUS_SXS_COMPONENT_STORE_CORRUPT"}, + {STATUS_SXS_FILE_HASH_MISMATCH, -EIO, "STATUS_SXS_FILE_HASH_MISMATCH"}, + {STATUS_SXS_MANIFEST_IDENTITY_SAME_BUT_CONTENTS_DIFFERENT, -EIO, + "STATUS_SXS_MANIFEST_IDENTITY_SAME_BUT_CONTENTS_DIFFERENT"}, + {STATUS_SXS_IDENTITIES_DIFFERENT, -EIO, + "STATUS_SXS_IDENTITIES_DIFFERENT"}, + {STATUS_SXS_ASSEMBLY_IS_NOT_A_DEPLOYMENT, -EIO, + "STATUS_SXS_ASSEMBLY_IS_NOT_A_DEPLOYMENT"}, + {STATUS_SXS_FILE_NOT_PART_OF_ASSEMBLY, -EIO, + "STATUS_SXS_FILE_NOT_PART_OF_ASSEMBLY"}, + {STATUS_ADVANCED_INSTALLER_FAILED, -EIO, + "STATUS_ADVANCED_INSTALLER_FAILED"}, + {STATUS_XML_ENCODING_MISMATCH, -EIO, "STATUS_XML_ENCODING_MISMATCH"}, + {STATUS_SXS_MANIFEST_TOO_BIG, -EIO, "STATUS_SXS_MANIFEST_TOO_BIG"}, + {STATUS_SXS_SETTING_NOT_REGISTERED, -EIO, + "STATUS_SXS_SETTING_NOT_REGISTERED"}, + {STATUS_SXS_TRANSACTION_CLOSURE_INCOMPLETE, -EIO, + "STATUS_SXS_TRANSACTION_CLOSURE_INCOMPLETE"}, + 
{STATUS_SMI_PRIMITIVE_INSTALLER_FAILED, -EIO, + "STATUS_SMI_PRIMITIVE_INSTALLER_FAILED"}, + {STATUS_GENERIC_COMMAND_FAILED, -EIO, "STATUS_GENERIC_COMMAND_FAILED"}, + {STATUS_SXS_FILE_HASH_MISSING, -EIO, "STATUS_SXS_FILE_HASH_MISSING"}, + {STATUS_TRANSACTIONAL_CONFLICT, -EIO, "STATUS_TRANSACTIONAL_CONFLICT"}, + {STATUS_INVALID_TRANSACTION, -EIO, "STATUS_INVALID_TRANSACTION"}, + {STATUS_TRANSACTION_NOT_ACTIVE, -EIO, "STATUS_TRANSACTION_NOT_ACTIVE"}, + {STATUS_TM_INITIALIZATION_FAILED, -EIO, + "STATUS_TM_INITIALIZATION_FAILED"}, + {STATUS_RM_NOT_ACTIVE, -EIO, "STATUS_RM_NOT_ACTIVE"}, + {STATUS_RM_METADATA_CORRUPT, -EIO, "STATUS_RM_METADATA_CORRUPT"}, + {STATUS_TRANSACTION_NOT_JOINED, -EIO, "STATUS_TRANSACTION_NOT_JOINED"}, + {STATUS_DIRECTORY_NOT_RM, -EIO, "STATUS_DIRECTORY_NOT_RM"}, + {STATUS_TRANSACTIONS_UNSUPPORTED_REMOTE, -EIO, + "STATUS_TRANSACTIONS_UNSUPPORTED_REMOTE"}, + {STATUS_LOG_RESIZE_INVALID_SIZE, -EIO, + "STATUS_LOG_RESIZE_INVALID_SIZE"}, + {STATUS_REMOTE_FILE_VERSION_MISMATCH, -EIO, + "STATUS_REMOTE_FILE_VERSION_MISMATCH"}, + {STATUS_CRM_PROTOCOL_ALREADY_EXISTS, -EIO, + "STATUS_CRM_PROTOCOL_ALREADY_EXISTS"}, + {STATUS_TRANSACTION_PROPAGATION_FAILED, -EIO, + "STATUS_TRANSACTION_PROPAGATION_FAILED"}, + {STATUS_CRM_PROTOCOL_NOT_FOUND, -EIO, "STATUS_CRM_PROTOCOL_NOT_FOUND"}, + {STATUS_TRANSACTION_SUPERIOR_EXISTS, -EIO, + "STATUS_TRANSACTION_SUPERIOR_EXISTS"}, + {STATUS_TRANSACTION_REQUEST_NOT_VALID, -EIO, + "STATUS_TRANSACTION_REQUEST_NOT_VALID"}, + {STATUS_TRANSACTION_NOT_REQUESTED, -EIO, + "STATUS_TRANSACTION_NOT_REQUESTED"}, + {STATUS_TRANSACTION_ALREADY_ABORTED, -EIO, + "STATUS_TRANSACTION_ALREADY_ABORTED"}, + {STATUS_TRANSACTION_ALREADY_COMMITTED, -EIO, + "STATUS_TRANSACTION_ALREADY_COMMITTED"}, + {STATUS_TRANSACTION_INVALID_MARSHALL_BUFFER, -EIO, + "STATUS_TRANSACTION_INVALID_MARSHALL_BUFFER"}, + {STATUS_CURRENT_TRANSACTION_NOT_VALID, -EIO, + "STATUS_CURRENT_TRANSACTION_NOT_VALID"}, + {STATUS_LOG_GROWTH_FAILED, -EIO, "STATUS_LOG_GROWTH_FAILED"}, + 
{STATUS_OBJECT_NO_LONGER_EXISTS, -EIO, + "STATUS_OBJECT_NO_LONGER_EXISTS"}, + {STATUS_STREAM_MINIVERSION_NOT_FOUND, -EIO, + "STATUS_STREAM_MINIVERSION_NOT_FOUND"}, + {STATUS_STREAM_MINIVERSION_NOT_VALID, -EIO, + "STATUS_STREAM_MINIVERSION_NOT_VALID"}, + {STATUS_MINIVERSION_INACCESSIBLE_FROM_SPECIFIED_TRANSACTION, -EIO, + "STATUS_MINIVERSION_INACCESSIBLE_FROM_SPECIFIED_TRANSACTION"}, + {STATUS_CANT_OPEN_MINIVERSION_WITH_MODIFY_INTENT, -EIO, + "STATUS_CANT_OPEN_MINIVERSION_WITH_MODIFY_INTENT"}, + {STATUS_CANT_CREATE_MORE_STREAM_MINIVERSIONS, -EIO, + "STATUS_CANT_CREATE_MORE_STREAM_MINIVERSIONS"}, + {STATUS_HANDLE_NO_LONGER_VALID, -EIO, "STATUS_HANDLE_NO_LONGER_VALID"}, + {STATUS_LOG_CORRUPTION_DETECTED, -EIO, + "STATUS_LOG_CORRUPTION_DETECTED"}, + {STATUS_RM_DISCONNECTED, -EIO, "STATUS_RM_DISCONNECTED"}, + {STATUS_ENLISTMENT_NOT_SUPERIOR, -EIO, + "STATUS_ENLISTMENT_NOT_SUPERIOR"}, + {STATUS_FILE_IDENTITY_NOT_PERSISTENT, -EIO, + "STATUS_FILE_IDENTITY_NOT_PERSISTENT"}, + {STATUS_CANT_BREAK_TRANSACTIONAL_DEPENDENCY, -EIO, + "STATUS_CANT_BREAK_TRANSACTIONAL_DEPENDENCY"}, + {STATUS_CANT_CROSS_RM_BOUNDARY, -EIO, "STATUS_CANT_CROSS_RM_BOUNDARY"}, + {STATUS_TXF_DIR_NOT_EMPTY, -EIO, "STATUS_TXF_DIR_NOT_EMPTY"}, + {STATUS_INDOUBT_TRANSACTIONS_EXIST, -EIO, + "STATUS_INDOUBT_TRANSACTIONS_EXIST"}, + {STATUS_TM_VOLATILE, -EIO, "STATUS_TM_VOLATILE"}, + {STATUS_ROLLBACK_TIMER_EXPIRED, -EIO, "STATUS_ROLLBACK_TIMER_EXPIRED"}, + {STATUS_TXF_ATTRIBUTE_CORRUPT, -EIO, "STATUS_TXF_ATTRIBUTE_CORRUPT"}, + {STATUS_EFS_NOT_ALLOWED_IN_TRANSACTION, -EIO, + "STATUS_EFS_NOT_ALLOWED_IN_TRANSACTION"}, + {STATUS_TRANSACTIONAL_OPEN_NOT_ALLOWED, -EIO, + "STATUS_TRANSACTIONAL_OPEN_NOT_ALLOWED"}, + {STATUS_TRANSACTED_MAPPING_UNSUPPORTED_REMOTE, -EIO, + "STATUS_TRANSACTED_MAPPING_UNSUPPORTED_REMOTE"}, + {STATUS_TRANSACTION_REQUIRED_PROMOTION, -EIO, + "STATUS_TRANSACTION_REQUIRED_PROMOTION"}, + {STATUS_CANNOT_EXECUTE_FILE_IN_TRANSACTION, -EIO, + "STATUS_CANNOT_EXECUTE_FILE_IN_TRANSACTION"}, + 
{STATUS_TRANSACTIONS_NOT_FROZEN, -EIO, + "STATUS_TRANSACTIONS_NOT_FROZEN"}, + {STATUS_TRANSACTION_FREEZE_IN_PROGRESS, -EIO, + "STATUS_TRANSACTION_FREEZE_IN_PROGRESS"}, + {STATUS_NOT_SNAPSHOT_VOLUME, -EIO, "STATUS_NOT_SNAPSHOT_VOLUME"}, + {STATUS_NO_SAVEPOINT_WITH_OPEN_FILES, -EIO, + "STATUS_NO_SAVEPOINT_WITH_OPEN_FILES"}, + {STATUS_SPARSE_NOT_ALLOWED_IN_TRANSACTION, -EIO, + "STATUS_SPARSE_NOT_ALLOWED_IN_TRANSACTION"}, + {STATUS_TM_IDENTITY_MISMATCH, -EIO, "STATUS_TM_IDENTITY_MISMATCH"}, + {STATUS_FLOATED_SECTION, -EIO, "STATUS_FLOATED_SECTION"}, + {STATUS_CANNOT_ACCEPT_TRANSACTED_WORK, -EIO, + "STATUS_CANNOT_ACCEPT_TRANSACTED_WORK"}, + {STATUS_CANNOT_ABORT_TRANSACTIONS, -EIO, + "STATUS_CANNOT_ABORT_TRANSACTIONS"}, + {STATUS_TRANSACTION_NOT_FOUND, -EIO, "STATUS_TRANSACTION_NOT_FOUND"}, + {STATUS_RESOURCEMANAGER_NOT_FOUND, -EIO, + "STATUS_RESOURCEMANAGER_NOT_FOUND"}, + {STATUS_ENLISTMENT_NOT_FOUND, -EIO, "STATUS_ENLISTMENT_NOT_FOUND"}, + {STATUS_TRANSACTIONMANAGER_NOT_FOUND, -EIO, + "STATUS_TRANSACTIONMANAGER_NOT_FOUND"}, + {STATUS_TRANSACTIONMANAGER_NOT_ONLINE, -EIO, + "STATUS_TRANSACTIONMANAGER_NOT_ONLINE"}, + {STATUS_TRANSACTIONMANAGER_RECOVERY_NAME_COLLISION, -EIO, + "STATUS_TRANSACTIONMANAGER_RECOVERY_NAME_COLLISION"}, + {STATUS_TRANSACTION_NOT_ROOT, -EIO, "STATUS_TRANSACTION_NOT_ROOT"}, + {STATUS_TRANSACTION_OBJECT_EXPIRED, -EIO, + "STATUS_TRANSACTION_OBJECT_EXPIRED"}, + {STATUS_COMPRESSION_NOT_ALLOWED_IN_TRANSACTION, -EIO, + "STATUS_COMPRESSION_NOT_ALLOWED_IN_TRANSACTION"}, + {STATUS_TRANSACTION_RESPONSE_NOT_ENLISTED, -EIO, + "STATUS_TRANSACTION_RESPONSE_NOT_ENLISTED"}, + {STATUS_TRANSACTION_RECORD_TOO_LONG, -EIO, + "STATUS_TRANSACTION_RECORD_TOO_LONG"}, + {STATUS_NO_LINK_TRACKING_IN_TRANSACTION, -EIO, + "STATUS_NO_LINK_TRACKING_IN_TRANSACTION"}, + {STATUS_OPERATION_NOT_SUPPORTED_IN_TRANSACTION, -EOPNOTSUPP, + "STATUS_OPERATION_NOT_SUPPORTED_IN_TRANSACTION"}, + {STATUS_TRANSACTION_INTEGRITY_VIOLATED, -EIO, + "STATUS_TRANSACTION_INTEGRITY_VIOLATED"}, + 
{STATUS_LOG_SECTOR_INVALID, -EIO, "STATUS_LOG_SECTOR_INVALID"}, + {STATUS_LOG_SECTOR_PARITY_INVALID, -EIO, + "STATUS_LOG_SECTOR_PARITY_INVALID"}, + {STATUS_LOG_SECTOR_REMAPPED, -EIO, "STATUS_LOG_SECTOR_REMAPPED"}, + {STATUS_LOG_BLOCK_INCOMPLETE, -EIO, "STATUS_LOG_BLOCK_INCOMPLETE"}, + {STATUS_LOG_INVALID_RANGE, -EIO, "STATUS_LOG_INVALID_RANGE"}, + {STATUS_LOG_BLOCKS_EXHAUSTED, -EIO, "STATUS_LOG_BLOCKS_EXHAUSTED"}, + {STATUS_LOG_READ_CONTEXT_INVALID, -EIO, + "STATUS_LOG_READ_CONTEXT_INVALID"}, + {STATUS_LOG_RESTART_INVALID, -EIO, "STATUS_LOG_RESTART_INVALID"}, + {STATUS_LOG_BLOCK_VERSION, -EIO, "STATUS_LOG_BLOCK_VERSION"}, + {STATUS_LOG_BLOCK_INVALID, -EIO, "STATUS_LOG_BLOCK_INVALID"}, + {STATUS_LOG_READ_MODE_INVALID, -EIO, "STATUS_LOG_READ_MODE_INVALID"}, + {STATUS_LOG_METADATA_CORRUPT, -EIO, "STATUS_LOG_METADATA_CORRUPT"}, + {STATUS_LOG_METADATA_INVALID, -EIO, "STATUS_LOG_METADATA_INVALID"}, + {STATUS_LOG_METADATA_INCONSISTENT, -EIO, + "STATUS_LOG_METADATA_INCONSISTENT"}, + {STATUS_LOG_RESERVATION_INVALID, -EIO, + "STATUS_LOG_RESERVATION_INVALID"}, + {STATUS_LOG_CANT_DELETE, -EIO, "STATUS_LOG_CANT_DELETE"}, + {STATUS_LOG_CONTAINER_LIMIT_EXCEEDED, -EIO, + "STATUS_LOG_CONTAINER_LIMIT_EXCEEDED"}, + {STATUS_LOG_START_OF_LOG, -EIO, "STATUS_LOG_START_OF_LOG"}, + {STATUS_LOG_POLICY_ALREADY_INSTALLED, -EIO, + "STATUS_LOG_POLICY_ALREADY_INSTALLED"}, + {STATUS_LOG_POLICY_NOT_INSTALLED, -EIO, + "STATUS_LOG_POLICY_NOT_INSTALLED"}, + {STATUS_LOG_POLICY_INVALID, -EIO, "STATUS_LOG_POLICY_INVALID"}, + {STATUS_LOG_POLICY_CONFLICT, -EIO, "STATUS_LOG_POLICY_CONFLICT"}, + {STATUS_LOG_PINNED_ARCHIVE_TAIL, -EIO, + "STATUS_LOG_PINNED_ARCHIVE_TAIL"}, + {STATUS_LOG_RECORD_NONEXISTENT, -EIO, "STATUS_LOG_RECORD_NONEXISTENT"}, + {STATUS_LOG_RECORDS_RESERVED_INVALID, -EIO, + "STATUS_LOG_RECORDS_RESERVED_INVALID"}, + {STATUS_LOG_SPACE_RESERVED_INVALID, -EIO, + "STATUS_LOG_SPACE_RESERVED_INVALID"}, + {STATUS_LOG_TAIL_INVALID, -EIO, "STATUS_LOG_TAIL_INVALID"}, + {STATUS_LOG_FULL, -EIO, 
"STATUS_LOG_FULL"}, + {STATUS_LOG_MULTIPLEXED, -EIO, "STATUS_LOG_MULTIPLEXED"}, + {STATUS_LOG_DEDICATED, -EIO, "STATUS_LOG_DEDICATED"}, + {STATUS_LOG_ARCHIVE_NOT_IN_PROGRESS, -EIO, + "STATUS_LOG_ARCHIVE_NOT_IN_PROGRESS"}, + {STATUS_LOG_ARCHIVE_IN_PROGRESS, -EIO, + "STATUS_LOG_ARCHIVE_IN_PROGRESS"}, + {STATUS_LOG_EPHEMERAL, -EIO, "STATUS_LOG_EPHEMERAL"}, + {STATUS_LOG_NOT_ENOUGH_CONTAINERS, -EIO, + "STATUS_LOG_NOT_ENOUGH_CONTAINERS"}, + {STATUS_LOG_CLIENT_ALREADY_REGISTERED, -EIO, + "STATUS_LOG_CLIENT_ALREADY_REGISTERED"}, + {STATUS_LOG_CLIENT_NOT_REGISTERED, -EIO, + "STATUS_LOG_CLIENT_NOT_REGISTERED"}, + {STATUS_LOG_FULL_HANDLER_IN_PROGRESS, -EIO, + "STATUS_LOG_FULL_HANDLER_IN_PROGRESS"}, + {STATUS_LOG_CONTAINER_READ_FAILED, -EIO, + "STATUS_LOG_CONTAINER_READ_FAILED"}, + {STATUS_LOG_CONTAINER_WRITE_FAILED, -EIO, + "STATUS_LOG_CONTAINER_WRITE_FAILED"}, + {STATUS_LOG_CONTAINER_OPEN_FAILED, -EIO, + "STATUS_LOG_CONTAINER_OPEN_FAILED"}, + {STATUS_LOG_CONTAINER_STATE_INVALID, -EIO, + "STATUS_LOG_CONTAINER_STATE_INVALID"}, + {STATUS_LOG_STATE_INVALID, -EIO, "STATUS_LOG_STATE_INVALID"}, + {STATUS_LOG_PINNED, -EIO, "STATUS_LOG_PINNED"}, + {STATUS_LOG_METADATA_FLUSH_FAILED, -EIO, + "STATUS_LOG_METADATA_FLUSH_FAILED"}, + {STATUS_LOG_INCONSISTENT_SECURITY, -EIO, + "STATUS_LOG_INCONSISTENT_SECURITY"}, + {STATUS_LOG_APPENDED_FLUSH_FAILED, -EIO, + "STATUS_LOG_APPENDED_FLUSH_FAILED"}, + {STATUS_LOG_PINNED_RESERVATION, -EIO, "STATUS_LOG_PINNED_RESERVATION"}, + {STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD, -EIO, + "STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD"}, + {STATUS_FLT_NO_HANDLER_DEFINED, -EIO, "STATUS_FLT_NO_HANDLER_DEFINED"}, + {STATUS_FLT_CONTEXT_ALREADY_DEFINED, -EIO, + "STATUS_FLT_CONTEXT_ALREADY_DEFINED"}, + {STATUS_FLT_INVALID_ASYNCHRONOUS_REQUEST, -EIO, + "STATUS_FLT_INVALID_ASYNCHRONOUS_REQUEST"}, + {STATUS_FLT_DISALLOW_FAST_IO, -EIO, "STATUS_FLT_DISALLOW_FAST_IO"}, + {STATUS_FLT_INVALID_NAME_REQUEST, -EIO, + "STATUS_FLT_INVALID_NAME_REQUEST"}, + 
{STATUS_FLT_NOT_SAFE_TO_POST_OPERATION, -EIO, + "STATUS_FLT_NOT_SAFE_TO_POST_OPERATION"}, + {STATUS_FLT_NOT_INITIALIZED, -EIO, "STATUS_FLT_NOT_INITIALIZED"}, + {STATUS_FLT_FILTER_NOT_READY, -EIO, "STATUS_FLT_FILTER_NOT_READY"}, + {STATUS_FLT_POST_OPERATION_CLEANUP, -EIO, + "STATUS_FLT_POST_OPERATION_CLEANUP"}, + {STATUS_FLT_INTERNAL_ERROR, -EIO, "STATUS_FLT_INTERNAL_ERROR"}, + {STATUS_FLT_DELETING_OBJECT, -EIO, "STATUS_FLT_DELETING_OBJECT"}, + {STATUS_FLT_MUST_BE_NONPAGED_POOL, -EIO, + "STATUS_FLT_MUST_BE_NONPAGED_POOL"}, + {STATUS_FLT_DUPLICATE_ENTRY, -EIO, "STATUS_FLT_DUPLICATE_ENTRY"}, + {STATUS_FLT_CBDQ_DISABLED, -EIO, "STATUS_FLT_CBDQ_DISABLED"}, + {STATUS_FLT_DO_NOT_ATTACH, -EIO, "STATUS_FLT_DO_NOT_ATTACH"}, + {STATUS_FLT_DO_NOT_DETACH, -EIO, "STATUS_FLT_DO_NOT_DETACH"}, + {STATUS_FLT_INSTANCE_ALTITUDE_COLLISION, -EIO, + "STATUS_FLT_INSTANCE_ALTITUDE_COLLISION"}, + {STATUS_FLT_INSTANCE_NAME_COLLISION, -EIO, + "STATUS_FLT_INSTANCE_NAME_COLLISION"}, + {STATUS_FLT_FILTER_NOT_FOUND, -EIO, "STATUS_FLT_FILTER_NOT_FOUND"}, + {STATUS_FLT_VOLUME_NOT_FOUND, -EIO, "STATUS_FLT_VOLUME_NOT_FOUND"}, + {STATUS_FLT_INSTANCE_NOT_FOUND, -EIO, "STATUS_FLT_INSTANCE_NOT_FOUND"}, + {STATUS_FLT_CONTEXT_ALLOCATION_NOT_FOUND, -EIO, + "STATUS_FLT_CONTEXT_ALLOCATION_NOT_FOUND"}, + {STATUS_FLT_INVALID_CONTEXT_REGISTRATION, -EIO, + "STATUS_FLT_INVALID_CONTEXT_REGISTRATION"}, + {STATUS_FLT_NAME_CACHE_MISS, -EIO, "STATUS_FLT_NAME_CACHE_MISS"}, + {STATUS_FLT_NO_DEVICE_OBJECT, -EIO, "STATUS_FLT_NO_DEVICE_OBJECT"}, + {STATUS_FLT_VOLUME_ALREADY_MOUNTED, -EIO, + "STATUS_FLT_VOLUME_ALREADY_MOUNTED"}, + {STATUS_FLT_ALREADY_ENLISTED, -EIO, "STATUS_FLT_ALREADY_ENLISTED"}, + {STATUS_FLT_CONTEXT_ALREADY_LINKED, -EIO, + "STATUS_FLT_CONTEXT_ALREADY_LINKED"}, + {STATUS_FLT_NO_WAITER_FOR_REPLY, -EIO, + "STATUS_FLT_NO_WAITER_FOR_REPLY"}, + {STATUS_MONITOR_NO_DESCRIPTOR, -EIO, "STATUS_MONITOR_NO_DESCRIPTOR"}, + {STATUS_MONITOR_UNKNOWN_DESCRIPTOR_FORMAT, -EIO, + "STATUS_MONITOR_UNKNOWN_DESCRIPTOR_FORMAT"}, + 
{STATUS_MONITOR_INVALID_DESCRIPTOR_CHECKSUM, -EIO, + "STATUS_MONITOR_INVALID_DESCRIPTOR_CHECKSUM"}, + {STATUS_MONITOR_INVALID_STANDARD_TIMING_BLOCK, -EIO, + "STATUS_MONITOR_INVALID_STANDARD_TIMING_BLOCK"}, + {STATUS_MONITOR_WMI_DATABLOCK_REGISTRATION_FAILED, -EIO, + "STATUS_MONITOR_WMI_DATABLOCK_REGISTRATION_FAILED"}, + {STATUS_MONITOR_INVALID_SERIAL_NUMBER_MONDSC_BLOCK, -EIO, + "STATUS_MONITOR_INVALID_SERIAL_NUMBER_MONDSC_BLOCK"}, + {STATUS_MONITOR_INVALID_USER_FRIENDLY_MONDSC_BLOCK, -EIO, + "STATUS_MONITOR_INVALID_USER_FRIENDLY_MONDSC_BLOCK"}, + {STATUS_MONITOR_NO_MORE_DESCRIPTOR_DATA, -EIO, + "STATUS_MONITOR_NO_MORE_DESCRIPTOR_DATA"}, + {STATUS_MONITOR_INVALID_DETAILED_TIMING_BLOCK, -EIO, + "STATUS_MONITOR_INVALID_DETAILED_TIMING_BLOCK"}, + {STATUS_GRAPHICS_NOT_EXCLUSIVE_MODE_OWNER, -EIO, + "STATUS_GRAPHICS_NOT_EXCLUSIVE_MODE_OWNER"}, + {STATUS_GRAPHICS_INSUFFICIENT_DMA_BUFFER, -EIO, + "STATUS_GRAPHICS_INSUFFICIENT_DMA_BUFFER"}, + {STATUS_GRAPHICS_INVALID_DISPLAY_ADAPTER, -EIO, + "STATUS_GRAPHICS_INVALID_DISPLAY_ADAPTER"}, + {STATUS_GRAPHICS_ADAPTER_WAS_RESET, -EIO, + "STATUS_GRAPHICS_ADAPTER_WAS_RESET"}, + {STATUS_GRAPHICS_INVALID_DRIVER_MODEL, -EIO, + "STATUS_GRAPHICS_INVALID_DRIVER_MODEL"}, + {STATUS_GRAPHICS_PRESENT_MODE_CHANGED, -EIO, + "STATUS_GRAPHICS_PRESENT_MODE_CHANGED"}, + {STATUS_GRAPHICS_PRESENT_OCCLUDED, -EIO, + "STATUS_GRAPHICS_PRESENT_OCCLUDED"}, + {STATUS_GRAPHICS_PRESENT_DENIED, -EIO, + "STATUS_GRAPHICS_PRESENT_DENIED"}, + {STATUS_GRAPHICS_CANNOTCOLORCONVERT, -EIO, + "STATUS_GRAPHICS_CANNOTCOLORCONVERT"}, + {STATUS_GRAPHICS_NO_VIDEO_MEMORY, -EIO, + "STATUS_GRAPHICS_NO_VIDEO_MEMORY"}, + {STATUS_GRAPHICS_CANT_LOCK_MEMORY, -EIO, + "STATUS_GRAPHICS_CANT_LOCK_MEMORY"}, + {STATUS_GRAPHICS_ALLOCATION_BUSY, -EBUSY, + "STATUS_GRAPHICS_ALLOCATION_BUSY"}, + {STATUS_GRAPHICS_TOO_MANY_REFERENCES, -EIO, + "STATUS_GRAPHICS_TOO_MANY_REFERENCES"}, + {STATUS_GRAPHICS_TRY_AGAIN_LATER, -EIO, + "STATUS_GRAPHICS_TRY_AGAIN_LATER"}, + {STATUS_GRAPHICS_TRY_AGAIN_NOW, 
-EIO, "STATUS_GRAPHICS_TRY_AGAIN_NOW"}, + {STATUS_GRAPHICS_ALLOCATION_INVALID, -EIO, + "STATUS_GRAPHICS_ALLOCATION_INVALID"}, + {STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNAVAILABLE, -EIO, + "STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNAVAILABLE"}, + {STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNSUPPORTED, -EIO, + "STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNSUPPORTED"}, + {STATUS_GRAPHICS_CANT_EVICT_PINNED_ALLOCATION, -EIO, + "STATUS_GRAPHICS_CANT_EVICT_PINNED_ALLOCATION"}, + {STATUS_GRAPHICS_INVALID_ALLOCATION_USAGE, -EIO, + "STATUS_GRAPHICS_INVALID_ALLOCATION_USAGE"}, + {STATUS_GRAPHICS_CANT_RENDER_LOCKED_ALLOCATION, -EIO, + "STATUS_GRAPHICS_CANT_RENDER_LOCKED_ALLOCATION"}, + {STATUS_GRAPHICS_ALLOCATION_CLOSED, -EIO, + "STATUS_GRAPHICS_ALLOCATION_CLOSED"}, + {STATUS_GRAPHICS_INVALID_ALLOCATION_INSTANCE, -EIO, + "STATUS_GRAPHICS_INVALID_ALLOCATION_INSTANCE"}, + {STATUS_GRAPHICS_INVALID_ALLOCATION_HANDLE, -EIO, + "STATUS_GRAPHICS_INVALID_ALLOCATION_HANDLE"}, + {STATUS_GRAPHICS_WRONG_ALLOCATION_DEVICE, -EIO, + "STATUS_GRAPHICS_WRONG_ALLOCATION_DEVICE"}, + {STATUS_GRAPHICS_ALLOCATION_CONTENT_LOST, -EIO, + "STATUS_GRAPHICS_ALLOCATION_CONTENT_LOST"}, + {STATUS_GRAPHICS_GPU_EXCEPTION_ON_DEVICE, -EIO, + "STATUS_GRAPHICS_GPU_EXCEPTION_ON_DEVICE"}, + {STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY"}, + {STATUS_GRAPHICS_VIDPN_TOPOLOGY_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_VIDPN_TOPOLOGY_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_VIDPN_TOPOLOGY_CURRENTLY_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_VIDPN_TOPOLOGY_CURRENTLY_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_INVALID_VIDPN, -EIO, "STATUS_GRAPHICS_INVALID_VIDPN"}, + {STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE, -EIO, + "STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE"}, + {STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET, -EIO, + "STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET"}, + {STATUS_GRAPHICS_VIDPN_MODALITY_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_VIDPN_MODALITY_NOT_SUPPORTED"}, + 
{STATUS_GRAPHICS_INVALID_VIDPN_SOURCEMODESET, -EIO, + "STATUS_GRAPHICS_INVALID_VIDPN_SOURCEMODESET"}, + {STATUS_GRAPHICS_INVALID_VIDPN_TARGETMODESET, -EIO, + "STATUS_GRAPHICS_INVALID_VIDPN_TARGETMODESET"}, + {STATUS_GRAPHICS_INVALID_FREQUENCY, -EIO, + "STATUS_GRAPHICS_INVALID_FREQUENCY"}, + {STATUS_GRAPHICS_INVALID_ACTIVE_REGION, -EIO, + "STATUS_GRAPHICS_INVALID_ACTIVE_REGION"}, + {STATUS_GRAPHICS_INVALID_TOTAL_REGION, -EIO, + "STATUS_GRAPHICS_INVALID_TOTAL_REGION"}, + {STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE_MODE, -EIO, + "STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE_MODE"}, + {STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET_MODE, -EIO, + "STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET_MODE"}, + {STATUS_GRAPHICS_PINNED_MODE_MUST_REMAIN_IN_SET, -EIO, + "STATUS_GRAPHICS_PINNED_MODE_MUST_REMAIN_IN_SET"}, + {STATUS_GRAPHICS_PATH_ALREADY_IN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_PATH_ALREADY_IN_TOPOLOGY"}, + {STATUS_GRAPHICS_MODE_ALREADY_IN_MODESET, -EIO, + "STATUS_GRAPHICS_MODE_ALREADY_IN_MODESET"}, + {STATUS_GRAPHICS_INVALID_VIDEOPRESENTSOURCESET, -EIO, + "STATUS_GRAPHICS_INVALID_VIDEOPRESENTSOURCESET"}, + {STATUS_GRAPHICS_INVALID_VIDEOPRESENTTARGETSET, -EIO, + "STATUS_GRAPHICS_INVALID_VIDEOPRESENTTARGETSET"}, + {STATUS_GRAPHICS_SOURCE_ALREADY_IN_SET, -EIO, + "STATUS_GRAPHICS_SOURCE_ALREADY_IN_SET"}, + {STATUS_GRAPHICS_TARGET_ALREADY_IN_SET, -EIO, + "STATUS_GRAPHICS_TARGET_ALREADY_IN_SET"}, + {STATUS_GRAPHICS_INVALID_VIDPN_PRESENT_PATH, -EIO, + "STATUS_GRAPHICS_INVALID_VIDPN_PRESENT_PATH"}, + {STATUS_GRAPHICS_NO_RECOMMENDED_VIDPN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_NO_RECOMMENDED_VIDPN_TOPOLOGY"}, + {STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGESET, -EIO, + "STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGESET"}, + {STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE, -EIO, + "STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE"}, + {STATUS_GRAPHICS_FREQUENCYRANGE_NOT_IN_SET, -EIO, + "STATUS_GRAPHICS_FREQUENCYRANGE_NOT_IN_SET"}, + {STATUS_GRAPHICS_FREQUENCYRANGE_ALREADY_IN_SET, -EIO, 
+ "STATUS_GRAPHICS_FREQUENCYRANGE_ALREADY_IN_SET"}, + {STATUS_GRAPHICS_STALE_MODESET, -EIO, "STATUS_GRAPHICS_STALE_MODESET"}, + {STATUS_GRAPHICS_INVALID_MONITOR_SOURCEMODESET, -EIO, + "STATUS_GRAPHICS_INVALID_MONITOR_SOURCEMODESET"}, + {STATUS_GRAPHICS_INVALID_MONITOR_SOURCE_MODE, -EIO, + "STATUS_GRAPHICS_INVALID_MONITOR_SOURCE_MODE"}, + {STATUS_GRAPHICS_NO_RECOMMENDED_FUNCTIONAL_VIDPN, -EIO, + "STATUS_GRAPHICS_NO_RECOMMENDED_FUNCTIONAL_VIDPN"}, + {STATUS_GRAPHICS_MODE_ID_MUST_BE_UNIQUE, -EIO, + "STATUS_GRAPHICS_MODE_ID_MUST_BE_UNIQUE"}, + {STATUS_GRAPHICS_EMPTY_ADAPTER_MONITOR_MODE_SUPPORT_INTERSECTION, -EIO, + "STATUS_GRAPHICS_EMPTY_ADAPTER_MONITOR_MODE_SUPPORT_INTERSECTION"}, + {STATUS_GRAPHICS_VIDEO_PRESENT_TARGETS_LESS_THAN_SOURCES, -EIO, + "STATUS_GRAPHICS_VIDEO_PRESENT_TARGETS_LESS_THAN_SOURCES"}, + {STATUS_GRAPHICS_PATH_NOT_IN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_PATH_NOT_IN_TOPOLOGY"}, + {STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_SOURCE, -EIO, + "STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_SOURCE"}, + {STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_TARGET, -EIO, + "STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_TARGET"}, + {STATUS_GRAPHICS_INVALID_MONITORDESCRIPTORSET, -EIO, + "STATUS_GRAPHICS_INVALID_MONITORDESCRIPTORSET"}, + {STATUS_GRAPHICS_INVALID_MONITORDESCRIPTOR, -EIO, + "STATUS_GRAPHICS_INVALID_MONITORDESCRIPTOR"}, + {STATUS_GRAPHICS_MONITORDESCRIPTOR_NOT_IN_SET, -EIO, + "STATUS_GRAPHICS_MONITORDESCRIPTOR_NOT_IN_SET"}, + {STATUS_GRAPHICS_MONITORDESCRIPTOR_ALREADY_IN_SET, -EIO, + "STATUS_GRAPHICS_MONITORDESCRIPTOR_ALREADY_IN_SET"}, + {STATUS_GRAPHICS_MONITORDESCRIPTOR_ID_MUST_BE_UNIQUE, -EIO, + "STATUS_GRAPHICS_MONITORDESCRIPTOR_ID_MUST_BE_UNIQUE"}, + {STATUS_GRAPHICS_INVALID_VIDPN_TARGET_SUBSET_TYPE, -EIO, + "STATUS_GRAPHICS_INVALID_VIDPN_TARGET_SUBSET_TYPE"}, + {STATUS_GRAPHICS_RESOURCES_NOT_RELATED, -EIO, + "STATUS_GRAPHICS_RESOURCES_NOT_RELATED"}, + {STATUS_GRAPHICS_SOURCE_ID_MUST_BE_UNIQUE, -EIO, + 
"STATUS_GRAPHICS_SOURCE_ID_MUST_BE_UNIQUE"}, + {STATUS_GRAPHICS_TARGET_ID_MUST_BE_UNIQUE, -EIO, + "STATUS_GRAPHICS_TARGET_ID_MUST_BE_UNIQUE"}, + {STATUS_GRAPHICS_NO_AVAILABLE_VIDPN_TARGET, -EIO, + "STATUS_GRAPHICS_NO_AVAILABLE_VIDPN_TARGET"}, + {STATUS_GRAPHICS_MONITOR_COULD_NOT_BE_ASSOCIATED_WITH_ADAPTER, -EIO, + "STATUS_GRAPHICS_MONITOR_COULD_NOT_BE_ASSOCIATED_WITH_ADAPTER"}, + {STATUS_GRAPHICS_NO_VIDPNMGR, -EIO, "STATUS_GRAPHICS_NO_VIDPNMGR"}, + {STATUS_GRAPHICS_NO_ACTIVE_VIDPN, -EIO, + "STATUS_GRAPHICS_NO_ACTIVE_VIDPN"}, + {STATUS_GRAPHICS_STALE_VIDPN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_STALE_VIDPN_TOPOLOGY"}, + {STATUS_GRAPHICS_MONITOR_NOT_CONNECTED, -EIO, + "STATUS_GRAPHICS_MONITOR_NOT_CONNECTED"}, + {STATUS_GRAPHICS_SOURCE_NOT_IN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_SOURCE_NOT_IN_TOPOLOGY"}, + {STATUS_GRAPHICS_INVALID_PRIMARYSURFACE_SIZE, -EIO, + "STATUS_GRAPHICS_INVALID_PRIMARYSURFACE_SIZE"}, + {STATUS_GRAPHICS_INVALID_VISIBLEREGION_SIZE, -EIO, + "STATUS_GRAPHICS_INVALID_VISIBLEREGION_SIZE"}, + {STATUS_GRAPHICS_INVALID_STRIDE, -EIO, + "STATUS_GRAPHICS_INVALID_STRIDE"}, + {STATUS_GRAPHICS_INVALID_PIXELFORMAT, -EIO, + "STATUS_GRAPHICS_INVALID_PIXELFORMAT"}, + {STATUS_GRAPHICS_INVALID_COLORBASIS, -EIO, + "STATUS_GRAPHICS_INVALID_COLORBASIS"}, + {STATUS_GRAPHICS_INVALID_PIXELVALUEACCESSMODE, -EIO, + "STATUS_GRAPHICS_INVALID_PIXELVALUEACCESSMODE"}, + {STATUS_GRAPHICS_TARGET_NOT_IN_TOPOLOGY, -EIO, + "STATUS_GRAPHICS_TARGET_NOT_IN_TOPOLOGY"}, + {STATUS_GRAPHICS_NO_DISPLAY_MODE_MANAGEMENT_SUPPORT, -EIO, + "STATUS_GRAPHICS_NO_DISPLAY_MODE_MANAGEMENT_SUPPORT"}, + {STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE, -EIO, + "STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE"}, + {STATUS_GRAPHICS_CANT_ACCESS_ACTIVE_VIDPN, -EIO, + "STATUS_GRAPHICS_CANT_ACCESS_ACTIVE_VIDPN"}, + {STATUS_GRAPHICS_INVALID_PATH_IMPORTANCE_ORDINAL, -EIO, + "STATUS_GRAPHICS_INVALID_PATH_IMPORTANCE_ORDINAL"}, + {STATUS_GRAPHICS_INVALID_PATH_CONTENT_GEOMETRY_TRANSFORMATION, -EIO, + 
"STATUS_GRAPHICS_INVALID_PATH_CONTENT_GEOMETRY_TRANSFORMATION"}, + {STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_SUPPORTED, + -EIO, + "STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_INVALID_GAMMA_RAMP, -EIO, + "STATUS_GRAPHICS_INVALID_GAMMA_RAMP"}, + {STATUS_GRAPHICS_GAMMA_RAMP_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_GAMMA_RAMP_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_MULTISAMPLING_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_MULTISAMPLING_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_MODE_NOT_IN_MODESET, -EIO, + "STATUS_GRAPHICS_MODE_NOT_IN_MODESET"}, + {STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY_RECOMMENDATION_REASON, -EIO, + "STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY_RECOMMENDATION_REASON"}, + {STATUS_GRAPHICS_INVALID_PATH_CONTENT_TYPE, -EIO, + "STATUS_GRAPHICS_INVALID_PATH_CONTENT_TYPE"}, + {STATUS_GRAPHICS_INVALID_COPYPROTECTION_TYPE, -EIO, + "STATUS_GRAPHICS_INVALID_COPYPROTECTION_TYPE"}, + {STATUS_GRAPHICS_UNASSIGNED_MODESET_ALREADY_EXISTS, -EIO, + "STATUS_GRAPHICS_UNASSIGNED_MODESET_ALREADY_EXISTS"}, + {STATUS_GRAPHICS_INVALID_SCANLINE_ORDERING, -EIO, + "STATUS_GRAPHICS_INVALID_SCANLINE_ORDERING"}, + {STATUS_GRAPHICS_TOPOLOGY_CHANGES_NOT_ALLOWED, -EIO, + "STATUS_GRAPHICS_TOPOLOGY_CHANGES_NOT_ALLOWED"}, + {STATUS_GRAPHICS_NO_AVAILABLE_IMPORTANCE_ORDINALS, -EIO, + "STATUS_GRAPHICS_NO_AVAILABLE_IMPORTANCE_ORDINALS"}, + {STATUS_GRAPHICS_INCOMPATIBLE_PRIVATE_FORMAT, -EIO, + "STATUS_GRAPHICS_INCOMPATIBLE_PRIVATE_FORMAT"}, + {STATUS_GRAPHICS_INVALID_MODE_PRUNING_ALGORITHM, -EIO, + "STATUS_GRAPHICS_INVALID_MODE_PRUNING_ALGORITHM"}, + {STATUS_GRAPHICS_INVALID_MONITOR_CAPABILITY_ORIGIN, -EIO, + "STATUS_GRAPHICS_INVALID_MONITOR_CAPABILITY_ORIGIN"}, + {STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE_CONSTRAINT, -EIO, + "STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE_CONSTRAINT"}, + {STATUS_GRAPHICS_MAX_NUM_PATHS_REACHED, -EIO, + "STATUS_GRAPHICS_MAX_NUM_PATHS_REACHED"}, + {STATUS_GRAPHICS_CANCEL_VIDPN_TOPOLOGY_AUGMENTATION, -EIO, + 
"STATUS_GRAPHICS_CANCEL_VIDPN_TOPOLOGY_AUGMENTATION"}, + {STATUS_GRAPHICS_INVALID_CLIENT_TYPE, -EIO, + "STATUS_GRAPHICS_INVALID_CLIENT_TYPE"}, + {STATUS_GRAPHICS_CLIENTVIDPN_NOT_SET, -EIO, + "STATUS_GRAPHICS_CLIENTVIDPN_NOT_SET"}, + {STATUS_GRAPHICS_SPECIFIED_CHILD_ALREADY_CONNECTED, -EIO, + "STATUS_GRAPHICS_SPECIFIED_CHILD_ALREADY_CONNECTED"}, + {STATUS_GRAPHICS_CHILD_DESCRIPTOR_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_CHILD_DESCRIPTOR_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_NOT_A_LINKED_ADAPTER, -EIO, + "STATUS_GRAPHICS_NOT_A_LINKED_ADAPTER"}, + {STATUS_GRAPHICS_LEADLINK_NOT_ENUMERATED, -EIO, + "STATUS_GRAPHICS_LEADLINK_NOT_ENUMERATED"}, + {STATUS_GRAPHICS_CHAINLINKS_NOT_ENUMERATED, -EIO, + "STATUS_GRAPHICS_CHAINLINKS_NOT_ENUMERATED"}, + {STATUS_GRAPHICS_ADAPTER_CHAIN_NOT_READY, -EIO, + "STATUS_GRAPHICS_ADAPTER_CHAIN_NOT_READY"}, + {STATUS_GRAPHICS_CHAINLINKS_NOT_STARTED, -EIO, + "STATUS_GRAPHICS_CHAINLINKS_NOT_STARTED"}, + {STATUS_GRAPHICS_CHAINLINKS_NOT_POWERED_ON, -EIO, + "STATUS_GRAPHICS_CHAINLINKS_NOT_POWERED_ON"}, + {STATUS_GRAPHICS_INCONSISTENT_DEVICE_LINK_STATE, -EIO, + "STATUS_GRAPHICS_INCONSISTENT_DEVICE_LINK_STATE"}, + {STATUS_GRAPHICS_NOT_POST_DEVICE_DRIVER, -EIO, + "STATUS_GRAPHICS_NOT_POST_DEVICE_DRIVER"}, + {STATUS_GRAPHICS_ADAPTER_ACCESS_NOT_EXCLUDED, -EIO, + "STATUS_GRAPHICS_ADAPTER_ACCESS_NOT_EXCLUDED"}, + {STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_COPP_SEMANTICS, + -EIO, + "STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_COPP_SEMANTICS"}, + {STATUS_GRAPHICS_OPM_INVALID_INFORMATION_REQUEST, -EIO, + "STATUS_GRAPHICS_OPM_INVALID_INFORMATION_REQUEST"}, + {STATUS_GRAPHICS_OPM_DRIVER_INTERNAL_ERROR, -EIO, + "STATUS_GRAPHICS_OPM_DRIVER_INTERNAL_ERROR"}, + {STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_OPM_SEMANTICS, -EIO, + "STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_OPM_SEMANTICS"}, + {STATUS_GRAPHICS_OPM_SIGNALING_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_OPM_SIGNALING_NOT_SUPPORTED"}, + 
{STATUS_GRAPHICS_OPM_INVALID_CONFIGURATION_REQUEST, -EIO, + "STATUS_GRAPHICS_OPM_INVALID_CONFIGURATION_REQUEST"}, + {STATUS_GRAPHICS_OPM_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_OPM_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_COPP_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_COPP_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_UAB_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_UAB_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_OPM_INVALID_ENCRYPTED_PARAMETERS, -EIO, + "STATUS_GRAPHICS_OPM_INVALID_ENCRYPTED_PARAMETERS"}, + {STATUS_GRAPHICS_OPM_PARAMETER_ARRAY_TOO_SMALL, -EIO, + "STATUS_GRAPHICS_OPM_PARAMETER_ARRAY_TOO_SMALL"}, + {STATUS_GRAPHICS_OPM_NO_PROTECTED_OUTPUTS_EXIST, -EIO, + "STATUS_GRAPHICS_OPM_NO_PROTECTED_OUTPUTS_EXIST"}, + {STATUS_GRAPHICS_PVP_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME, -EIO, + "STATUS_GRAPHICS_PVP_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME"}, + {STATUS_GRAPHICS_PVP_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP, -EIO, + "STATUS_GRAPHICS_PVP_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP"}, + {STATUS_GRAPHICS_PVP_MIRRORING_DEVICES_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_PVP_MIRRORING_DEVICES_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_OPM_INVALID_POINTER, -EIO, + "STATUS_GRAPHICS_OPM_INVALID_POINTER"}, + {STATUS_GRAPHICS_OPM_INTERNAL_ERROR, -EIO, + "STATUS_GRAPHICS_OPM_INTERNAL_ERROR"}, + {STATUS_GRAPHICS_OPM_INVALID_HANDLE, -EIO, + "STATUS_GRAPHICS_OPM_INVALID_HANDLE"}, + {STATUS_GRAPHICS_PVP_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE, -EIO, + "STATUS_GRAPHICS_PVP_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE"}, + {STATUS_GRAPHICS_PVP_INVALID_CERTIFICATE_LENGTH, -EIO, + "STATUS_GRAPHICS_PVP_INVALID_CERTIFICATE_LENGTH"}, + {STATUS_GRAPHICS_OPM_SPANNING_MODE_ENABLED, -EIO, + "STATUS_GRAPHICS_OPM_SPANNING_MODE_ENABLED"}, + {STATUS_GRAPHICS_OPM_THEATER_MODE_ENABLED, -EIO, + "STATUS_GRAPHICS_OPM_THEATER_MODE_ENABLED"}, + {STATUS_GRAPHICS_PVP_HFS_FAILED, -EIO, + "STATUS_GRAPHICS_PVP_HFS_FAILED"}, + {STATUS_GRAPHICS_OPM_INVALID_SRM, -EIO, + "STATUS_GRAPHICS_OPM_INVALID_SRM"}, + 
{STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_HDCP, -EIO, + "STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_HDCP"}, + {STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_ACP, -EIO, + "STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_ACP"}, + {STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_CGMSA, -EIO, + "STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_CGMSA"}, + {STATUS_GRAPHICS_OPM_HDCP_SRM_NEVER_SET, -EIO, + "STATUS_GRAPHICS_OPM_HDCP_SRM_NEVER_SET"}, + {STATUS_GRAPHICS_OPM_RESOLUTION_TOO_HIGH, -EIO, + "STATUS_GRAPHICS_OPM_RESOLUTION_TOO_HIGH"}, + {STATUS_GRAPHICS_OPM_ALL_HDCP_HARDWARE_ALREADY_IN_USE, -EIO, + "STATUS_GRAPHICS_OPM_ALL_HDCP_HARDWARE_ALREADY_IN_USE"}, + {STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_NO_LONGER_EXISTS, -EIO, + "STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_NO_LONGER_EXISTS"}, + {STATUS_GRAPHICS_OPM_SESSION_TYPE_CHANGE_IN_PROGRESS, -EIO, + "STATUS_GRAPHICS_OPM_SESSION_TYPE_CHANGE_IN_PROGRESS"}, + {STATUS_GRAPHICS_I2C_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_I2C_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_I2C_DEVICE_DOES_NOT_EXIST, -EIO, + "STATUS_GRAPHICS_I2C_DEVICE_DOES_NOT_EXIST"}, + {STATUS_GRAPHICS_I2C_ERROR_TRANSMITTING_DATA, -EIO, + "STATUS_GRAPHICS_I2C_ERROR_TRANSMITTING_DATA"}, + {STATUS_GRAPHICS_I2C_ERROR_RECEIVING_DATA, -EIO, + "STATUS_GRAPHICS_I2C_ERROR_RECEIVING_DATA"}, + {STATUS_GRAPHICS_DDCCI_VCP_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_DDCCI_VCP_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_DDCCI_INVALID_DATA, -EIO, + "STATUS_GRAPHICS_DDCCI_INVALID_DATA"}, + {STATUS_GRAPHICS_DDCCI_MONITOR_RETURNED_INVALID_TIMING_STATUS_BYTE, + -EIO, + "STATUS_GRAPHICS_DDCCI_MONITOR_RETURNED_INVALID_TIMING_STATUS_BYTE"}, + {STATUS_GRAPHICS_DDCCI_INVALID_CAPABILITIES_STRING, -EIO, + "STATUS_GRAPHICS_DDCCI_INVALID_CAPABILITIES_STRING"}, + {STATUS_GRAPHICS_MCA_INTERNAL_ERROR, -EIO, + "STATUS_GRAPHICS_MCA_INTERNAL_ERROR"}, + {STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_COMMAND, -EIO, + "STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_COMMAND"}, + {STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_LENGTH, -EIO, + 
"STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_LENGTH"}, + {STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_CHECKSUM, -EIO, + "STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_CHECKSUM"}, + {STATUS_GRAPHICS_INVALID_PHYSICAL_MONITOR_HANDLE, -EIO, + "STATUS_GRAPHICS_INVALID_PHYSICAL_MONITOR_HANDLE"}, + {STATUS_GRAPHICS_MONITOR_NO_LONGER_EXISTS, -EIO, + "STATUS_GRAPHICS_MONITOR_NO_LONGER_EXISTS"}, + {STATUS_GRAPHICS_ONLY_CONSOLE_SESSION_SUPPORTED, -EIO, + "STATUS_GRAPHICS_ONLY_CONSOLE_SESSION_SUPPORTED"}, + {STATUS_GRAPHICS_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME, -EIO, + "STATUS_GRAPHICS_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME"}, + {STATUS_GRAPHICS_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP, -EIO, + "STATUS_GRAPHICS_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP"}, + {STATUS_GRAPHICS_MIRRORING_DEVICES_NOT_SUPPORTED, -EIO, + "STATUS_GRAPHICS_MIRRORING_DEVICES_NOT_SUPPORTED"}, + {STATUS_GRAPHICS_INVALID_POINTER, -EIO, + "STATUS_GRAPHICS_INVALID_POINTER"}, + {STATUS_GRAPHICS_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE, -EIO, + "STATUS_GRAPHICS_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE"}, + {STATUS_GRAPHICS_PARAMETER_ARRAY_TOO_SMALL, -EIO, + "STATUS_GRAPHICS_PARAMETER_ARRAY_TOO_SMALL"}, + {STATUS_GRAPHICS_INTERNAL_ERROR, -EIO, + "STATUS_GRAPHICS_INTERNAL_ERROR"}, + {STATUS_GRAPHICS_SESSION_TYPE_CHANGE_IN_PROGRESS, -EIO, + "STATUS_GRAPHICS_SESSION_TYPE_CHANGE_IN_PROGRESS"}, + {STATUS_FVE_LOCKED_VOLUME, -EIO, "STATUS_FVE_LOCKED_VOLUME"}, + {STATUS_FVE_NOT_ENCRYPTED, -EIO, "STATUS_FVE_NOT_ENCRYPTED"}, + {STATUS_FVE_BAD_INFORMATION, -EIO, "STATUS_FVE_BAD_INFORMATION"}, + {STATUS_FVE_TOO_SMALL, -EIO, "STATUS_FVE_TOO_SMALL"}, + {STATUS_FVE_FAILED_WRONG_FS, -EIO, "STATUS_FVE_FAILED_WRONG_FS"}, + {STATUS_FVE_FAILED_BAD_FS, -EIO, "STATUS_FVE_FAILED_BAD_FS"}, + {STATUS_FVE_FS_NOT_EXTENDED, -EIO, "STATUS_FVE_FS_NOT_EXTENDED"}, + {STATUS_FVE_FS_MOUNTED, -EIO, "STATUS_FVE_FS_MOUNTED"}, + {STATUS_FVE_NO_LICENSE, -EIO, "STATUS_FVE_NO_LICENSE"}, + {STATUS_FVE_ACTION_NOT_ALLOWED, -EIO, "STATUS_FVE_ACTION_NOT_ALLOWED"}, + 
{STATUS_FVE_BAD_DATA, -EIO, "STATUS_FVE_BAD_DATA"}, + {STATUS_FVE_VOLUME_NOT_BOUND, -EIO, "STATUS_FVE_VOLUME_NOT_BOUND"}, + {STATUS_FVE_NOT_DATA_VOLUME, -EIO, "STATUS_FVE_NOT_DATA_VOLUME"}, + {STATUS_FVE_CONV_READ_ERROR, -EIO, "STATUS_FVE_CONV_READ_ERROR"}, + {STATUS_FVE_CONV_WRITE_ERROR, -EIO, "STATUS_FVE_CONV_WRITE_ERROR"}, + {STATUS_FVE_OVERLAPPED_UPDATE, -EIO, "STATUS_FVE_OVERLAPPED_UPDATE"}, + {STATUS_FVE_FAILED_SECTOR_SIZE, -EIO, "STATUS_FVE_FAILED_SECTOR_SIZE"}, + {STATUS_FVE_FAILED_AUTHENTICATION, -EIO, + "STATUS_FVE_FAILED_AUTHENTICATION"}, + {STATUS_FVE_NOT_OS_VOLUME, -EIO, "STATUS_FVE_NOT_OS_VOLUME"}, + {STATUS_FVE_KEYFILE_NOT_FOUND, -EIO, "STATUS_FVE_KEYFILE_NOT_FOUND"}, + {STATUS_FVE_KEYFILE_INVALID, -EIO, "STATUS_FVE_KEYFILE_INVALID"}, + {STATUS_FVE_KEYFILE_NO_VMK, -EIO, "STATUS_FVE_KEYFILE_NO_VMK"}, + {STATUS_FVE_TPM_DISABLED, -EIO, "STATUS_FVE_TPM_DISABLED"}, + {STATUS_FVE_TPM_SRK_AUTH_NOT_ZERO, -EIO, + "STATUS_FVE_TPM_SRK_AUTH_NOT_ZERO"}, + {STATUS_FVE_TPM_INVALID_PCR, -EIO, "STATUS_FVE_TPM_INVALID_PCR"}, + {STATUS_FVE_TPM_NO_VMK, -EIO, "STATUS_FVE_TPM_NO_VMK"}, + {STATUS_FVE_PIN_INVALID, -EIO, "STATUS_FVE_PIN_INVALID"}, + {STATUS_FVE_AUTH_INVALID_APPLICATION, -EIO, + "STATUS_FVE_AUTH_INVALID_APPLICATION"}, + {STATUS_FVE_AUTH_INVALID_CONFIG, -EIO, + "STATUS_FVE_AUTH_INVALID_CONFIG"}, + {STATUS_FVE_DEBUGGER_ENABLED, -EIO, "STATUS_FVE_DEBUGGER_ENABLED"}, + {STATUS_FVE_DRY_RUN_FAILED, -EIO, "STATUS_FVE_DRY_RUN_FAILED"}, + {STATUS_FVE_BAD_METADATA_POINTER, -EIO, + "STATUS_FVE_BAD_METADATA_POINTER"}, + {STATUS_FVE_OLD_METADATA_COPY, -EIO, "STATUS_FVE_OLD_METADATA_COPY"}, + {STATUS_FVE_REBOOT_REQUIRED, -EIO, "STATUS_FVE_REBOOT_REQUIRED"}, + {STATUS_FVE_RAW_ACCESS, -EIO, "STATUS_FVE_RAW_ACCESS"}, + {STATUS_FVE_RAW_BLOCKED, -EIO, "STATUS_FVE_RAW_BLOCKED"}, + {STATUS_FWP_CALLOUT_NOT_FOUND, -EIO, "STATUS_FWP_CALLOUT_NOT_FOUND"}, + {STATUS_FWP_CONDITION_NOT_FOUND, -EIO, + "STATUS_FWP_CONDITION_NOT_FOUND"}, + {STATUS_FWP_FILTER_NOT_FOUND, -EIO, 
"STATUS_FWP_FILTER_NOT_FOUND"}, + {STATUS_FWP_LAYER_NOT_FOUND, -EIO, "STATUS_FWP_LAYER_NOT_FOUND"}, + {STATUS_FWP_PROVIDER_NOT_FOUND, -EIO, "STATUS_FWP_PROVIDER_NOT_FOUND"}, + {STATUS_FWP_PROVIDER_CONTEXT_NOT_FOUND, -EIO, + "STATUS_FWP_PROVIDER_CONTEXT_NOT_FOUND"}, + {STATUS_FWP_SUBLAYER_NOT_FOUND, -EIO, "STATUS_FWP_SUBLAYER_NOT_FOUND"}, + {STATUS_FWP_NOT_FOUND, -EIO, "STATUS_FWP_NOT_FOUND"}, + {STATUS_FWP_ALREADY_EXISTS, -EIO, "STATUS_FWP_ALREADY_EXISTS"}, + {STATUS_FWP_IN_USE, -EIO, "STATUS_FWP_IN_USE"}, + {STATUS_FWP_DYNAMIC_SESSION_IN_PROGRESS, -EIO, + "STATUS_FWP_DYNAMIC_SESSION_IN_PROGRESS"}, + {STATUS_FWP_WRONG_SESSION, -EIO, "STATUS_FWP_WRONG_SESSION"}, + {STATUS_FWP_NO_TXN_IN_PROGRESS, -EIO, "STATUS_FWP_NO_TXN_IN_PROGRESS"}, + {STATUS_FWP_TXN_IN_PROGRESS, -EIO, "STATUS_FWP_TXN_IN_PROGRESS"}, + {STATUS_FWP_TXN_ABORTED, -EIO, "STATUS_FWP_TXN_ABORTED"}, + {STATUS_FWP_SESSION_ABORTED, -EIO, "STATUS_FWP_SESSION_ABORTED"}, + {STATUS_FWP_INCOMPATIBLE_TXN, -EIO, "STATUS_FWP_INCOMPATIBLE_TXN"}, + {STATUS_FWP_TIMEOUT, -ETIMEDOUT, "STATUS_FWP_TIMEOUT"}, + {STATUS_FWP_NET_EVENTS_DISABLED, -EIO, + "STATUS_FWP_NET_EVENTS_DISABLED"}, + {STATUS_FWP_INCOMPATIBLE_LAYER, -EIO, "STATUS_FWP_INCOMPATIBLE_LAYER"}, + {STATUS_FWP_KM_CLIENTS_ONLY, -EIO, "STATUS_FWP_KM_CLIENTS_ONLY"}, + {STATUS_FWP_LIFETIME_MISMATCH, -EIO, "STATUS_FWP_LIFETIME_MISMATCH"}, + {STATUS_FWP_BUILTIN_OBJECT, -EIO, "STATUS_FWP_BUILTIN_OBJECT"}, + {STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS, -EIO, + "STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS"}, + {STATUS_FWP_TOO_MANY_CALLOUTS, -EIO, "STATUS_FWP_TOO_MANY_CALLOUTS"}, + {STATUS_FWP_NOTIFICATION_DROPPED, -EIO, + "STATUS_FWP_NOTIFICATION_DROPPED"}, + {STATUS_FWP_TRAFFIC_MISMATCH, -EIO, "STATUS_FWP_TRAFFIC_MISMATCH"}, + {STATUS_FWP_INCOMPATIBLE_SA_STATE, -EIO, + "STATUS_FWP_INCOMPATIBLE_SA_STATE"}, + {STATUS_FWP_NULL_POINTER, -EIO, "STATUS_FWP_NULL_POINTER"}, + {STATUS_FWP_INVALID_ENUMERATOR, -EIO, "STATUS_FWP_INVALID_ENUMERATOR"}, + {STATUS_FWP_INVALID_FLAGS, -EIO, 
"STATUS_FWP_INVALID_FLAGS"}, + {STATUS_FWP_INVALID_NET_MASK, -EIO, "STATUS_FWP_INVALID_NET_MASK"}, + {STATUS_FWP_INVALID_RANGE, -EIO, "STATUS_FWP_INVALID_RANGE"}, + {STATUS_FWP_INVALID_INTERVAL, -EIO, "STATUS_FWP_INVALID_INTERVAL"}, + {STATUS_FWP_ZERO_LENGTH_ARRAY, -EIO, "STATUS_FWP_ZERO_LENGTH_ARRAY"}, + {STATUS_FWP_NULL_DISPLAY_NAME, -EIO, "STATUS_FWP_NULL_DISPLAY_NAME"}, + {STATUS_FWP_INVALID_ACTION_TYPE, -EIO, + "STATUS_FWP_INVALID_ACTION_TYPE"}, + {STATUS_FWP_INVALID_WEIGHT, -EIO, "STATUS_FWP_INVALID_WEIGHT"}, + {STATUS_FWP_MATCH_TYPE_MISMATCH, -EIO, + "STATUS_FWP_MATCH_TYPE_MISMATCH"}, + {STATUS_FWP_TYPE_MISMATCH, -EIO, "STATUS_FWP_TYPE_MISMATCH"}, + {STATUS_FWP_OUT_OF_BOUNDS, -EIO, "STATUS_FWP_OUT_OF_BOUNDS"}, + {STATUS_FWP_RESERVED, -EIO, "STATUS_FWP_RESERVED"}, + {STATUS_FWP_DUPLICATE_CONDITION, -EIO, + "STATUS_FWP_DUPLICATE_CONDITION"}, + {STATUS_FWP_DUPLICATE_KEYMOD, -EIO, "STATUS_FWP_DUPLICATE_KEYMOD"}, + {STATUS_FWP_ACTION_INCOMPATIBLE_WITH_LAYER, -EIO, + "STATUS_FWP_ACTION_INCOMPATIBLE_WITH_LAYER"}, + {STATUS_FWP_ACTION_INCOMPATIBLE_WITH_SUBLAYER, -EIO, + "STATUS_FWP_ACTION_INCOMPATIBLE_WITH_SUBLAYER"}, + {STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_LAYER, -EIO, + "STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_LAYER"}, + {STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_CALLOUT, -EIO, + "STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_CALLOUT"}, + {STATUS_FWP_INCOMPATIBLE_AUTH_METHOD, -EIO, + "STATUS_FWP_INCOMPATIBLE_AUTH_METHOD"}, + {STATUS_FWP_INCOMPATIBLE_DH_GROUP, -EIO, + "STATUS_FWP_INCOMPATIBLE_DH_GROUP"}, + {STATUS_FWP_EM_NOT_SUPPORTED, -EOPNOTSUPP, + "STATUS_FWP_EM_NOT_SUPPORTED"}, + {STATUS_FWP_NEVER_MATCH, -EIO, "STATUS_FWP_NEVER_MATCH"}, + {STATUS_FWP_PROVIDER_CONTEXT_MISMATCH, -EIO, + "STATUS_FWP_PROVIDER_CONTEXT_MISMATCH"}, + {STATUS_FWP_INVALID_PARAMETER, -EIO, "STATUS_FWP_INVALID_PARAMETER"}, + {STATUS_FWP_TOO_MANY_SUBLAYERS, -EIO, "STATUS_FWP_TOO_MANY_SUBLAYERS"}, + {STATUS_FWP_CALLOUT_NOTIFICATION_FAILED, -EIO, + "STATUS_FWP_CALLOUT_NOTIFICATION_FAILED"}, + 
{STATUS_FWP_INCOMPATIBLE_AUTH_CONFIG, -EIO, + "STATUS_FWP_INCOMPATIBLE_AUTH_CONFIG"}, + {STATUS_FWP_INCOMPATIBLE_CIPHER_CONFIG, -EIO, + "STATUS_FWP_INCOMPATIBLE_CIPHER_CONFIG"}, + {STATUS_FWP_TCPIP_NOT_READY, -EIO, "STATUS_FWP_TCPIP_NOT_READY"}, + {STATUS_FWP_INJECT_HANDLE_CLOSING, -EIO, + "STATUS_FWP_INJECT_HANDLE_CLOSING"}, + {STATUS_FWP_INJECT_HANDLE_STALE, -EIO, + "STATUS_FWP_INJECT_HANDLE_STALE"}, + {STATUS_FWP_CANNOT_PEND, -EIO, "STATUS_FWP_CANNOT_PEND"}, + {STATUS_NDIS_CLOSING, -EIO, "STATUS_NDIS_CLOSING"}, + {STATUS_NDIS_BAD_VERSION, -EIO, "STATUS_NDIS_BAD_VERSION"}, + {STATUS_NDIS_BAD_CHARACTERISTICS, -EIO, + "STATUS_NDIS_BAD_CHARACTERISTICS"}, + {STATUS_NDIS_ADAPTER_NOT_FOUND, -EIO, "STATUS_NDIS_ADAPTER_NOT_FOUND"}, + {STATUS_NDIS_OPEN_FAILED, -EIO, "STATUS_NDIS_OPEN_FAILED"}, + {STATUS_NDIS_DEVICE_FAILED, -EIO, "STATUS_NDIS_DEVICE_FAILED"}, + {STATUS_NDIS_MULTICAST_FULL, -EIO, "STATUS_NDIS_MULTICAST_FULL"}, + {STATUS_NDIS_MULTICAST_EXISTS, -EIO, "STATUS_NDIS_MULTICAST_EXISTS"}, + {STATUS_NDIS_MULTICAST_NOT_FOUND, -EIO, + "STATUS_NDIS_MULTICAST_NOT_FOUND"}, + {STATUS_NDIS_REQUEST_ABORTED, -EIO, "STATUS_NDIS_REQUEST_ABORTED"}, + {STATUS_NDIS_RESET_IN_PROGRESS, -EIO, "STATUS_NDIS_RESET_IN_PROGRESS"}, + {STATUS_NDIS_INVALID_PACKET, -EIO, "STATUS_NDIS_INVALID_PACKET"}, + {STATUS_NDIS_INVALID_DEVICE_REQUEST, -EIO, + "STATUS_NDIS_INVALID_DEVICE_REQUEST"}, + {STATUS_NDIS_ADAPTER_NOT_READY, -EIO, "STATUS_NDIS_ADAPTER_NOT_READY"}, + {STATUS_NDIS_INVALID_LENGTH, -EIO, "STATUS_NDIS_INVALID_LENGTH"}, + {STATUS_NDIS_INVALID_DATA, -EIO, "STATUS_NDIS_INVALID_DATA"}, + {STATUS_NDIS_BUFFER_TOO_SHORT, -ENOBUFS, + "STATUS_NDIS_BUFFER_TOO_SHORT"}, + {STATUS_NDIS_INVALID_OID, -EIO, "STATUS_NDIS_INVALID_OID"}, + {STATUS_NDIS_ADAPTER_REMOVED, -EIO, "STATUS_NDIS_ADAPTER_REMOVED"}, + {STATUS_NDIS_UNSUPPORTED_MEDIA, -EIO, "STATUS_NDIS_UNSUPPORTED_MEDIA"}, + {STATUS_NDIS_GROUP_ADDRESS_IN_USE, -EIO, + "STATUS_NDIS_GROUP_ADDRESS_IN_USE"}, + {STATUS_NDIS_FILE_NOT_FOUND, -EIO, 
"STATUS_NDIS_FILE_NOT_FOUND"},
+	{STATUS_NDIS_ERROR_READING_FILE, -EIO,
+	"STATUS_NDIS_ERROR_READING_FILE"},
+	{STATUS_NDIS_ALREADY_MAPPED, -EIO, "STATUS_NDIS_ALREADY_MAPPED"},
+	{STATUS_NDIS_RESOURCE_CONFLICT, -EIO, "STATUS_NDIS_RESOURCE_CONFLICT"},
+	{STATUS_NDIS_MEDIA_DISCONNECTED, -EIO,
+	"STATUS_NDIS_MEDIA_DISCONNECTED"},
+	{STATUS_NDIS_INVALID_ADDRESS, -EIO, "STATUS_NDIS_INVALID_ADDRESS"},
+	{STATUS_NDIS_PAUSED, -EIO, "STATUS_NDIS_PAUSED"},
+	{STATUS_NDIS_INTERFACE_NOT_FOUND, -EIO,
+	"STATUS_NDIS_INTERFACE_NOT_FOUND"},
+	{STATUS_NDIS_UNSUPPORTED_REVISION, -EIO,
+	"STATUS_NDIS_UNSUPPORTED_REVISION"},
+	{STATUS_NDIS_INVALID_PORT, -EIO, "STATUS_NDIS_INVALID_PORT"},
+	{STATUS_NDIS_INVALID_PORT_STATE, -EIO,
+	"STATUS_NDIS_INVALID_PORT_STATE"},
+	{STATUS_NDIS_LOW_POWER_STATE, -EIO, "STATUS_NDIS_LOW_POWER_STATE"},
+	{STATUS_NDIS_NOT_SUPPORTED, -ENOSYS, "STATUS_NDIS_NOT_SUPPORTED"},
+	{STATUS_NDIS_DOT11_AUTO_CONFIG_ENABLED, -EIO,
+	"STATUS_NDIS_DOT11_AUTO_CONFIG_ENABLED"},
+	{STATUS_NDIS_DOT11_MEDIA_IN_USE, -EIO,
+	"STATUS_NDIS_DOT11_MEDIA_IN_USE"},
+	{STATUS_NDIS_DOT11_POWER_STATE_INVALID, -EIO,
+	"STATUS_NDIS_DOT11_POWER_STATE_INVALID"},
+	{STATUS_IPSEC_BAD_SPI, -EIO, "STATUS_IPSEC_BAD_SPI"},
+	{STATUS_IPSEC_SA_LIFETIME_EXPIRED, -EIO,
+	"STATUS_IPSEC_SA_LIFETIME_EXPIRED"},
+	{STATUS_IPSEC_WRONG_SA, -EIO, "STATUS_IPSEC_WRONG_SA"},
+	{STATUS_IPSEC_REPLAY_CHECK_FAILED, -EIO,
+	"STATUS_IPSEC_REPLAY_CHECK_FAILED"},
+	{STATUS_IPSEC_INVALID_PACKET, -EIO, "STATUS_IPSEC_INVALID_PACKET"},
+	{STATUS_IPSEC_INTEGRITY_CHECK_FAILED, -EIO,
+	"STATUS_IPSEC_INTEGRITY_CHECK_FAILED"},
+	{STATUS_IPSEC_CLEAR_TEXT_DROP, -EIO, "STATUS_IPSEC_CLEAR_TEXT_DROP"},
+	{0, 0, NULL}
+};
+
+/*
+ * Log the symbolic name of an SMB2 status code.
+ *
+ * @status: status value in little-endian byte order (__le32).
+ *
+ * Looks @status up in smb2_error_map_table and, when an entry matches,
+ * emits a notice with the numeric value (converted to CPU byte order so it
+ * reads the same on every architecture) and the entry's name.  A status
+ * that is not in the table produces no output.
+ */
+static void
+smb2_print_status(__le32 status)
+{
+	int idx;
+
+	/* The table ends with an entry whose status_string is NULL. */
+	for (idx = 0; smb2_error_map_table[idx].status_string != NULL; idx++) {
+		if (smb2_error_map_table[idx].smb2_status != status)
+			continue;
+		pr_notice("Status code returned 0x%08x %s\n",
+			  le32_to_cpu(status),
+			  smb2_error_map_table[idx].status_string);
+		break;	/* first match is enough, stop scanning */
+	}
+}
+
+/*
+ * Translate the status of an SMB2 response into a Linux error code.
+ *
+ * @buf: response buffer; it is interpreted as a struct smb2_hdr.
+ * @log_err: when true, log the status name unless the status is
+ *	STATUS_MORE_PROCESSING_REQUIRED.  Independently of @log_err, the
+ *	name is logged whenever CIFS_RC is set in cifsFYI.
+ *
+ * Returns 0 when the status is zero, the posix_error of the matching
+ * smb2_error_map_table entry otherwise, and -EIO when no entry matches.
+ */
+int
+map_smb2_to_linux_error(char *buf, bool log_err)
+{
+	struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
+	unsigned int i;
+	int rc = -EIO;	/* result when the status is not in the table */
+	__le32 smb2err = hdr->Status;
+
+	if (smb2err == 0)
+		return 0;
+
+	if ((log_err && smb2err != STATUS_MORE_PROCESSING_REQUIRED) ||
+	    (cifsFYI & CIFS_RC))
+		smb2_print_status(smb2err);
+
+	for (i = 0; i < sizeof(smb2_error_map_table) /
+			sizeof(smb2_error_map_table[0]); i++) {
+		if (smb2_error_map_table[i].smb2_status == smb2err) {
+			rc = smb2_error_map_table[i].posix_error;
+			break;
+		}
+	}
+
+	/* smb2err is little endian; convert it so the hex value is readable */
+	cFYI(1, "Mapping SMB2 status code 0x%08x to POSIX err %d",
+		 le32_to_cpu(smb2err), rc);
+
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
new file mode 100644
index 000000000000..d35ac689f24b
--- /dev/null
+++ b/fs/cifs/smb2pdu.h
@@ -0,0 +1,56 @@
+/*
+ * fs/cifs/smb2pdu.h
+ *
+ * Copyright (c) International Business Machines Corp., 2009, 2010
+ * Etersoft, 2012
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Pavel Shilovsky (pshilovsky@samba.org) 2012
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _SMB2PDU_H +#define _SMB2PDU_H + +#include + +/* + * SMB2 Header Definition + * + * "MBZ" : Must be Zero + * "BB" : BugBug, Something to check/review/analyze later + * "PDU" : "Protocol Data Unit" (ie a network "frame") + * + */ +struct smb2_hdr { + __be32 smb2_buf_length; /* big endian on wire */ + /* length is only two or three bytes - with + one or two byte type preceding it that MBZ */ + __u8 ProtocolId[4]; /* 0xFE 'S' 'M' 'B' */ + __le16 StructureSize; /* 64 */ + __le16 CreditCharge; /* MBZ */ + __le32 Status; /* Error from server */ + __le16 Command; + __le16 CreditRequest; /* CreditResponse */ + __le32 Flags; + __le32 NextCommand; + __u64 MessageId; /* opaque - so can stay little endian */ + __le32 ProcessId; + __u32 TreeId; /* opaque - so do not make little endian */ + __u64 SessionId; /* opaque - so do not make little endian */ + __u8 Signature[16]; +} __packed; + +#endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h new file mode 100644 index 000000000000..08249eecdf69 --- /dev/null +++ b/fs/cifs/smb2proto.h @@ -0,0 +1,37 @@ +/* + * fs/cifs/smb2proto.h + * + * Copyright (c) International Business Machines Corp., 2002, 2011 + * Etersoft, 2012 + * Author(s): Steve French (sfrench@us.ibm.com) + * Pavel Shilovsky (pshilovsky@samba.org) 2012 + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _SMB2PROTO_H +#define _SMB2PROTO_H +#include +#include + +struct statfs; + +/* + ***************************************************************** + * All Prototypes + ***************************************************************** + */ +extern int map_smb2_to_linux_error(char *buf, bool log_err); + +#endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 2dc7e1c03316940dec899fa3206a595de000e99b Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 26 Dec 2011 22:53:34 +0400 Subject: CIFS: Make transport routines work with SMB2 Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/cifsglob.h | 5 ++ fs/cifs/cifsproto.h | 1 + fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 17 ++++++ fs/cifs/smb2pdu.h | 59 +++++++++++++++++++ fs/cifs/smb2proto.h | 5 ++ fs/cifs/smb2transport.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/transport.c | 13 ++--- 9 files changed, 246 insertions(+), 8 deletions(-) create mode 100644 fs/cifs/smb2transport.c (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 4a7727143721..a73d7f888846 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -16,4 +16,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o -cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o +cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6d18962c9903..3575f0f832b1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "cifs_fs_sb.h" #include "cifsacl.h" @@ -218,6 +219,7 @@ 
struct smb_version_values { size_t header_size; size_t max_header_size; size_t read_rsp_size; + __le16 lock_cmd; }; #define HEADER_SIZE(server) (server->vals->header_size) @@ -812,6 +814,7 @@ typedef void (mid_callback_t)(struct mid_q_entry *mid); /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ + struct TCP_Server_Info *server; /* server corresponding to this mid */ __u64 mid; /* multiplex id */ __u32 pid; /* process id */ __u32 sequence_number; /* for CIFS signing */ @@ -1153,6 +1156,8 @@ void cifs_oplock_break(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; +extern mempool_t *cifs_mid_poolp; + /* Operations for different SMB versions */ #define SMB1_VERSION_STRING "1.0" extern struct smb_version_operations smb1_operations; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8797e4064662..88967d0885bf 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -68,6 +68,7 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); +extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, unsigned int nvec, mid_receive_t *receive, mid_callback_t *callback, void *cbdata, diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 728595f096c9..8f873863142a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -445,4 +445,5 @@ struct smb_version_values smb1_values = { .header_size = sizeof(struct smb_hdr), .max_header_size = MAX_CIFS_HDR_SIZE, .read_rsp_size = sizeof(READ_RSP), + .lock_cmd = cpu_to_le16(SMB_COM_LOCKING_ANDX), }; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f065e89756a1..09530f416123 100644 --- 
a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -18,10 +18,27 @@ */ #include "cifsglob.h" +#include "smb2pdu.h" +#include "smb2proto.h" + +static __u64 +smb2_get_next_mid(struct TCP_Server_Info *server) +{ + __u64 mid; + /* for SMB2 we need the current value */ + spin_lock(&GlobalMid_Lock); + mid = server->CurrentMid++; + spin_unlock(&GlobalMid_Lock); + return mid; +} struct smb_version_operations smb21_operations = { + .setup_request = smb2_setup_request, + .check_receive = smb2_check_receive, + .get_next_mid = smb2_get_next_mid, }; struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, + .lock_cmd = SMB2_LOCK, }; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index d35ac689f24b..c7f52e363d37 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -26,6 +26,65 @@ #include +/* + * Note that, due to trying to use names similar to the protocol specifications, + * there are many mixed case field names in the structures below. Although + * this does not match typical Linux kernel style, it is necessary to + * be able to match against the protocol specification. + * + * SMB2 commands + * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses + * (ie no useful data other than the SMB error code itself) and are marked such. + * Knowing this helps avoid response buffer allocations and copy in some cases.
+ */ + +/* List of commands in host endian */ +#define SMB2_NEGOTIATE_HE 0x0000 +#define SMB2_SESSION_SETUP_HE 0x0001 +#define SMB2_LOGOFF_HE 0x0002 /* trivial request/resp */ +#define SMB2_TREE_CONNECT_HE 0x0003 +#define SMB2_TREE_DISCONNECT_HE 0x0004 /* trivial req/resp */ +#define SMB2_CREATE_HE 0x0005 +#define SMB2_CLOSE_HE 0x0006 +#define SMB2_FLUSH_HE 0x0007 /* trivial resp */ +#define SMB2_READ_HE 0x0008 +#define SMB2_WRITE_HE 0x0009 +#define SMB2_LOCK_HE 0x000A +#define SMB2_IOCTL_HE 0x000B +#define SMB2_CANCEL_HE 0x000C +#define SMB2_ECHO_HE 0x000D +#define SMB2_QUERY_DIRECTORY_HE 0x000E +#define SMB2_CHANGE_NOTIFY_HE 0x000F +#define SMB2_QUERY_INFO_HE 0x0010 +#define SMB2_SET_INFO_HE 0x0011 +#define SMB2_OPLOCK_BREAK_HE 0x0012 + +/* The same list in little endian */ +#define SMB2_NEGOTIATE cpu_to_le16(SMB2_NEGOTIATE_HE) +#define SMB2_SESSION_SETUP cpu_to_le16(SMB2_SESSION_SETUP_HE) +#define SMB2_LOGOFF cpu_to_le16(SMB2_LOGOFF_HE) +#define SMB2_TREE_CONNECT cpu_to_le16(SMB2_TREE_CONNECT_HE) +#define SMB2_TREE_DISCONNECT cpu_to_le16(SMB2_TREE_DISCONNECT_HE) +#define SMB2_CREATE cpu_to_le16(SMB2_CREATE_HE) +#define SMB2_CLOSE cpu_to_le16(SMB2_CLOSE_HE) +#define SMB2_FLUSH cpu_to_le16(SMB2_FLUSH_HE) +#define SMB2_READ cpu_to_le16(SMB2_READ_HE) +#define SMB2_WRITE cpu_to_le16(SMB2_WRITE_HE) +#define SMB2_LOCK cpu_to_le16(SMB2_LOCK_HE) +#define SMB2_IOCTL cpu_to_le16(SMB2_IOCTL_HE) +#define SMB2_CANCEL cpu_to_le16(SMB2_CANCEL_HE) +#define SMB2_ECHO cpu_to_le16(SMB2_ECHO_HE) +#define SMB2_QUERY_DIRECTORY cpu_to_le16(SMB2_QUERY_DIRECTORY_HE) +#define SMB2_CHANGE_NOTIFY cpu_to_le16(SMB2_CHANGE_NOTIFY_HE) +#define SMB2_QUERY_INFO cpu_to_le16(SMB2_QUERY_INFO_HE) +#define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE) +#define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE) + +#define NUMBER_OF_SMB2_COMMANDS 0x0013 + +/* BB FIXME - analyze following length BB */ +#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */ + /* * SMB2 Header 
Definition * diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 08249eecdf69..0e59afb5edf9 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -34,4 +34,9 @@ struct statfs; */ extern int map_smb2_to_linux_error(char *buf, bool log_err); +extern int smb2_check_receive(struct mid_q_entry *mid, + struct TCP_Server_Info *server, bool log_error); +extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, + unsigned int nvec, struct mid_q_entry **ret_mid); + #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c new file mode 100644 index 000000000000..b4b6b9a6c0fb --- /dev/null +++ b/fs/cifs/smb2transport.c @@ -0,0 +1,151 @@ +/* + * fs/cifs/smb2transport.c + * + * Copyright (C) International Business Machines Corp., 2002, 2011 + * Etersoft, 2012 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) 2006 + * Pavel Shilovsky (pshilovsky@samba.org) 2012 + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "smb2pdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "smb2proto.h" +#include "cifs_debug.h" +#include "smb2status.h" + +/* + * Set message id for the request. 
Should be called after wait_for_free_request + * and when srv_mutex is held. + */ +static inline void +smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr) +{ + hdr->MessageId = get_next_mid(server); +} + +static struct mid_q_entry * +smb2_mid_entry_alloc(const struct smb2_hdr *smb_buffer, + struct TCP_Server_Info *server) +{ + struct mid_q_entry *temp; + + if (server == NULL) { + cERROR(1, "Null TCP session in smb2_mid_entry_alloc"); + return NULL; + } + + temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); + if (temp == NULL) + return temp; + else { + memset(temp, 0, sizeof(struct mid_q_entry)); + temp->mid = smb_buffer->MessageId; /* always LE */ + temp->pid = current->pid; + temp->command = smb_buffer->Command; /* Always LE */ + temp->when_alloc = jiffies; + temp->server = server; + + /* + * The default is for the mid to be synchronous, so the + * default callback just wakes up the current task. + */ + temp->callback = cifs_wake_up_task; + temp->callback_data = current; + } + + atomic_inc(&midCount); + temp->mid_state = MID_REQUEST_ALLOCATED; + return temp; +} + +static int +smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf, + struct mid_q_entry **mid) +{ + if (ses->server->tcpStatus == CifsExiting) + return -ENOENT; + + if (ses->server->tcpStatus == CifsNeedReconnect) { + cFYI(1, "tcp session dead - return to caller to retry"); + return -EAGAIN; + } + + if (ses->status != CifsGood) { + /* check if SMB2 session is bad because we are setting it up */ + if ((buf->Command != SMB2_SESSION_SETUP) && + (buf->Command != SMB2_NEGOTIATE)) + return -EAGAIN; + /* else ok - we are setting up session */ + } + *mid = smb2_mid_entry_alloc(buf, ses->server); + if (*mid == NULL) + return -ENOMEM; + spin_lock(&GlobalMid_Lock); + list_add_tail(&(*mid)->qhead, &ses->server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); + return 0; +} + +int +smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, + bool log_error) +{ + unsigned 
int len = get_rfc1002_length(mid->resp_buf); + + dump_smb(mid->resp_buf, min_t(u32, 80, len)); + /* convert the length into a more usable form */ + /* BB - uncomment with SMB2 signing implementation */ + /* if ((len > 24) && + (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { + if (smb2_verify_signature(mid->resp_buf, server)) + cERROR(1, "Unexpected SMB signature"); + } */ + + return map_smb2_to_linux_error(mid->resp_buf, log_error); +} + +int +smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, + unsigned int nvec, struct mid_q_entry **ret_mid) +{ + int rc; + struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; + struct mid_q_entry *mid; + + smb2_seq_num_into_buf(ses->server, hdr); + + rc = smb2_get_mid_entry(ses, hdr, &mid); + if (rc) + return rc; + /* rc = smb2_sign_smb2(iov, nvec, ses->server); + if (rc) + delete_mid(mid); */ + *ret_mid = mid; + return rc; +} + +/* BB add missing functions here */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 904702db2526..bcc02b476f6e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -35,10 +35,8 @@ #include "cifsproto.h" #include "cifs_debug.h" -extern mempool_t *cifs_mid_poolp; - -static void -wake_up_task(struct mid_q_entry *mid) +void +cifs_wake_up_task(struct mid_q_entry *mid) { wake_up_process(mid->callback_data); } @@ -65,12 +63,13 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ /* when mid allocated can be before when sent */ temp->when_alloc = jiffies; + temp->server = server; /* * The default is for the mid to be synchronous, so the * default callback just wakes up the current task. 
*/ - temp->callback = wake_up_task; + temp->callback = cifs_wake_up_task; temp->callback_data = current; } @@ -83,6 +82,7 @@ void DeleteMidQEntry(struct mid_q_entry *midEntry) { #ifdef CONFIG_CIFS_STATS2 + __le16 command = midEntry->server->vals->lock_cmd; unsigned long now; #endif midEntry->mid_state = MID_FREE; @@ -96,8 +96,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) /* commands taking longer than one second are indications that something is wrong, unless it is quite a slow link or server */ if ((now - midEntry->when_alloc) > HZ) { - if ((cifsFYI & CIFS_TIMER) && - (midEntry->command != cpu_to_le16(SMB_COM_LOCKING_ANDX))) { + if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) { printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu", midEntry->command, midEntry->mid); printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n", -- cgit v1.2.3 From 28ea5290d78a7fc87a4b4f7cedcaa662f5b8d977 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 23 May 2012 16:18:00 +0400 Subject: CIFS: Add SMB2 credits support For SMB2 protocol we can add more than one credit for one received request: it depends on CreditRequest field in SMB2 response header. Also we divide all requests by type: echoes, oplocks and others. Each type uses its own slot pool.
Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 5 ++++ fs/cifs/cifsproto.h | 1 + fs/cifs/connect.c | 2 +- fs/cifs/smb2ops.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 3575f0f832b1..480b6385a9b6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -343,6 +343,11 @@ struct TCP_Server_Info { char server_GUID[16]; __u16 sec_mode; bool session_estab; /* mark when very first sess is established */ +#ifdef CONFIG_CIFS_SMB2 + int echo_credits; /* echo reserved slots */ + int oplock_credits; /* oplock break reserved slots */ + bool echoes:1; /* enable echoes */ +#endif u16 dialect; /* dialect index that server chose */ enum securityEnum secType; bool oplocks:1; /* enable oplocks */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 88967d0885bf..3b4d41f9ceeb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -91,6 +91,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf , struct smb_hdr *out_buf, int *bytes_returned); +extern int cifs_reconnect(struct TCP_Server_Info *server); extern int checkSMB(char *buf, unsigned int length); extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *); extern bool backup_cred(struct cifs_sb_info *); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index cfb7e7797642..a6197224b102 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -297,7 +297,7 @@ static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, * reconnect tcp session * wake up waiters on reconnection? 
- (not needed currently) */ -static int +int cifs_reconnect(struct TCP_Server_Info *server) { int rc = 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 09530f416123..67a05984cd41 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -20,6 +20,81 @@ #include "cifsglob.h" #include "smb2pdu.h" #include "smb2proto.h" +#include "cifsproto.h" +#include "cifs_debug.h" + +static int +change_conf(struct TCP_Server_Info *server) +{ + server->credits += server->echo_credits + server->oplock_credits; + server->oplock_credits = server->echo_credits = 0; + switch (server->credits) { + case 0: + return -1; + case 1: + server->echoes = false; + server->oplocks = false; + cERROR(1, "disabling echoes and oplocks"); + break; + case 2: + server->echoes = true; + server->oplocks = false; + server->echo_credits = 1; + cFYI(1, "disabling oplocks"); + break; + default: + server->echoes = true; + server->oplocks = true; + server->echo_credits = 1; + server->oplock_credits = 1; + } + server->credits -= server->echo_credits + server->oplock_credits; + return 0; +} + +static void +smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, + const int optype) +{ + int *val, rc = 0; + spin_lock(&server->req_lock); + val = server->ops->get_credits_field(server, optype); + *val += add; + server->in_flight--; + if (server->in_flight == 0) + rc = change_conf(server); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + if (rc) + cifs_reconnect(server); +} + +static void +smb2_set_credits(struct TCP_Server_Info *server, const int val) +{ + spin_lock(&server->req_lock); + server->credits = val; + spin_unlock(&server->req_lock); +} + +static int * +smb2_get_credits_field(struct TCP_Server_Info *server, const int optype) +{ + switch (optype) { + case CIFS_ECHO_OP: + return &server->echo_credits; + case CIFS_OBREAK_OP: + return &server->oplock_credits; + default: + return &server->credits; + } +} + +static unsigned int +smb2_get_credits(struct mid_q_entry *mid) 
+{ + return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest); +} static __u64 smb2_get_next_mid(struct TCP_Server_Info *server) @@ -35,6 +110,10 @@ smb2_get_next_mid(struct TCP_Server_Info *server) struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .check_receive = smb2_check_receive, + .add_credits = smb2_add_credits, + .set_credits = smb2_set_credits, + .get_credits_field = smb2_get_credits_field, + .get_credits = smb2_get_credits, .get_next_mid = smb2_get_next_mid, }; -- cgit v1.2.3 From 4b1241006c337f57745b0fc2f17b24f8009ca82d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 9 Jul 2012 16:09:23 +0400 Subject: CIFS: Fix a wrong pointer in atomic_open Commit 30d904947459cca2beb69e0110716f5248b31f2a caused a regression in cifs open codepath. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/dir.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d364654491e3..2caba0b54acb 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -387,7 +387,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct cifs_tcon *tcon; __u16 fileHandle; __u32 oplock; - struct file *filp; struct cifsFileInfo *pfile_info; /* Posix open is only called (at lookup time) for file create now. 
For @@ -418,7 +417,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, inode, direntry->d_name.name, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); - filp = ERR_CAST(tlink); if (IS_ERR(tlink)) goto out_free_xid; @@ -436,10 +434,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out; } - pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); + pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock); if (pfile_info == NULL) { CIFSSMBClose(xid, tcon, fileHandle); - fput(filp); rc = -ENOMEM; } -- cgit v1.2.3 From 093b2bdad3221e3fae3c26d89387e7297a157664 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 8 Jun 2011 15:51:07 +0400 Subject: CIFS: Make demultiplex_thread work with SMB2 code Now we can process SMB2 messages: check message, get message id and wakeup awaiting routines. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/cifs_debug.c | 2 +- fs/cifs/smb2misc.c | 307 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 37 +++++++ fs/cifs/smb2pdu.h | 36 ++++++ fs/cifs/smb2proto.h | 2 + 6 files changed, 384 insertions(+), 2 deletions(-) create mode 100644 fs/cifs/smb2misc.c (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index a73d7f888846..b77e9ec02bd1 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -16,4 +16,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o -cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o +cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o smb2misc.o diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index e8140528ca5c..8aa8693bb65c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -65,7 +65,7 @@ void cifs_dump_detail(void *buf) cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", smb->Command, smb->Status.CifsError, smb->Flags, smb->Flags2, 
smb->Mid, smb->Pid); - cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb)); + cERROR(1, "smb buf %p len %u", smb, smbCalcSize(smb)); #endif /* CONFIG_CIFS_DEBUG2 */ } diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c new file mode 100644 index 000000000000..d4226782ec88 --- /dev/null +++ b/fs/cifs/smb2misc.c @@ -0,0 +1,307 @@ +/* + * fs/cifs/smb2misc.c + * + * Copyright (C) International Business Machines Corp., 2002,2011 + * Etersoft, 2012 + * Author(s): Steve French (sfrench@us.ibm.com) + * Pavel Shilovsky (pshilovsky@samba.org) 2012 + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include "smb2pdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "smb2proto.h" +#include "cifs_debug.h" +#include "cifs_unicode.h" +#include "smb2status.h" + +static int +check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid) +{ + /* + * Make sure that this really is an SMB, that it is a response, + * and that the message ids match. 
+ */ + if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) && + (mid == hdr->MessageId)) { + if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR) + return 0; + else { + /* only one valid case where server sends us request */ + if (hdr->Command == SMB2_OPLOCK_BREAK) + return 0; + else + cERROR(1, "Received Request not response"); + } + } else { /* bad signature or mid */ + if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER) + cERROR(1, "Bad protocol string signature header %x", + *(unsigned int *) hdr->ProtocolId); + if (mid != hdr->MessageId) + cERROR(1, "Mids do not match"); + } + cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); + return 1; +} + +/* + * The following table defines the expected "StructureSize" of SMB2 responses + * in order by SMB2 command. This is similar to "wct" in SMB/CIFS responses. + * + * Note that commands are defined in smb2pdu.h in le16 but the array below is + * indexed by command in host byte order + */ +static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { + /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65), + /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9), + /* SMB2_LOGOFF */ __constant_cpu_to_le16(4), + /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16), + /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4), + /* SMB2_CREATE */ __constant_cpu_to_le16(89), + /* SMB2_CLOSE */ __constant_cpu_to_le16(60), + /* SMB2_FLUSH */ __constant_cpu_to_le16(4), + /* SMB2_READ */ __constant_cpu_to_le16(17), + /* SMB2_WRITE */ __constant_cpu_to_le16(17), + /* SMB2_LOCK */ __constant_cpu_to_le16(4), + /* SMB2_IOCTL */ __constant_cpu_to_le16(49), + /* BB CHECK this ... 
not listed in documentation */ + /* SMB2_CANCEL */ __constant_cpu_to_le16(0), + /* SMB2_ECHO */ __constant_cpu_to_le16(4), + /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9), + /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9), + /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9), + /* SMB2_SET_INFO */ __constant_cpu_to_le16(2), + /* BB FIXME can also be 44 for lease break */ + /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24) +}; + +int +smb2_check_message(char *buf, unsigned int length) +{ + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; + struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; + __u64 mid = hdr->MessageId; + __u32 len = get_rfc1002_length(buf); + __u32 clc_len; /* calculated length */ + int command; + + /* BB disable following printk later */ + cFYI(1, "%s length: 0x%x, smb_buf_length: 0x%x", __func__, length, len); + + /* + * Add function to do table lookup of StructureSize by command + * ie Validate the wct via smb2_struct_sizes table above + */ + + if (length < 2 + sizeof(struct smb2_hdr)) { + if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { + pdu->StructureSize2 = 0; + /* + * As with SMB/CIFS, on some error cases servers may + * not return wct properly + */ + return 0; + } else { + cERROR(1, "Length less than SMB header size"); + } + return 1; + } + if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { + cERROR(1, "SMB length greater than maximum, mid=%lld", mid); + return 1; + } + + if (check_smb2_hdr(hdr, mid)) + return 1; + + if (hdr->StructureSize != SMB2_HEADER_SIZE) { + cERROR(1, "Illegal structure size %d", + le16_to_cpu(hdr->StructureSize)); + return 1; + } + + command = le16_to_cpu(hdr->Command); + if (command >= NUMBER_OF_SMB2_COMMANDS) { + cERROR(1, "Illegal SMB2 command %d", command); + return 1; + } + + if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { + if (hdr->Status == 0 || + pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2) { + /* error packets have 9 byte structure size */ + cERROR(1, "Illegal response 
size %u for command %d", + le16_to_cpu(pdu->StructureSize2), command); + return 1; + } + } + + if (4 + len != length) { + cERROR(1, "Total length %u RFC1002 length %u mismatch mid %llu", + length, 4 + len, mid); + return 1; + } + + clc_len = smb2_calc_size(hdr); + + if (4 + len != clc_len) { + cFYI(1, "Calculated size %u length %u mismatch mid %llu", + clc_len, 4 + len, mid); + if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */ + return 0; /* BB workaround Samba 3 bug SessSetup rsp */ + return 1; + } + return 0; +} + +/* + * The size of the variable area depends on the offset and length fields + * located in different fields for various SMB2 responses. SMB2 responses + * with no variable length info, show an offset of zero for the offset field. + */ +static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { + /* SMB2_NEGOTIATE */ true, + /* SMB2_SESSION_SETUP */ true, + /* SMB2_LOGOFF */ false, + /* SMB2_TREE_CONNECT */ false, + /* SMB2_TREE_DISCONNECT */ false, + /* SMB2_CREATE */ true, + /* SMB2_CLOSE */ false, + /* SMB2_FLUSH */ false, + /* SMB2_READ */ true, + /* SMB2_WRITE */ false, + /* SMB2_LOCK */ false, + /* SMB2_IOCTL */ true, + /* SMB2_CANCEL */ false, /* BB CHECK this not listed in documentation */ + /* SMB2_ECHO */ false, + /* SMB2_QUERY_DIRECTORY */ true, + /* SMB2_CHANGE_NOTIFY */ true, + /* SMB2_QUERY_INFO */ true, + /* SMB2_SET_INFO */ false, + /* SMB2_OPLOCK_BREAK */ false +}; + +/* + * Returns the pointer to the beginning of the data area. Length of the data + * area and the offset to it (from the beginning of the smb) are also returned.
+ */ +static char * +smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) +{ + *off = 0; + *len = 0; + + /* error responses do not have data area */ + if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED && + (((struct smb2_err_rsp *)hdr)->StructureSize) == + SMB2_ERROR_STRUCTURE_SIZE2) + return NULL; + + /* + * Following commands have data areas so we have to get the location + * of the data buffer offset and data buffer length for the particular + * command. + */ + switch (hdr->Command) { + case SMB2_NEGOTIATE: + case SMB2_SESSION_SETUP: + case SMB2_CREATE: + case SMB2_READ: + case SMB2_QUERY_INFO: + case SMB2_QUERY_DIRECTORY: + case SMB2_IOCTL: + case SMB2_CHANGE_NOTIFY: + default: + /* BB FIXME for unimplemented cases above */ + cERROR(1, "no length check for command"); + break; + } + + /* + * Invalid length or offset probably means data area is invalid, but + * we have little choice but to ignore the data area in this case. + */ + if (*off > 4096) { + cERROR(1, "offset %d too large, data area ignored", *off); + *len = 0; + *off = 0; + } else if (*off < 0) { + cERROR(1, "negative offset %d to data invalid ignore data area", + *off); + *off = 0; + *len = 0; + } else if (*len < 0) { + cERROR(1, "negative data length %d invalid, data area ignored", + *len); + *len = 0; + } else if (*len > 128 * 1024) { + cERROR(1, "data area larger than 128K: %d", *len); + *len = 0; + } + + /* return pointer to beginning of data area, ie offset from SMB start */ + if ((*off != 0) && (*len != 0)) + return hdr->ProtocolId + *off; + else + return NULL; +} + +/* + * Calculate the size of the SMB message based on the fixed header + * portion, the number of word parameters and the data portion of the message. 
+ */ +unsigned int +smb2_calc_size(struct smb2_hdr *hdr) +{ + struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; + int offset; /* the offset from the beginning of SMB to data area */ + int data_length; /* the length of the variable length data area */ + /* Structure Size has already been checked to make sure it is 64 */ + int len = 4 + le16_to_cpu(pdu->hdr.StructureSize); + + /* + * StructureSize2, ie length of fixed parameter area has already + * been checked to make sure it is the correct length. + */ + len += le16_to_cpu(pdu->StructureSize2); + + if (has_smb2_data_area[le16_to_cpu(hdr->Command)] == false) + goto calc_size_exit; + + smb2_get_data_area_len(&offset, &data_length, hdr); + cFYI(1, "SMB2 data length %d offset %d", data_length, offset); + + if (data_length > 0) { + /* + * Check to make sure that data area begins after fixed area, + * Note that last byte of the fixed area is part of data area + * for some commands, typically those with odd StructureSize, + * so we must add one to the calculation (and 4 to account for + * the size of the RFC1001 hdr. 
+ */ + if (offset + 4 + 1 < len) { + cERROR(1, "data area offset %d overlaps SMB2 header %d", + offset + 4 + 1, len); + data_length = 0; + } else { + len = 4 + offset + data_length; + } + } +calc_size_exit: + cFYI(1, "SMB2 len %d", len); + return len; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 67a05984cd41..c6f81541a635 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -107,6 +107,38 @@ smb2_get_next_mid(struct TCP_Server_Info *server) return mid; } +static struct mid_q_entry * +smb2_find_mid(struct TCP_Server_Info *server, char *buf) +{ + struct mid_q_entry *mid; + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; + + spin_lock(&GlobalMid_Lock); + list_for_each_entry(mid, &server->pending_mid_q, qhead) { + if ((mid->mid == hdr->MessageId) && + (mid->mid_state == MID_REQUEST_SUBMITTED) && + (mid->command == hdr->Command)) { + spin_unlock(&GlobalMid_Lock); + return mid; + } + } + spin_unlock(&GlobalMid_Lock); + return NULL; +} + +static void +smb2_dump_detail(void *buf) +{ +#ifdef CONFIG_CIFS_DEBUG2 + struct smb2_hdr *smb = (struct smb2_hdr *)buf; + + cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d", + smb->Command, smb->Status, smb->Flags, smb->MessageId, + smb->ProcessId); + cERROR(1, "smb buf %p len %u", smb, smb2_calc_size(smb)); +#endif +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .check_receive = smb2_check_receive, @@ -115,9 +147,14 @@ struct smb_version_operations smb21_operations = { .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, .get_next_mid = smb2_get_next_mid, + .find_mid = smb2_find_mid, + .check_message = smb2_check_message, + .dump_detail = smb2_dump_detail, }; struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, .lock_cmd = SMB2_LOCK, }; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c7f52e363d37..b08a277df896 100644 
--- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -85,6 +85,12 @@ /* BB FIXME - analyze following length BB */ #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */ +#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) + +#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64) + +#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) + /* * SMB2 Header Definition * @@ -112,4 +118,34 @@ struct smb2_hdr { __u8 Signature[16]; } __packed; +struct smb2_pdu { + struct smb2_hdr hdr; + __le16 StructureSize2; /* size of wct area (varies, request specific) */ +} __packed; + +/* + * SMB2 flag definitions + */ +#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001) +#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002) +#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004) +#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008) +#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000) + +/* + * Definitions for SMB2 Protocol Data Units (network frames) + * + * See MS-SMB2.PDF specification for protocol details. + * The Naming convention is the lower case version of the SMB2 + * command code name for the struct. Note that structures must be packed. 
+ * + */ +struct smb2_err_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; + __le16 Reserved; /* MBZ */ + __le32 ByteCount; /* even if zero, at least one byte follows */ + __u8 ErrorData[1]; /* variable length */ +} __packed; + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 0e59afb5edf9..19bf987c2648 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -33,6 +33,8 @@ struct statfs; ***************************************************************** */ extern int map_smb2_to_linux_error(char *buf, bool log_err); +extern int smb2_check_message(char *buf, unsigned int length); +extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); -- cgit v1.2.3 From 3792c1732878822ebf5a1c7e83e23453b9bbb698 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 12 Jan 2012 22:40:50 +0400 Subject: CIFS: Respect SMB2 header/max header size Use SMB2 header size values for allocation and memset because they are bigger and suitable for both CIFS and SMB2. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 14 ++++++++++++-- fs/cifs/misc.c | 25 +++++++++++++++++++------ 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2e9929dc2072..7a7cda9f7912 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -48,6 +48,9 @@ #include #include "cifs_spnego.h" #include "fscache.h" +#ifdef CONFIG_CIFS_SMB2 +#include "smb2pdu.h" +#endif #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ int cifsFYI = 0; @@ -980,6 +983,14 @@ cifs_destroy_inodecache(void) static int cifs_init_request_bufs(void) { + size_t max_hdr_size = MAX_CIFS_HDR_SIZE; +#ifdef CONFIG_CIFS_SMB2 + /* + * SMB2 maximum header size is bigger than CIFS one - no problems to + * allocate some more bytes for CIFS. 
+ */ + max_hdr_size = MAX_SMB2_HDR_SIZE; +#endif if (CIFSMaxBufSize < 8192) { /* Buffer size can not be smaller than 2 * PATH_MAX since maximum Unicode path name has to fit in any SMB/CIFS path based frames */ @@ -991,8 +1002,7 @@ cifs_init_request_bufs(void) } /* cERROR(1, "CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize); */ cifs_req_cachep = kmem_cache_create("cifs_request", - CIFSMaxBufSize + - MAX_CIFS_HDR_SIZE, 0, + CIFSMaxBufSize + max_hdr_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (cifs_req_cachep == NULL) return -ENOMEM; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 64601146f157..ad2538a64c70 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -29,6 +29,9 @@ #include "smberr.h" #include "nterr.h" #include "cifs_unicode.h" +#ifdef CONFIG_CIFS_SMB2 +#include "smb2pdu.h" +#endif extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; @@ -143,17 +146,27 @@ struct smb_hdr * cifs_buf_get(void) { struct smb_hdr *ret_buf = NULL; - -/* We could use negotiated size instead of max_msgsize - - but it may be more efficient to always alloc same size - albeit slightly larger than necessary and maxbuffersize - defaults to this and can not be bigger */ + size_t buf_size = sizeof(struct smb_hdr); + +#ifdef CONFIG_CIFS_SMB2 + /* + * SMB2 header is bigger than CIFS one - no problems to clean some + * more bytes for CIFS. + */ + buf_size = sizeof(struct smb2_hdr); +#endif + /* + * We could use negotiated size instead of max_msgsize - + * but it may be more efficient to always alloc same size + * albeit slightly larger than necessary and maxbuffersize + * defaults to this and can not be bigger. 
+ */ ret_buf = mempool_alloc(cifs_req_poolp, GFP_NOFS); /* clear the first few header bytes */ /* for most paths, more is cleared in header_assemble */ if (ret_buf) { - memset(ret_buf, 0, sizeof(struct smb_hdr) + 3); + memset(ret_buf, 0, buf_size + 3); atomic_inc(&bufAllocCount); #ifdef CONFIG_CIFS_STATS2 atomic_inc(&totBufAllocCount); -- cgit v1.2.3 From ec2e4523fdba88317e06d0c7a88af3a0860447fc Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 27 Dec 2011 16:12:43 +0400 Subject: CIFS: Add capability to send SMB2 negotiate message and add negotiate request type to let set_credits know that we are only on negotiate stage and no need to make a decision about disabling echos and oplocks. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Makefile | 3 +- fs/cifs/cifsglob.h | 13 ++- fs/cifs/cifssmb.c | 7 -- fs/cifs/smb2misc.c | 7 +- fs/cifs/smb2ops.c | 22 +++- fs/cifs/smb2pdu.c | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 39 +++++++ fs/cifs/smb2proto.h | 7 ++ 8 files changed, 417 insertions(+), 11 deletions(-) create mode 100644 fs/cifs/smb2pdu.c (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index b77e9ec02bd1..daf6837d9e0e 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -16,4 +16,5 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o -cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o smb2misc.o +cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ + smb2misc.o smb2pdu.o diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 480b6385a9b6..2d48f880b130 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -313,6 +313,12 @@ get_rfc1002_length(void *buf) return be32_to_cpu(*((__be32 *)buf)); } +static inline void +inc_rfc1001_len(void *buf, int count) +{ + be32_add_cpu((__be32 *)buf, count); +} + struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head 
smb_ses_list; @@ -393,6 +399,10 @@ struct TCP_Server_Info { atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ #endif +#ifdef CONFIG_CIFS_SMB2 + unsigned int max_read; + unsigned int max_write; +#endif /* CONFIG_CIFS_SMB2 */ }; static inline unsigned int @@ -986,7 +996,8 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, /* Type of request operation */ #define CIFS_ECHO_OP 0x080 /* echo request */ #define CIFS_OBREAK_OP 0x0100 /* oplock break request */ -#define CIFS_OP_MASK 0x0180 /* mask request type */ +#define CIFS_NEG_OP 0x0200 /* negotiate request */ +#define CIFS_OP_MASK 0x0380 /* mask request type */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7a3b4a3b113b..dcb0ad87e173 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -388,13 +388,6 @@ vt2_err: return -EINVAL; } -static inline void inc_rfc1001_len(void *pSMB, int count) -{ - struct smb_hdr *hdr = (struct smb_hdr *)pSMB; - - be32_add_cpu(&hdr->smb_buf_length, count); -} - int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) { diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index d4226782ec88..e4dede4ae058 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -199,7 +199,7 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { * Returns the pointer to the beginning of the data area. Length of the data * area and the offset to it (from the beginning of the smb are also returned. 
*/ -static char * +char * smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) { *off = 0; @@ -218,6 +218,11 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) */ switch (hdr->Command) { case SMB2_NEGOTIATE: + *off = le16_to_cpu( + ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferOffset); + *len = le16_to_cpu( + ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferLength); + break; case SMB2_SESSION_SETUP: case SMB2_CREATE: case SMB2_READ: diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c6f81541a635..2b5232b4f7e7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -61,7 +61,7 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, val = server->ops->get_credits_field(server, optype); *val += add; server->in_flight--; - if (server->in_flight == 0) + if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP) rc = change_conf(server); spin_unlock(&server->req_lock); wake_up(&server->request_q); @@ -139,6 +139,24 @@ smb2_dump_detail(void *buf) #endif } +static bool +smb2_need_neg(struct TCP_Server_Info *server) +{ + return server->max_read == 0; +} + +static int +smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) +{ + int rc; + ses->server->CurrentMid = 0; + rc = SMB2_negotiate(xid, ses); + /* BB we probably don't need to retry with modern servers */ + if (rc == -EAGAIN) + rc = -EHOSTDOWN; + return rc; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .check_receive = smb2_check_receive, @@ -150,6 +168,8 @@ struct smb_version_operations smb21_operations = { .find_mid = smb2_find_mid, .check_message = smb2_check_message, .dump_detail = smb2_dump_detail, + .need_neg = smb2_need_neg, + .negotiate = smb2_negotiate, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c new file mode 100644 index 000000000000..719e4c4f0307 --- /dev/null +++ b/fs/cifs/smb2pdu.c @@ -0,0 +1,330 @@ +/* + * fs/cifs/smb2pdu.c + * + 
* Copyright (C) International Business Machines Corp., 2009, 2011 + * Etersoft, 2012 + * Author(s): Steve French (sfrench@us.ibm.com) + * Pavel Shilovsky (pshilovsky@samba.org) 2012 + * + * Contains the routines for constructing the SMB2 PDUs themselves + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + /* SMB2 PDU handling routines here - except for leftovers (eg session setup) */ + /* Note that there are handle based routines which must be */ + /* treated slightly differently for reconnection purposes since we never */ + /* want to reuse a stale file handle and only the caller knows the file info */ + +#include +#include +#include +#include +#include +#include "smb2pdu.h" +#include "cifsglob.h" +#include "cifsacl.h" +#include "cifsproto.h" +#include "smb2proto.h" +#include "cifs_unicode.h" +#include "cifs_debug.h" +#include "ntlmssp.h" +#include "smb2status.h" + +/* + * The following table defines the expected "StructureSize" of SMB2 requests + * in order by SMB2 command. This is similar to "wct" in SMB/CIFS requests. + * + * Note that commands are defined in smb2pdu.h in le16 but the array below is + * indexed by command in host byte order. 
+ */ +static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { + /* SMB2_NEGOTIATE */ 36, + /* SMB2_SESSION_SETUP */ 25, + /* SMB2_LOGOFF */ 4, + /* SMB2_TREE_CONNECT */ 9, + /* SMB2_TREE_DISCONNECT */ 4, + /* SMB2_CREATE */ 57, + /* SMB2_CLOSE */ 24, + /* SMB2_FLUSH */ 24, + /* SMB2_READ */ 49, + /* SMB2_WRITE */ 49, + /* SMB2_LOCK */ 48, + /* SMB2_IOCTL */ 57, + /* SMB2_CANCEL */ 4, + /* SMB2_ECHO */ 4, + /* SMB2_QUERY_DIRECTORY */ 33, + /* SMB2_CHANGE_NOTIFY */ 32, + /* SMB2_QUERY_INFO */ 41, + /* SMB2_SET_INFO */ 33, + /* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */ +}; + + +static void +smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , + const struct cifs_tcon *tcon) +{ + struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; + char *temp = (char *)hdr; + /* lookup word count ie StructureSize from table */ + __u16 parmsize = smb2_req_struct_sizes[le16_to_cpu(smb2_cmd)]; + + /* + * smaller than SMALL_BUFFER_SIZE but bigger than fixed area of + * largest operations (Create) + */ + memset(temp, 0, 256); + + /* Note this is only network field converted to big endian */ + hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr) + - 4 /* RFC 1001 length field itself not counted */); + + hdr->ProtocolId[0] = 0xFE; + hdr->ProtocolId[1] = 'S'; + hdr->ProtocolId[2] = 'M'; + hdr->ProtocolId[3] = 'B'; + hdr->StructureSize = cpu_to_le16(64); + hdr->Command = smb2_cmd; + hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */ + hdr->ProcessId = cpu_to_le32((__u16)current->tgid); + + if (!tcon) + goto out; + + hdr->TreeId = tcon->tid; + /* Uid is not converted */ + if (tcon->ses) + hdr->SessionId = tcon->ses->Suid; + /* BB check following DFS flags BB */ + /* BB do we have to add check for SHI1005_FLAGS_DFS_ROOT too? */ + /* if (tcon->share_flags & SHI1005_FLAGS_DFS) + hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */ + /* BB how does SMB2 do case sensitive? 
*/ + /* if (tcon->nocase) + hdr->Flags |= SMBFLG_CASELESS; */ + /* if (tcon->ses && tcon->ses->server && + (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) + hdr->Flags |= SMB2_FLAGS_SIGNED; */ +out: + pdu->StructureSize2 = cpu_to_le16(parmsize); + return; +} + +static int +smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) +{ + int rc = 0; + /* BB add missing code here */ + return rc; +} + +/* + * Allocate and return pointer to an SMB request hdr, and set basic + * SMB information in the SMB header. If the return code is zero, this + * function must have filled in request_buf pointer. + */ +static int +small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, + void **request_buf) +{ + int rc = 0; + + rc = smb2_reconnect(smb2_command, tcon); + if (rc) + return rc; + + /* BB eventually switch this to SMB2 specific small buf size */ + *request_buf = cifs_small_buf_get(); + if (*request_buf == NULL) { + /* BB should we add a retry in here if not a writepage? */ + return -ENOMEM; + } + + smb2_hdr_assemble((struct smb2_hdr *) *request_buf, smb2_command, tcon); + + if (tcon != NULL) { +#ifdef CONFIG_CIFS_STATS2 + /* + uint16_t com_code = le16_to_cpu(smb2_command); + cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]); + */ +#endif + cifs_stats_inc(&tcon->num_smbs_sent); + } + + return rc; +} + +static void +free_rsp_buf(int resp_buftype, void *rsp) +{ + if (resp_buftype == CIFS_SMALL_BUFFER) + cifs_small_buf_release(rsp); + else if (resp_buftype == CIFS_LARGE_BUFFER) + cifs_buf_release(rsp); +} + +#define SMB2_NUM_PROT 1 + +#define SMB2_PROT 0 +#define SMB21_PROT 1 +#define BAD_PROT 0xFFFF + +#define SMB2_PROT_ID 0x0202 +#define SMB21_PROT_ID 0x0210 +#define BAD_PROT_ID 0xFFFF + +static struct { + int index; + __le16 name; +} smb2protocols[] = { + {SMB2_PROT, cpu_to_le16(SMB2_PROT_ID)}, + {SMB21_PROT, cpu_to_le16(SMB21_PROT_ID)}, + {BAD_PROT, cpu_to_le16(BAD_PROT_ID)} +}; + +/* + * + * SMB2 Worker functions follow: + * + * The general 
structure of the worker functions is: + * 1) Call smb2_init (assembles SMB2 header) + * 2) Initialize SMB2 command specific fields in fixed length area of SMB + * 3) Call smb_sendrcv2 (sends request on socket and waits for response) + * 4) Decode SMB2 command specific fields in the fixed length area + * 5) Decode variable length data area (if any for this SMB2 command type) + * 6) Call free smb buffer + * 7) return + * + */ + +int +SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) +{ + struct smb2_negotiate_req *req; + struct smb2_negotiate_rsp *rsp; + struct kvec iov[1]; + int rc = 0; + int resp_buftype; + struct TCP_Server_Info *server; + unsigned int sec_flags; + u16 i; + u16 temp = 0; + int blob_offset, blob_length; + char *security_blob; + int flags = CIFS_NEG_OP; + + cFYI(1, "Negotiate protocol"); + + if (ses->server) + server = ses->server; + else { + rc = -EIO; + return rc; + } + + rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); + if (rc) + return rc; + + /* if any of auth flags (ie not sign or seal) are overriden use them */ + if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) + sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ + else /* if override flags set only sign/seal OR them with global auth */ + sec_flags = global_secflags | ses->overrideSecFlg; + + cFYI(1, "sec_flags 0x%x", sec_flags); + + req->hdr.SessionId = 0; + + for (i = 0; i < SMB2_NUM_PROT; i++) + req->Dialects[i] = smb2protocols[i].name; + + req->DialectCount = cpu_to_le16(i); + inc_rfc1001_len(req, i * 2); + + /* only one of SMB2 signing flags may be set in SMB2 request */ + if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) + temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; + else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ + temp = SMB2_NEGOTIATE_SIGNING_ENABLED; + + req->SecurityMode = cpu_to_le16(temp); + + req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); + + iov[0].iov_base = (char *)req; + /* 4 for 
rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags); + + rsp = (struct smb2_negotiate_rsp *)iov[0].iov_base; + /* + * No tcon so can't do + * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); + */ + if (rc != 0) + goto neg_exit; + + if (rsp == NULL) { + rc = -EIO; + goto neg_exit; + } + + cFYI(1, "mode 0x%x", rsp->SecurityMode); + + if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name) + cFYI(1, "negotiated smb2.1 dialect"); + else if (rsp->DialectRevision == smb2protocols[SMB2_PROT].name) + cFYI(1, "negotiated smb2 dialect"); + else { + cERROR(1, "Illegal dialect returned by server %d", + le16_to_cpu(rsp->DialectRevision)); + rc = -EIO; + goto neg_exit; + } + server->dialect = le16_to_cpu(rsp->DialectRevision); + + server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); + server->max_read = le32_to_cpu(rsp->MaxReadSize); + server->max_write = le32_to_cpu(rsp->MaxWriteSize); + /* BB Do we need to validate the SecurityMode? 
*/ + server->sec_mode = le16_to_cpu(rsp->SecurityMode); + server->capabilities = le32_to_cpu(rsp->Capabilities); + + security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, + &rsp->hdr); + if (blob_length == 0) { + cERROR(1, "missing security blob on negprot"); + rc = -EIO; + goto neg_exit; + } +#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ + rc = decode_neg_token_init(security_blob, blob_length, + &server->sec_type); + if (rc == 1) + rc = 0; + else if (rc == 0) { + rc = -EIO; + goto neg_exit; + } +#endif + +neg_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index b08a277df896..ef8dae213f60 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -148,4 +148,43 @@ struct smb2_err_rsp { __u8 ErrorData[1]; /* variable length */ } __packed; +struct smb2_negotiate_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 36 */ + __le16 DialectCount; + __le16 SecurityMode; + __le16 Reserved; /* MBZ */ + __le32 Capabilities; + __u8 ClientGUID[16]; /* MBZ */ + __le64 ClientStartTime; /* MBZ */ + __le16 Dialects[2]; /* variable length */ +} __packed; + +/* SecurityMode flags */ +#define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 +#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 +/* Capabilities flags */ +#define SMB2_GLOBAL_CAP_DFS 0x00000001 +#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ +#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ + +struct smb2_negotiate_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 65 */ + __le16 SecurityMode; + __le16 DialectRevision; + __le16 Reserved; /* MBZ */ + __u8 ServerGUID[16]; + __le32 Capabilities; + __le32 MaxTransactSize; + __le32 MaxReadSize; + __le32 MaxWriteSize; + __le64 SystemTime; /* MBZ */ + __le64 ServerStartTime; + __le16 SecurityBufferOffset; + __le16 SecurityBufferLength; + __le32 Reserved2; /* may be any value, ignore */ + __u8 Buffer[1]; /* variable length GSS 
security buffer */ +} __packed; + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 19bf987c2648..881767002807 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -35,10 +35,17 @@ struct statfs; extern int map_smb2_to_linux_error(char *buf, bool log_err); extern int smb2_check_message(char *buf, unsigned int length); extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); +extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, unsigned int nvec, struct mid_q_entry **ret_mid); +/* + * SMB2 Worker functions - most of protocol specific implementation details + * are contained within these calls. + */ +extern int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses); + #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 5478f9ba9a34d660eb3227dcd16314689c51f946 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 27 Dec 2011 16:22:00 +0400 Subject: CIFS: Add session setup/logoff capability for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 + fs/cifs/ntlmssp.h | 10 +++ fs/cifs/sess.c | 6 +- fs/cifs/smb2misc.c | 5 ++ fs/cifs/smb2ops.c | 2 + fs/cifs/smb2pdu.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 37 +++++++++ fs/cifs/smb2proto.h | 3 + 8 files changed, 284 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2d48f880b130..0d78bc410cb3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -504,6 +504,9 @@ struct cifs_ses { struct session_key auth_key; struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ bool need_reconnect:1; /* connection reset, uid now invalid */ +#ifdef CONFIG_CIFS_SMB2 + __u16 session_flags; +#endif /* CONFIG_CIFS_SMB2 */ }; /* no more than one of 
the following three session flags may be set */ #define CIFS_SES_NT4 1 diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 5d52e4a3b1ed..848249fa120f 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -126,3 +126,13 @@ typedef struct _AUTHENTICATE_MESSAGE { do not set the version is present flag */ char UserString[0]; } __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE; + +/* + * Size of the session key (crypto key encrypted with the password + */ + +int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses); +void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses); +int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen, + struct cifs_ses *ses, + const struct nls_table *nls_cp); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 08efc3c8efef..382c06d01b38 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -364,7 +364,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, return rc; } -static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, +int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses) { unsigned int tioffset; /* challenge message target info area */ @@ -415,7 +415,7 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, /* We do not malloc the blob, it is passed in pbuffer, because it is fixed size, and small, making this approach cleaner */ -static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, +void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses) { NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; @@ -451,7 +451,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, /* We do not malloc the blob, it is passed in pbuffer, because its maximum possible size is fixed and small, making this approach cleaner. 
This function returns the length of the data in the blob */ -static int build_ntlmssp_auth_blob(unsigned char *pbuffer, +int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e4dede4ae058..10729a74da27 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -224,6 +224,11 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferLength); break; case SMB2_SESSION_SETUP: + *off = le16_to_cpu( + ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferOffset); + *len = le16_to_cpu( + ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferLength); + break; case SMB2_CREATE: case SMB2_READ: case SMB2_QUERY_INFO: diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 2b5232b4f7e7..0057861ce19d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -170,6 +170,8 @@ struct smb_version_operations smb21_operations = { .dump_detail = smb2_dump_detail, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, + .sess_setup = SMB2_sess_setup, + .logoff = SMB2_logoff, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 719e4c4f0307..2165f0d15963 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -328,3 +328,224 @@ neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; } + +int +SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_cp) +{ + struct smb2_sess_setup_req *req; + struct smb2_sess_setup_rsp *rsp = NULL; + struct kvec iov[2]; + int rc = 0; + int resp_buftype; + __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ + struct TCP_Server_Info *server; + unsigned int sec_flags; + u8 temp = 0; + u16 blob_length = 0; + char *security_blob; + char *ntlmssp_blob = NULL; + bool use_spnego = false; /* else use raw ntlmssp */ + + cFYI(1, "Session Setup"); + + if (ses->server) + server = 
ses->server; + else { + rc = -EIO; + return rc; + } + + /* + * If memory allocation is successful, caller of this function + * frees it. + */ + ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); + if (!ses->ntlmssp) + return -ENOMEM; + + ses->server->secType = RawNTLMSSP; + +ssetup_ntlmssp_authenticate: + if (phase == NtLmChallenge) + phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ + + rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req); + if (rc) + return rc; + + /* if any of auth flags (ie not sign or seal) are overriden use them */ + if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) + sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ + else /* if override flags set only sign/seal OR them with global auth */ + sec_flags = global_secflags | ses->overrideSecFlg; + + cFYI(1, "sec_flags 0x%x", sec_flags); + + req->hdr.SessionId = 0; /* First session, not a reauthenticate */ + req->VcNumber = 0; /* MBZ */ + /* to enable echos and oplocks */ + req->hdr.CreditRequest = cpu_to_le16(3); + + /* only one of SMB2 signing flags may be set in SMB2 request */ + if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) + temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; + else if (ses->server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) + temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; + else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ + temp = SMB2_NEGOTIATE_SIGNING_ENABLED; + + req->SecurityMode = temp; + req->Capabilities = 0; + req->Channel = 0; /* MBZ */ + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field and 1 for pad */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + if (phase == NtLmNegotiate) { + ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE), + GFP_KERNEL); + if (ntlmssp_blob == NULL) { + rc = -ENOMEM; + goto ssetup_exit; + } + build_ntlmssp_negotiate_blob(ntlmssp_blob, ses); + if (use_spnego) { + /* blob_length = build_spnego_ntlmssp_blob( + &security_blob, + 
sizeof(struct _NEGOTIATE_MESSAGE), + ntlmssp_blob); */ + /* BB eventually need to add this */ + cERROR(1, "spnego not supported for SMB2 yet"); + rc = -EOPNOTSUPP; + kfree(ntlmssp_blob); + goto ssetup_exit; + } else { + blob_length = sizeof(struct _NEGOTIATE_MESSAGE); + /* with raw NTLMSSP we don't encapsulate in SPNEGO */ + security_blob = ntlmssp_blob; + } + } else if (phase == NtLmAuthenticate) { + req->hdr.SessionId = ses->Suid; + ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, + GFP_KERNEL); + if (ntlmssp_blob == NULL) { + cERROR(1, "failed to malloc ntlmssp blob"); + rc = -ENOMEM; + goto ssetup_exit; + } + rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses, + nls_cp); + if (rc) { + cFYI(1, "build_ntlmssp_auth_blob failed %d", rc); + goto ssetup_exit; /* BB double check error handling */ + } + if (use_spnego) { + /* blob_length = build_spnego_ntlmssp_blob( + &security_blob, + blob_length, + ntlmssp_blob); */ + cERROR(1, "spnego not supported for SMB2 yet"); + rc = -EOPNOTSUPP; + kfree(ntlmssp_blob); + goto ssetup_exit; + } else { + security_blob = ntlmssp_blob; + } + } else { + cERROR(1, "illegal ntlmssp phase"); + rc = -EIO; + goto ssetup_exit; + } + + /* Testing shows that buffer offset must be at location of Buffer[0] */ + req->SecurityBufferOffset = + cpu_to_le16(sizeof(struct smb2_sess_setup_req) - + 1 /* pad */ - 4 /* rfc1001 len */); + req->SecurityBufferLength = cpu_to_le16(blob_length); + iov[1].iov_base = security_blob; + iov[1].iov_len = blob_length; + + inc_rfc1001_len(req, blob_length - 1 /* pad */); + + /* BB add code to build os and lm fields */ + + rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, CIFS_LOG_ERROR); + + kfree(security_blob); + rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; + if (rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { + if (phase != NtLmNegotiate) { + cERROR(1, "Unexpected more processing error"); + goto ssetup_exit; + } + if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != + 
le16_to_cpu(rsp->SecurityBufferOffset)) { + cERROR(1, "Invalid security buffer offset %d", + le16_to_cpu(rsp->SecurityBufferOffset)); + rc = -EIO; + goto ssetup_exit; + } + + /* NTLMSSP Negotiate sent now processing challenge (response) */ + phase = NtLmChallenge; /* process ntlmssp challenge */ + rc = 0; /* MORE_PROCESSING is not an error here but expected */ + ses->Suid = rsp->hdr.SessionId; + rc = decode_ntlmssp_challenge(rsp->Buffer, + le16_to_cpu(rsp->SecurityBufferLength), ses); + } + + /* + * BB eventually add code for SPNEGO decoding of NtlmChallenge blob, + * but at least the raw NTLMSSP case works. + */ + /* + * No tcon so can't do + * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); + */ + if (rc != 0) + goto ssetup_exit; + + if (rsp == NULL) { + rc = -EIO; + goto ssetup_exit; + } + + ses->session_flags = le16_to_cpu(rsp->SessionFlags); +ssetup_exit: + free_rsp_buf(resp_buftype, rsp); + + /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ + if ((phase == NtLmChallenge) && (rc == 0)) + goto ssetup_ntlmssp_authenticate; + return rc; +} + +int +SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) +{ + struct smb2_logoff_req *req; /* response is also trivial struct */ + int rc = 0; + struct TCP_Server_Info *server; + + cFYI(1, "disconnect session %p", ses); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req); + if (rc) + return rc; + + /* since no tcon, smb2_init can not do this, so do here */ + req->hdr.SessionId = ses->Suid; + + rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); + /* + * No tcon so can't do + * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); + */ + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index ef8dae213f60..26af68b2955a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -187,4 +187,41 @@ struct smb2_negotiate_rsp { __u8 Buffer[1]; /* variable length GSS security 
buffer */ } __packed; +struct smb2_sess_setup_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 25 */ + __u8 VcNumber; + __u8 SecurityMode; + __le32 Capabilities; + __le32 Channel; + __le16 SecurityBufferOffset; + __le16 SecurityBufferLength; + __le64 PreviousSessionId; + __u8 Buffer[1]; /* variable length GSS security buffer */ +} __packed; + +/* Currently defined SessionFlags */ +#define SMB2_SESSION_FLAG_IS_GUEST 0x0001 +#define SMB2_SESSION_FLAG_IS_NULL 0x0002 +struct smb2_sess_setup_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 SessionFlags; + __le16 SecurityBufferOffset; + __le16 SecurityBufferLength; + __u8 Buffer[1]; /* variable length GSS security buffer */ +} __packed; + +struct smb2_logoff_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + +struct smb2_logoff_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 881767002807..9364fbcb90c6 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -47,5 +47,8 @@ extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, * are contained within these calls. 
*/ extern int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses); +extern int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_cp); +extern int SMB2_logoff(const unsigned int xid, struct cifs_ses *ses); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From faaf946a7d5b79194358437150f34ab4c66bfe21 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 27 Dec 2011 16:04:00 +0400 Subject: CIFS: Add tree connect/disconnect capability for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 1 - fs/cifs/cifs_unicode.h | 1 - fs/cifs/cifsglob.h | 11 +++- fs/cifs/smb2ops.c | 2 + fs/cifs/smb2pdu.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/cifs/smb2pdu.h | 57 ++++++++++++++++++ fs/cifs/smb2proto.h | 4 ++ 7 files changed, 230 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index fbb9da951843..97c1d4210869 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -330,4 +330,3 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, ctoUTF16_out: return i; } - diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index a513a546700b..a44c6eb8a4d7 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -84,7 +84,6 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen, const struct nls_table *codepage); extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, const struct nls_table *cp, int mapChars); - #endif /* diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0d78bc410cb3..ef4e0a0bc826 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -528,7 +528,7 @@ struct cifs_tcon { char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ char *nativeFileSystem; char *password; /* for share-level security */ - __u16 tid; /* The 2 byte tree id */ + __u32 tid; /* The 4 byte tree id */ __u16 Flags; /* optional support bits 
*/ enum statusEnum tidStatus; #ifdef CONFIG_CIFS_STATS @@ -584,6 +584,15 @@ struct cifs_tcon { bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool need_reconnect:1; /* connection reset, tid now invalid */ +#ifdef CONFIG_CIFS_SMB2 + bool print:1; /* set if connection to printer share */ + bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ + __u32 capabilities; + __u32 share_flags; + __u32 maximal_access; + __u32 vol_serial_number; + __le64 vol_create_time; +#endif /* CONFIG_CIFS_SMB2 */ #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ struct fscache_cookie *fscache; /* cookie for share */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0057861ce19d..0e33ca32abf9 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -172,6 +172,8 @@ struct smb_version_operations smb21_operations = { .negotiate = smb2_negotiate, .sess_setup = SMB2_sess_setup, .logoff = SMB2_logoff, + .tree_connect = SMB2_tcon, + .tree_disconnect = SMB2_tdis, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2165f0d15963..1bf037ec5a9d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -110,8 +110,8 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , hdr->SessionId = tcon->ses->Suid; /* BB check following DFS flags BB */ /* BB do we have to add check for SHI1005_FLAGS_DFS_ROOT too? */ - /* if (tcon->share_flags & SHI1005_FLAGS_DFS) - hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */ + if (tcon->share_flags & SHI1005_FLAGS_DFS) + hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; /* BB how does SMB2 do case sensitive? 
*/ /* if (tcon->nocase) hdr->Flags |= SMBFLG_CASELESS; */ @@ -549,3 +549,158 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) */ return rc; } + +static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code) +{ + /* cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[code]); */ +} + +#define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */) + +int +SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, + struct cifs_tcon *tcon, const struct nls_table *cp) +{ + struct smb2_tree_connect_req *req; + struct smb2_tree_connect_rsp *rsp = NULL; + struct kvec iov[2]; + int rc = 0; + int resp_buftype; + int unc_path_len; + struct TCP_Server_Info *server; + __le16 *unc_path = NULL; + + cFYI(1, "TCON"); + + if ((ses->server) && tree) + server = ses->server; + else + return -EIO; + + if (tcon && tcon->bad_network_name) + return -ENOENT; + + unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL); + if (unc_path == NULL) + return -ENOMEM; + + unc_path_len = cifs_strtoUTF16(unc_path, tree, strlen(tree), cp) + 1; + unc_path_len *= 2; + if (unc_path_len < 2) { + kfree(unc_path); + return -EINVAL; + } + + rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); + if (rc) { + kfree(unc_path); + return rc; + } + + if (tcon == NULL) { + /* since no tcon, smb2_init can not do this, so do here */ + req->hdr.SessionId = ses->Suid; + /* if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED) + req->hdr.Flags |= SMB2_FLAGS_SIGNED; */ + } + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field and 1 for pad */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + + /* Testing shows that buffer offset must be at location of Buffer[0] */ + req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req) + - 1 /* pad */ - 4 /* do not count rfc1001 len field */); + req->PathLength = cpu_to_le16(unc_path_len - 2); + iov[1].iov_base = unc_path; + iov[1].iov_len = unc_path_len; + + inc_rfc1001_len(req, unc_path_len 
- 1 /* pad */); + + rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0); + rsp = (struct smb2_tree_connect_rsp *)iov[0].iov_base; + + if (rc != 0) { + if (tcon) { + cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE); + tcon->need_reconnect = true; + } + goto tcon_error_exit; + } + + if (rsp == NULL) { + rc = -EIO; + goto tcon_exit; + } + + if (tcon == NULL) { + ses->ipc_tid = rsp->hdr.TreeId; + goto tcon_exit; + } + + if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) + cFYI(1, "connection to disk share"); + else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { + tcon->ipc = true; + cFYI(1, "connection to pipe share"); + } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { + tcon->print = true; + cFYI(1, "connection to printer"); + } else { + cERROR(1, "unknown share type %d", rsp->ShareType); + rc = -EOPNOTSUPP; + goto tcon_error_exit; + } + + tcon->share_flags = le32_to_cpu(rsp->ShareFlags); + tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); + tcon->tidStatus = CifsGood; + tcon->need_reconnect = false; + tcon->tid = rsp->hdr.TreeId; + strncpy(tcon->treeName, tree, MAX_TREE_SIZE); + + if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && + ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) + cERROR(1, "DFS capability contradicts DFS flag"); + +tcon_exit: + free_rsp_buf(resp_buftype, rsp); + kfree(unc_path); + return rc; + +tcon_error_exit: + if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { + cERROR(1, "BAD_NETWORK_NAME: %s", tree); + tcon->bad_network_name = true; + } + goto tcon_exit; +} + +int +SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) +{ + struct smb2_tree_disconnect_req *req; /* response is trivial */ + int rc = 0; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + + cFYI(1, "Tree Disconnect"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) + return 0; + + rc = small_smb2_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req); + if (rc) + return rc; + + 
rc = SendReceiveNoRsp(xid, ses, (char *)&req->hdr, 0); + if (rc) + cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE); + + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 26af68b2955a..aa77bf3a7a69 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -224,4 +224,61 @@ struct smb2_logoff_rsp { __le16 Reserved; } __packed; +struct smb2_tree_connect_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 Reserved; + __le16 PathOffset; + __le16 PathLength; + __u8 Buffer[1]; /* variable length */ +} __packed; + +struct smb2_tree_connect_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 16 */ + __u8 ShareType; /* see below */ + __u8 Reserved; + __le32 ShareFlags; /* see below */ + __le32 Capabilities; /* see below */ + __le32 MaximalAccess; +} __packed; + +/* Possible ShareType values */ +#define SMB2_SHARE_TYPE_DISK 0x01 +#define SMB2_SHARE_TYPE_PIPE 0x02 +#define SMB2_SHARE_TYPE_PRINT 0x03 + +/* + * Possible ShareFlags - exactly one and only one of the first 4 caching flags + * must be set (any of the remaining, SHI1005, flags may be set individually + * or in combination. 
+ */ +#define SMB2_SHAREFLAG_MANUAL_CACHING 0x00000000 +#define SMB2_SHAREFLAG_AUTO_CACHING 0x00000010 +#define SMB2_SHAREFLAG_VDO_CACHING 0x00000020 +#define SMB2_SHAREFLAG_NO_CACHING 0x00000030 +#define SHI1005_FLAGS_DFS 0x00000001 +#define SHI1005_FLAGS_DFS_ROOT 0x00000002 +#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS 0x00000100 +#define SHI1005_FLAGS_FORCE_SHARED_DELETE 0x00000200 +#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 +#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 +#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 +#define SHI1005_FLAGS_ENABLE_HASH 0x00002000 + +/* Possible share capabilities */ +#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) + +struct smb2_tree_disconnect_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + +struct smb2_tree_disconnect_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 9364fbcb90c6..bc7299349dbf 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -50,5 +50,9 @@ extern int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses); extern int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp); extern int SMB2_logoff(const unsigned int xid, struct cifs_ses *ses); +extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, + const char *tree, struct cifs_tcon *tcon, + const struct nls_table *); +extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From aa24d1e9692411e605084938ced6b160f92df454 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 27 Dec 2011 16:23:34 +0400 Subject: CIFS: Process reconnects for SMB2 shares Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 1 + fs/cifs/cifssmb.c | 21 +++++---- fs/cifs/connect.c | 3 ++ 
fs/cifs/smb2pdu.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 143 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 3b4d41f9ceeb..61baaa3330fb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -171,6 +171,7 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data, const char *devname); extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); extern void cifs_umount(struct cifs_sb_info *); +extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index dcb0ad87e173..f1dfc7844f1b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -112,24 +112,29 @@ cifs_kmap_unlock(void) #define cifs_kmap_unlock() do { ; } while(0) #endif /* CONFIG_HIGHMEM */ -/* Mark as invalid, all open files on tree connections since they - were closed when session to server was lost */ -static void mark_open_files_invalid(struct cifs_tcon *pTcon) +/* + * Mark as invalid, all open files on tree connections since they + * were closed when session to server was lost. 
+ */ +void +cifs_mark_open_files_invalid(struct cifs_tcon *tcon) { struct cifsFileInfo *open_file = NULL; struct list_head *tmp; struct list_head *tmp1; -/* list all files open on tree connection and mark them invalid */ + /* list all files open on tree connection and mark them invalid */ spin_lock(&cifs_file_list_lock); - list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { + list_for_each_safe(tmp, tmp1, &tcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); open_file->invalidHandle = true; open_file->oplock_break_cancelled = true; } spin_unlock(&cifs_file_list_lock); - /* BB Add call to invalidate_inodes(sb) for all superblocks mounted - to this tcon */ + /* + * BB Add call to invalidate_inodes(sb) for all superblocks mounted + * to this tcon. + */ } /* reconnect the socket, tcon, and smb session if needed */ @@ -209,7 +214,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) goto out; } - mark_open_files_invalid(tcon); + cifs_mark_open_files_invalid(tcon); rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage); mutex_unlock(&ses->session_mutex); cFYI(1, "reconnect tcon rc = %d", rc); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a6197224b102..7cf8b1632242 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -317,6 +317,9 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedReconnect; spin_unlock(&GlobalMid_Lock); server->maxBuf = 0; +#ifdef CONFIG_CIFS_SMB2 + server->max_read = 0; +#endif cFYI(1, "Reconnecting tcp session"); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1bf037ec5a9d..48c04b2832e2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -127,7 +127,132 @@ static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) { int rc = 0; - /* BB add missing code here */ + struct nls_table *nls_codepage; + struct cifs_ses *ses; + struct TCP_Server_Info *server; + + /* + * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so + * check for tcp and 
smb session status done differently + * for those three - in the calling routine. + */ + if (tcon == NULL) + return rc; + + if (smb2_command == SMB2_TREE_CONNECT) + return rc; + + if (tcon->tidStatus == CifsExiting) { + /* + * only tree disconnect, open, and write, + * (and ulogoff which does not have tcon) + * are allowed as we start force umount. + */ + if ((smb2_command != SMB2_WRITE) && + (smb2_command != SMB2_CREATE) && + (smb2_command != SMB2_TREE_DISCONNECT)) { + cFYI(1, "can not send cmd %d while umounting", + smb2_command); + return -ENODEV; + } + } + if ((!tcon->ses) || (tcon->ses->status == CifsExiting) || + (!tcon->ses->server)) + return -EIO; + + ses = tcon->ses; + server = ses->server; + + /* + * Give demultiplex thread up to 10 seconds to reconnect, should be + * greater than cifs socket timeout which is 7 seconds + */ + while (server->tcpStatus == CifsNeedReconnect) { + /* + * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE + * here since they are implicitly done when session drops. + */ + switch (smb2_command) { + /* + * BB Should we keep oplock break and add flush to exceptions? + */ + case SMB2_TREE_DISCONNECT: + case SMB2_CANCEL: + case SMB2_CLOSE: + case SMB2_OPLOCK_BREAK: + return -EAGAIN; + } + + wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + + /* are we still trying to reconnect? */ + if (server->tcpStatus != CifsNeedReconnect) + break; + + /* + * on "soft" mounts we wait once. 
Hard mounts keep + * retrying until process is killed or server comes + * back on-line + */ + if (!tcon->retry) { + cFYI(1, "gave up waiting on reconnect in smb_init"); + return -EHOSTDOWN; + } + } + + if (!tcon->ses->need_reconnect && !tcon->need_reconnect) + return rc; + + nls_codepage = load_nls_default(); + + /* + * need to prevent multiple threads trying to simultaneously reconnect + * the same SMB session + */ + mutex_lock(&tcon->ses->session_mutex); + rc = cifs_negotiate_protocol(0, tcon->ses); + if (!rc && tcon->ses->need_reconnect) + rc = cifs_setup_session(0, tcon->ses, nls_codepage); + + if (rc || !tcon->need_reconnect) { + mutex_unlock(&tcon->ses->session_mutex); + goto out; + } + + cifs_mark_open_files_invalid(tcon); + rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); + mutex_unlock(&tcon->ses->session_mutex); + cFYI(1, "reconnect tcon rc = %d", rc); + if (rc) + goto out; + atomic_inc(&tconInfoReconnectCount); + /* + * BB FIXME add code to check if wsize needs update due to negotiated + * smb buffer size shrinking. + */ +out: + /* + * Check if handle based operation so we know whether we can continue + * or not without returning to caller to reset file handle. + */ + /* + * BB Is flush done by server on drop of tcp session? Should we special + * case it and skip above? 
+ */ + switch (smb2_command) { + case SMB2_FLUSH: + case SMB2_READ: + case SMB2_WRITE: + case SMB2_LOCK: + case SMB2_IOCTL: + case SMB2_QUERY_DIRECTORY: + case SMB2_CHANGE_NOTIFY: + case SMB2_QUERY_INFO: + case SMB2_SET_INFO: + return -EAGAIN; + } + unload_nls(nls_codepage); return rc; } -- cgit v1.2.3 From b669f33ca61738171aecc5ae90d776d91b122eb8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 27 May 2012 20:21:53 +0400 Subject: CIFS: Move getting dfs referalls to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/cifsproto.h | 15 +++++++-------- fs/cifs/cifssmb.c | 13 ++++++------- fs/cifs/connect.c | 15 ++++++++------- fs/cifs/smb1ops.c | 1 + 5 files changed, 27 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ef4e0a0bc826..2d80d82f41d0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -162,6 +162,7 @@ struct TCP_Server_Info; struct cifsFileInfo; struct cifs_ses; struct cifs_tcon; +struct dfs_info3_param; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -208,6 +209,10 @@ struct smb_version_operations { struct cifs_tcon *, const struct nls_table *); /* close tree connecion */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); + /* get DFS referrals */ + int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, + const char *, struct dfs_info3_param **, + unsigned int *, const struct nls_table *, int); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 61baaa3330fb..4857965b22db 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -226,17 +226,16 @@ extern int CIFSSMBUnixQPathInfo(const unsigned int xid, const struct nls_table *nls_codepage, int remap); extern int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, - const unsigned char *searchName, - struct dfs_info3_param **target_nodes, - unsigned int 
*number_of_nodes_in_array, - const struct nls_table *nls_codepage, int remap); + const char *search_name, + struct dfs_info3_param **target_nodes, + unsigned int *num_of_nodes, + const struct nls_table *nls_codepage, int remap); -extern int get_dfs_path(unsigned int xid, struct cifs_ses *pSesInfo, +extern int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path, const struct nls_table *nls_codepage, - unsigned int *pnum_referrals, - struct dfs_info3_param **preferrals, - int remap); + unsigned int *num_referrals, + struct dfs_info3_param **referrals, int remap); extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, struct smb_vol *vol); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f1dfc7844f1b..af859c325db1 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4766,8 +4766,7 @@ parse_DFS_referrals_exit: int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, - const unsigned char *searchName, - struct dfs_info3_param **target_nodes, + const char *search_name, struct dfs_info3_param **target_nodes, unsigned int *num_of_nodes, const struct nls_table *nls_codepage, int remap) { @@ -4781,7 +4780,7 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, *num_of_nodes = 0; *target_nodes = NULL; - cFYI(1, "In GetDFSRefer the path %s", searchName); + cFYI(1, "In GetDFSRefer the path %s", search_name); if (ses == NULL) return -ENODEV; getDFSRetry: @@ -4804,14 +4803,14 @@ getDFSRetry: pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; name_len = cifsConvertToUTF16((__le16 *) pSMB->RequestFileName, - searchName, PATH_MAX, nls_codepage, + search_name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(searchName, PATH_MAX); + name_len = strnlen(search_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->RequestFileName, searchName, name_len); + 
strncpy(pSMB->RequestFileName, search_name, name_len); } if (ses->server) { @@ -4867,7 +4866,7 @@ getDFSRetry: /* parse returned result into more usable form */ rc = parse_DFS_referrals(pSMBr, num_of_nodes, target_nodes, nls_codepage, remap, - searchName); + search_name); GetDFSRefExit: cifs_buf_release(pSMB); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7cf8b1632242..70a34d126001 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2767,14 +2767,14 @@ out: } int -get_dfs_path(unsigned int xid, struct cifs_ses *ses, const char *old_path, +get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path, const struct nls_table *nls_codepage, unsigned int *num_referrals, struct dfs_info3_param **referrals, int remap) { char *temp_unc; int rc = 0; - if (!ses->server->ops->tree_connect) + if (!ses->server->ops->tree_connect || !ses->server->ops->get_dfs_refer) return -ENOSYS; *num_referrals = 0; @@ -2796,11 +2796,12 @@ get_dfs_path(unsigned int xid, struct cifs_ses *ses, const char *old_path, kfree(temp_unc); } if (rc == 0) - rc = CIFSGetDFSRefer(xid, ses, old_path, referrals, - num_referrals, nls_codepage, remap); + rc = ses->server->ops->get_dfs_refer(xid, ses, old_path, + referrals, num_referrals, + nls_codepage, remap); /* * BB - map targetUNCs to dfs_info3 structures, here or in - * CIFSGetDFSRefer. + * ses->server->ops->get_dfs_refer. */ return rc; @@ -3488,7 +3489,7 @@ build_unc_path_to_root(const struct smb_vol *vol, * determine whether there were referrals. */ static int -expand_dfs_referral(unsigned int xid, struct cifs_ses *pSesInfo, +expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb, int check_prefix) { @@ -3504,7 +3505,7 @@ expand_dfs_referral(unsigned int xid, struct cifs_ses *pSesInfo, /* For DFS paths, skip the first '\' of the UNC */ ref_path = check_prefix ? 
full_path + 1 : volume_info->UNC + 1; - rc = get_dfs_path(xid, pSesInfo , ref_path, cifs_sb->local_nls, + rc = get_dfs_path(xid, ses, ref_path, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 8f873863142a..6d9025b29e54 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -434,6 +434,7 @@ struct smb_version_operations smb1_operations = { .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, .tree_disconnect = CIFSSMBTDis, + .get_dfs_refer = CIFSGetDFSRefer, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From af4281dc22f1eb8a9503b53330ca02f57db68b25 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 27 May 2012 20:48:35 +0400 Subject: CIFS: Move informational tcon calls to ops struct and rename variables in cifs_mount. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/connect.c | 51 ++++++++++++++++++++++++--------------------------- fs/cifs/smb1ops.c | 8 ++++++++ 3 files changed, 34 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2d80d82f41d0..acfa68569f3d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -213,6 +213,8 @@ struct smb_version_operations { int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); + /* informational QFS call */ + void (*qfs_tcon)(const unsigned int, struct cifs_tcon *); }; struct smb_version_values { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 70a34d126001..80807923a545 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3600,9 +3600,9 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { int rc; unsigned int xid; - struct cifs_ses *pSesInfo; + struct cifs_ses *ses; struct cifs_tcon *tcon; - struct TCP_Server_Info *srvTcp; + struct TCP_Server_Info *server; char 
*full_path; struct tcon_link *tlink; #ifdef CONFIG_CIFS_DFS_UPCALL @@ -3619,39 +3619,39 @@ try_mount_again: if (referral_walks_count) { if (tcon) cifs_put_tcon(tcon); - else if (pSesInfo) - cifs_put_smb_ses(pSesInfo); + else if (ses) + cifs_put_smb_ses(ses); free_xid(xid); } #endif rc = 0; tcon = NULL; - pSesInfo = NULL; - srvTcp = NULL; + ses = NULL; + server = NULL; full_path = NULL; tlink = NULL; xid = get_xid(); /* get a reference to a tcp session */ - srvTcp = cifs_get_tcp_session(volume_info); - if (IS_ERR(srvTcp)) { - rc = PTR_ERR(srvTcp); + server = cifs_get_tcp_session(volume_info); + if (IS_ERR(server)) { + rc = PTR_ERR(server); bdi_destroy(&cifs_sb->bdi); goto out; } /* get a reference to a SMB session */ - pSesInfo = cifs_get_smb_ses(srvTcp, volume_info); - if (IS_ERR(pSesInfo)) { - rc = PTR_ERR(pSesInfo); - pSesInfo = NULL; + ses = cifs_get_smb_ses(server, volume_info); + if (IS_ERR(ses)) { + rc = PTR_ERR(ses); + ses = NULL; goto mount_fail_check; } /* search for existing tcon to this server share */ - tcon = cifs_get_tcon(pSesInfo, volume_info); + tcon = cifs_get_tcon(ses, volume_info); if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); tcon = NULL; @@ -3672,11 +3672,9 @@ try_mount_again: } else tcon->unix_ext = 0; /* server does not support them */ - /* do not care if following two calls succeed - informational */ - if (!tcon->ipc) { - CIFSSMBQFSDeviceInfo(xid, tcon); - CIFSSMBQFSAttributeInfo(xid, tcon); - } + /* do not care if a following call succeed - informational */ + if (!tcon->ipc && server->ops->qfs_tcon) + server->ops->qfs_tcon(xid, tcon); cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); @@ -3694,8 +3692,8 @@ remote_path_check: * Chase the referral if found, otherwise continue normally. 
*/ if (referral_walks_count == 0) { - int refrc = expand_dfs_referral(xid, pSesInfo, volume_info, - cifs_sb, false); + int refrc = expand_dfs_referral(xid, ses, volume_info, cifs_sb, + false); if (!refrc) { referral_walks_count++; goto try_mount_again; @@ -3733,8 +3731,7 @@ remote_path_check: goto mount_fail_check; } - rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb, - true); + rc = expand_dfs_referral(xid, ses, volume_info, cifs_sb, true); if (!rc) { referral_walks_count++; @@ -3756,7 +3753,7 @@ remote_path_check: goto mount_fail_check; } - tlink->tl_uid = pSesInfo->linux_uid; + tlink->tl_uid = ses->linux_uid; tlink->tl_tcon = tcon; tlink->tl_time = jiffies; set_bit(TCON_LINK_MASTER, &tlink->tl_flags); @@ -3777,10 +3774,10 @@ mount_fail_check: /* up accidentally freeing someone elses tcon struct */ if (tcon) cifs_put_tcon(tcon); - else if (pSesInfo) - cifs_put_smb_ses(pSesInfo); + else if (ses) + cifs_put_smb_ses(ses); else - cifs_put_tcp_session(srvTcp); + cifs_put_tcp_session(server); bdi_destroy(&cifs_sb->bdi); } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 6d9025b29e54..96eb06ff9dd1 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -410,6 +410,13 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; } +static void +cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +{ + CIFSSMBQFSDeviceInfo(xid, tcon); + CIFSSMBQFSAttributeInfo(xid, tcon); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -435,6 +442,7 @@ struct smb_version_operations smb1_operations = { .tree_connect = CIFSTCon, .tree_disconnect = CIFSSMBTDis, .get_dfs_refer = CIFSGetDFSRefer, + .qfs_tcon = cifs_qfs_tcon, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 68889f269b16a11866f4ec71e8177bdd0c184a3f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 25 May 2012 14:40:22 +0400 Subject: CIFS: Move is_path_accessible to ops struct 
Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifsproto.h | 12 +++++------ fs/cifs/cifssmb.c | 60 ++++++++++++++++++++++++++--------------------------- fs/cifs/connect.c | 31 ++++++--------------------- fs/cifs/smb1ops.c | 25 ++++++++++++++++++++++ 5 files changed, 69 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index acfa68569f3d..f711d666e3db 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -215,6 +215,9 @@ struct smb_version_operations { unsigned int *, const struct nls_table *, int); /* informational QFS call */ void (*qfs_tcon)(const unsigned int, struct cifs_tcon *); + /* check if a path is accessible or not */ + int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *, const char *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 4857965b22db..b9967adeaa9e 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -208,14 +208,12 @@ extern int CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, extern int CIFSSMBQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, u16 netfid, FILE_ALL_INFO *pFindData); extern int CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - FILE_ALL_INFO *findData, - int legacy /* whether to use old info level */, - const struct nls_table *nls_codepage, int remap); + const char *search_Name, FILE_ALL_INFO *data, + int legacy /* whether to use old info level */, + const struct nls_table *nls_codepage, int remap); extern int SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - FILE_ALL_INFO *findData, - const struct nls_table *nls_codepage, int remap); + const char *search_name, FILE_ALL_INFO *data, + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBUnixQFileInfo(const unsigned int xid, struct cifs_tcon 
*tcon, u16 netfid, FILE_UNIX_BASIC_INFO *pFindData); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index af859c325db1..84a53380e124 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3853,10 +3853,10 @@ setCifsAclRetry: /* Legacy Query Path Information call for lookup to old servers such as Win9x/WinME */ -int SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - FILE_ALL_INFO *pFinfo, - const struct nls_table *nls_codepage, int remap) +int +SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *search_name, FILE_ALL_INFO *data, + const struct nls_table *nls_codepage, int remap) { QUERY_INFORMATION_REQ *pSMB; QUERY_INFORMATION_RSP *pSMBr; @@ -3864,7 +3864,7 @@ int SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len; - cFYI(1, "In SMBQPath path %s", searchName); + cFYI(1, "In SMBQPath path %s", search_name); QInfRetry: rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -3874,14 +3874,14 @@ QInfRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, - searchName, PATH_MAX, nls_codepage, + search_name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { - name_len = strnlen(searchName, PATH_MAX); + name_len = strnlen(search_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, name_len); + strncpy(pSMB->FileName, search_name, name_len); } pSMB->BufferFormat = 0x04; name_len++; /* account for buffer type byte */ @@ -3892,23 +3892,23 @@ QInfRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cFYI(1, "Send error in QueryInfo = %d", rc); - } else if (pFinfo) { + } else if (data) { struct timespec ts; __u32 time = le32_to_cpu(pSMBr->last_write_time); /* decode response */ /* BB FIXME - add time zone adjustment BB */ - memset(pFinfo, 0, sizeof(FILE_ALL_INFO)); + 
memset(data, 0, sizeof(FILE_ALL_INFO)); ts.tv_nsec = 0; ts.tv_sec = time; /* decode time fields */ - pFinfo->ChangeTime = cpu_to_le64(cifs_UnixTimeToNT(ts)); - pFinfo->LastWriteTime = pFinfo->ChangeTime; - pFinfo->LastAccessTime = 0; - pFinfo->AllocationSize = + data->ChangeTime = cpu_to_le64(cifs_UnixTimeToNT(ts)); + data->LastWriteTime = data->ChangeTime; + data->LastAccessTime = 0; + data->AllocationSize = cpu_to_le64(le32_to_cpu(pSMBr->size)); - pFinfo->EndOfFile = pFinfo->AllocationSize; - pFinfo->Attributes = + data->EndOfFile = data->AllocationSize; + data->Attributes = cpu_to_le32(le16_to_cpu(pSMBr->attr)); } else rc = -EIO; /* bad buffer passed in */ @@ -3990,12 +3990,11 @@ QFileInfoRetry: int CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - FILE_ALL_INFO *pFindData, + const char *search_name, FILE_ALL_INFO *data, int legacy /* old style infolevel */, const struct nls_table *nls_codepage, int remap) { -/* level 263 SMB_QUERY_FILE_ALL_INFO */ + /* level 263 SMB_QUERY_FILE_ALL_INFO */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; int rc = 0; @@ -4003,7 +4002,7 @@ CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, int name_len; __u16 params, byte_count; -/* cFYI(1, "In QPathInfo path %s", searchName); */ + /* cFYI(1, "In QPathInfo path %s", search_name); */ QPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -4012,14 +4011,14 @@ QPathInfoRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, + cifsConvertToUTF16((__le16 *) pSMB->FileName, search_name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(searchName, PATH_MAX); + name_len = strnlen(search_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, name_len); + 
strncpy(pSMB->FileName, search_name, name_len); } params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; @@ -4064,20 +4063,21 @@ QPathInfoRetry: else if (legacy && get_bcc(&pSMBr->hdr) < 24) rc = -EIO; /* 24 or 26 expected but we do not read last field */ - else if (pFindData) { + else if (data) { int size; __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - /* On legacy responses we do not read the last field, - EAsize, fortunately since it varies by subdialect and - also note it differs on Set vs. Get, ie two bytes or 4 - bytes depending but we don't care here */ + /* + * On legacy responses we do not read the last field, + * EAsize, fortunately since it varies by subdialect and + * also note it differs on Set vs Get, ie two bytes or 4 + * bytes depending but we don't care here. + */ if (legacy) size = sizeof(FILE_INFO_STANDARD); else size = sizeof(FILE_ALL_INFO); - memcpy((char *) pFindData, - (char *) &pSMBr->hdr.Protocol + + memcpy((char *) data, (char *) &pSMBr->hdr.Protocol + data_offset, size); } else rc = -ENOMEM; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 80807923a545..34588fe11c57 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3402,30 +3402,6 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) return rsize; } -static int -is_path_accessible(unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, const char *full_path) -{ - int rc; - FILE_ALL_INFO *pfile_info; - - pfile_info = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (pfile_info == NULL) - return -ENOMEM; - - rc = CIFSSMBQPathInfo(xid, tcon, full_path, pfile_info, - 0 /* not legacy */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - - if (rc == -EOPNOTSUPP || rc == -EINVAL) - rc = SMBQueryInformation(xid, tcon, full_path, pfile_info, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - kfree(pfile_info); - return rc; -} - static void 
cleanup_volume_info_contents(struct smb_vol *volume_info) { @@ -3703,13 +3679,18 @@ remote_path_check: /* check if a whole path is not remote */ if (!rc && tcon) { + if (!server->ops->is_path_accessible) { + rc = -ENOSYS; + goto mount_fail_check; + } /* build_path_to_root works only when we have a valid tcon */ full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon); if (full_path == NULL) { rc = -ENOMEM; goto mount_fail_check; } - rc = is_path_accessible(xid, tcon, cifs_sb, full_path); + rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, + full_path); if (rc != 0 && rc != -EREMOTE) { kfree(full_path); goto mount_fail_check; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 96eb06ff9dd1..43f3881ad3b8 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -417,6 +417,30 @@ cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) CIFSSMBQFSAttributeInfo(xid, tcon); } +static int +cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path) +{ + int rc; + FILE_ALL_INFO *file_info; + + file_info = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (file_info == NULL) + return -ENOMEM; + + rc = CIFSSMBQPathInfo(xid, tcon, full_path, file_info, + 0 /* not legacy */, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc == -EOPNOTSUPP || rc == -EINVAL) + rc = SMBQueryInformation(xid, tcon, full_path, file_info, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + kfree(file_info); + return rc; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -443,6 +467,7 @@ struct smb_version_operations smb1_operations = { .tree_disconnect = CIFSSMBTDis, .get_dfs_refer = CIFSGetDFSRefer, .qfs_tcon = cifs_qfs_tcon, + .is_path_accessible = cifs_is_path_accessible, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 
2503a0dba989486c59523a947a1dcb50ad90fee9 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 26 Dec 2011 22:58:46 +0400 Subject: CIFS: Add SMB2 support for is_path_accessible that needs for a successful mount through SMB2 protocol. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 61 ++++++++++++++++++ fs/cifs/cifs_unicode.h | 5 ++ fs/cifs/smb2misc.c | 25 ++++++++ fs/cifs/smb2ops.c | 25 ++++++++ fs/cifs/smb2pdu.c | 132 ++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2proto.h | 8 +++ 7 files changed, 423 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 97c1d4210869..7dab9c04ad52 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -330,3 +330,64 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, ctoUTF16_out: return i; } + +#ifdef CONFIG_CIFS_SMB2 +/* + * cifs_local_to_utf16_bytes - how long will a string be after conversion? + * @from - pointer to input string + * @maxbytes - don't go past this many bytes of input string + * @codepage - source codepage + * + * Walk a string and return the number of bytes that the string will + * be after being converted to the given charset, not including any null + * termination required. Don't walk past maxbytes in the source buffer. 
+ */ + +static int +cifs_local_to_utf16_bytes(const char *from, int len, + const struct nls_table *codepage) +{ + int charlen; + int i; + wchar_t wchar_to; + + for (i = 0; len && *from; i++, from += charlen, len -= charlen) { + charlen = codepage->char2uni(from, len, &wchar_to); + /* Failed conversion defaults to a question mark */ + if (charlen < 1) + charlen = 1; + } + return 2 * i; /* UTF16 characters are two bytes */ +} + +/* + * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage + * @src - source string + * @maxlen - don't walk past this many bytes in the source string + * @utf16_len - the length of the allocated string in bytes (including null) + * @cp - source codepage + * @remap - map special chars + * + * Take a string convert it from the local codepage to UTF16 and + * put it in a new buffer. Returns a pointer to the new string or NULL on + * error. + */ +__le16 * +cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, + const struct nls_table *cp, int remap) +{ + int len; + __le16 *dst; + + len = cifs_local_to_utf16_bytes(src, maxlen, cp); + len += 2; /* NULL */ + dst = kmalloc(len, GFP_KERNEL); + if (!dst) { + *utf16_len = 0; + return NULL; + } + cifsConvertToUTF16(dst, src, strlen(src), cp, remap); + *utf16_len = len; + return dst; +} +#endif /* CONFIG_CIFS_SMB2 */ diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index a44c6eb8a4d7..4fb097468e21 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -84,6 +84,11 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen, const struct nls_table *codepage); extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, const struct nls_table *cp, int mapChars); +#ifdef CONFIG_CIFS_SMB2 +extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, + int *utf16_len, const struct nls_table *cp, + int remap); +#endif /* CONFIG_CIFS_SMB2 */ #endif /* diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 
10729a74da27..eb73a136641c 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -230,6 +230,11 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferLength); break; case SMB2_CREATE: + *off = le32_to_cpu( + ((struct smb2_create_rsp *)hdr)->CreateContextsOffset); + *len = le32_to_cpu( + ((struct smb2_create_rsp *)hdr)->CreateContextsLength); + break; case SMB2_READ: case SMB2_QUERY_INFO: case SMB2_QUERY_DIRECTORY: @@ -315,3 +320,23 @@ calc_size_exit: cFYI(1, "SMB2 len %d", len); return len; } + +/* Note: caller must free return buffer */ +__le16 * +cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) +{ + int len; + const char *start_of_path; + __le16 *to; + + /* Windows doesn't allow paths beginning with \ */ + if (from[0] == '\\') + start_of_path = from + 1; + else + start_of_path = from; + to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + return to; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0e33ca32abf9..1266137406fa 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -157,6 +157,30 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; } +static int +smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path) +{ + int rc; + __u64 persistent_fid, volatile_fid; + __le16 *utf16_path; + + utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0); + if (rc) { + kfree(utf16_path); + return rc; + } + + rc = SMB2_close(xid, tcon, persistent_fid, volatile_fid); + kfree(utf16_path); + return rc; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .check_receive = smb2_check_receive, @@ 
-174,6 +198,7 @@ struct smb_version_operations smb21_operations = { .logoff = SMB2_logoff, .tree_connect = SMB2_tcon, .tree_disconnect = SMB2_tdis, + .is_path_accessible = smb2_is_path_accessible, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 48c04b2832e2..ef0769c398a5 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -829,3 +829,135 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) return rc; } + +int +SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, + u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, + __u32 create_disposition, __u32 file_attributes, __u32 create_options) +{ + struct smb2_create_req *req; + struct smb2_create_rsp *rsp; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[2]; + int resp_buftype; + int uni_path_len; + int rc = 0; + int num_iovecs = 2; + + cFYI(1, "create/open"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_CREATE, tcon, (void **) &req); + if (rc) + return rc; + + if (enable_oplocks) + req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH; + else + req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; + req->ImpersonationLevel = IL_IMPERSONATION; + req->DesiredAccess = cpu_to_le32(desired_access); + /* File attributes ignored on open (used in create though) */ + req->FileAttributes = cpu_to_le32(file_attributes); + req->ShareAccess = FILE_SHARE_ALL_LE; + req->CreateDisposition = cpu_to_le32(create_disposition); + req->CreateOptions = cpu_to_le32(create_options); + uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; + req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) + - 1 /* pad */ - 4 /* do not count rfc1001 len field */); + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + /* MUST set path len (NameLength) to 0 opening root of share */ + if 
(uni_path_len >= 4) { + req->NameLength = cpu_to_le16(uni_path_len - 2); + /* -1 since last byte is buf[0] which is sent below (path) */ + iov[0].iov_len--; + iov[1].iov_len = uni_path_len; + iov[1].iov_base = path; + /* + * -1 since last byte is buf[0] which was counted in + * smb2_buf_len. + */ + inc_rfc1001_len(req, uni_path_len - 1); + } else { + num_iovecs = 1; + req->NameLength = 0; + } + + rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); + rsp = (struct smb2_create_rsp *)iov[0].iov_base; + + if (rc != 0) { + cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); + goto creat_exit; + } + + if (rsp == NULL) { + rc = -EIO; + goto creat_exit; + } + *persistent_fid = rsp->PersistentFileId; + *volatile_fid = rsp->VolatileFileId; +creat_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} + +int +SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid) +{ + struct smb2_close_req *req; + struct smb2_close_rsp *rsp; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[1]; + int resp_buftype; + int rc = 0; + + cFYI(1, "Close"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_CLOSE, tcon, (void **) &req); + if (rc) + return rc; + + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); + rsp = (struct smb2_close_rsp *)iov[0].iov_base; + + if (rc != 0) { + if (tcon) + cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); + goto close_exit; + } + + if (rsp == NULL) { + rc = -EIO; + goto close_exit; + } + + /* BB FIXME - decode close response, update inode for caching */ + +close_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index aa77bf3a7a69..5cd358ef312e 100644 --- a/fs/cifs/smb2pdu.h 
+++ b/fs/cifs/smb2pdu.h @@ -281,4 +281,171 @@ struct smb2_tree_disconnect_rsp { __le16 Reserved; } __packed; +/* File Attrubutes */ +#define FILE_ATTRIBUTE_READONLY 0x00000001 +#define FILE_ATTRIBUTE_HIDDEN 0x00000002 +#define FILE_ATTRIBUTE_SYSTEM 0x00000004 +#define FILE_ATTRIBUTE_DIRECTORY 0x00000010 +#define FILE_ATTRIBUTE_ARCHIVE 0x00000020 +#define FILE_ATTRIBUTE_NORMAL 0x00000080 +#define FILE_ATTRIBUTE_TEMPORARY 0x00000100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400 +#define FILE_ATTRIBUTE_COMPRESSED 0x00000800 +#define FILE_ATTRIBUTE_OFFLINE 0x00001000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 + +/* Oplock levels */ +#define SMB2_OPLOCK_LEVEL_NONE 0x00 +#define SMB2_OPLOCK_LEVEL_II 0x01 +#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 +#define SMB2_OPLOCK_LEVEL_BATCH 0x09 +#define SMB2_OPLOCK_LEVEL_LEASE 0xFF + +/* Desired Access Flags */ +#define FILE_READ_DATA_LE cpu_to_le32(0x00000001) +#define FILE_WRITE_DATA_LE cpu_to_le32(0x00000002) +#define FILE_APPEND_DATA_LE cpu_to_le32(0x00000004) +#define FILE_READ_EA_LE cpu_to_le32(0x00000008) +#define FILE_WRITE_EA_LE cpu_to_le32(0x00000010) +#define FILE_EXECUTE_LE cpu_to_le32(0x00000020) +#define FILE_READ_ATTRIBUTES_LE cpu_to_le32(0x00000080) +#define FILE_WRITE_ATTRIBUTES_LE cpu_to_le32(0x00000100) +#define FILE_DELETE_LE cpu_to_le32(0x00010000) +#define FILE_READ_CONTROL_LE cpu_to_le32(0x00020000) +#define FILE_WRITE_DAC_LE cpu_to_le32(0x00040000) +#define FILE_WRITE_OWNER_LE cpu_to_le32(0x00080000) +#define FILE_SYNCHRONIZE_LE cpu_to_le32(0x00100000) +#define FILE_ACCESS_SYSTEM_SECURITY_LE cpu_to_le32(0x01000000) +#define FILE_MAXIMAL_ACCESS_LE cpu_to_le32(0x02000000) +#define FILE_GENERIC_ALL_LE cpu_to_le32(0x10000000) +#define FILE_GENERIC_EXECUTE_LE cpu_to_le32(0x20000000) +#define FILE_GENERIC_WRITE_LE cpu_to_le32(0x40000000) +#define FILE_GENERIC_READ_LE cpu_to_le32(0x80000000) + +/* ShareAccess 
Flags */ +#define FILE_SHARE_READ_LE cpu_to_le32(0x00000001) +#define FILE_SHARE_WRITE_LE cpu_to_le32(0x00000002) +#define FILE_SHARE_DELETE_LE cpu_to_le32(0x00000004) +#define FILE_SHARE_ALL_LE cpu_to_le32(0x00000007) + +/* CreateDisposition Flags */ +#define FILE_SUPERSEDE_LE cpu_to_le32(0x00000000) +#define FILE_OPEN_LE cpu_to_le32(0x00000001) +#define FILE_CREATE_LE cpu_to_le32(0x00000002) +#define FILE_OPEN_IF_LE cpu_to_le32(0x00000003) +#define FILE_OVERWRITE_LE cpu_to_le32(0x00000004) +#define FILE_OVERWRITE_IF_LE cpu_to_le32(0x00000005) + +/* CreateOptions Flags */ +#define FILE_DIRECTORY_FILE_LE cpu_to_le32(0x00000001) +/* same as #define CREATE_NOT_FILE_LE cpu_to_le32(0x00000001) */ +#define FILE_WRITE_THROUGH_LE cpu_to_le32(0x00000002) +#define FILE_SEQUENTIAL_ONLY_LE cpu_to_le32(0x00000004) +#define FILE_NO_INTERMEDIATE_BUFFERRING_LE cpu_to_le32(0x00000008) +#define FILE_SYNCHRONOUS_IO_ALERT_LE cpu_to_le32(0x00000010) +#define FILE_SYNCHRONOUS_IO_NON_ALERT_LE cpu_to_le32(0x00000020) +#define FILE_NON_DIRECTORY_FILE_LE cpu_to_le32(0x00000040) +#define FILE_COMPLETE_IF_OPLOCKED_LE cpu_to_le32(0x00000100) +#define FILE_NO_EA_KNOWLEDGE_LE cpu_to_le32(0x00000200) +#define FILE_RANDOM_ACCESS_LE cpu_to_le32(0x00000800) +#define FILE_DELETE_ON_CLOSE_LE cpu_to_le32(0x00001000) +#define FILE_OPEN_BY_FILE_ID_LE cpu_to_le32(0x00002000) +#define FILE_OPEN_FOR_BACKUP_INTENT_LE cpu_to_le32(0x00004000) +#define FILE_NO_COMPRESSION_LE cpu_to_le32(0x00008000) +#define FILE_RESERVE_OPFILTER_LE cpu_to_le32(0x00100000) +#define FILE_OPEN_REPARSE_POINT_LE cpu_to_le32(0x00200000) +#define FILE_OPEN_NO_RECALL_LE cpu_to_le32(0x00400000) +#define FILE_OPEN_FOR_FREE_SPACE_QUERY_LE cpu_to_le32(0x00800000) + +#define FILE_READ_RIGHTS_LE (FILE_READ_DATA_LE | FILE_READ_EA_LE \ + | FILE_READ_ATTRIBUTES_LE) +#define FILE_WRITE_RIGHTS_LE (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE \ + | FILE_WRITE_EA_LE | FILE_WRITE_ATTRIBUTES_LE) +#define FILE_EXEC_RIGHTS_LE (FILE_EXECUTE_LE) + +/* 
Impersonation Levels */ +#define IL_ANONYMOUS cpu_to_le32(0x00000000) +#define IL_IDENTIFICATION cpu_to_le32(0x00000001) +#define IL_IMPERSONATION cpu_to_le32(0x00000002) +#define IL_DELEGATE cpu_to_le32(0x00000003) + +/* Create Context Values */ +#define SMB2_CREATE_EA_BUFFER "ExtA" /* extended attributes */ +#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ +#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" +#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" +#define SMB2_CREATE_ALLOCATION_SIZE "AlSi" +#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" +#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" +#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" +#define SMB2_CREATE_REQUEST_LEASE "RqLs" + +struct smb2_create_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 57 */ + __u8 SecurityFlags; + __u8 RequestedOplockLevel; + __le32 ImpersonationLevel; + __le64 SmbCreateFlags; + __le64 Reserved; + __le32 DesiredAccess; + __le32 FileAttributes; + __le32 ShareAccess; + __le32 CreateDisposition; + __le32 CreateOptions; + __le16 NameOffset; + __le16 NameLength; + __le32 CreateContextsOffset; + __le32 CreateContextsLength; + __u8 Buffer[1]; +} __packed; + +struct smb2_create_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 89 */ + __u8 OplockLevel; + __u8 Reserved; + __le32 CreateAction; + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le64 AllocationSize; + __le64 EndofFile; + __le32 FileAttributes; + __le32 Reserved2; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 CreateContextsOffset; + __le32 CreateContextsLength; + __u8 Buffer[1]; +} __packed; + +/* Currently defined values for close flags */ +#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) +struct smb2_close_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __le16 Flags; + __le32 Reserved; + __u64 PersistentFileId; /* opaque endianness */ + 
__u64 VolatileFileId; /* opaque endianness */ +} __packed; + +struct smb2_close_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* 60 */ + __le16 Flags; + __le32 Reserved; + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */ + __le64 EndOfFile; + __le32 Attributes; +} __packed; + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index bc7299349dbf..85aa8d5ea41a 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -36,6 +36,8 @@ extern int map_smb2_to_linux_error(char *buf, bool log_err); extern int smb2_check_message(char *buf, unsigned int length); extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); +extern __le16 *cifs_convert_path_to_utf16(const char *from, + struct cifs_sb_info *cifs_sb); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); @@ -54,5 +56,11 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *); extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); +extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, + __le16 *path, u64 *persistent_fid, u64 *volatile_fid, + __u32 desired_access, __u32 create_disposition, + __u32 file_attributes, __u32 create_options); +extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 1208ef1f76540b621f80e6130c4fb7bed8ece360 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 27 May 2012 17:34:43 +0400 Subject: CIFS: Move query inode info code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 9 ++++++ fs/cifs/cifsproto.h | 14 ++++---- fs/cifs/cifssmb.c | 11 
+++---- fs/cifs/inode.c | 93 +++++++++++++++++++++-------------------------------- fs/cifs/smb1ops.c | 50 ++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f711d666e3db..2b1234599e72 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -163,6 +163,7 @@ struct cifsFileInfo; struct cifs_ses; struct cifs_tcon; struct dfs_info3_param; +struct cifs_fattr; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -218,6 +219,14 @@ struct smb_version_operations { /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); + /* query path data from the server */ + int (*query_path_info)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *, const char *, + FILE_ALL_INFO *, bool *); + /* get server index number */ + int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *, const char *, + u64 *uniqueid, FILE_ALL_INFO *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b9967adeaa9e..8e93de01c79d 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -138,11 +138,9 @@ extern struct inode *cifs_iget(struct super_block *sb, struct cifs_fattr *fattr); extern int cifs_get_file_info(struct file *filp); -extern int cifs_get_inode_info(struct inode **pinode, - const unsigned char *search_path, - FILE_ALL_INFO *pfile_info, - struct super_block *sb, unsigned int xid, - const __u16 *pfid); +extern int cifs_get_inode_info(struct inode **inode, const char *full_path, + FILE_ALL_INFO *data, struct super_block *sb, + int xid, const __u16 *fid); extern int cifs_get_file_info_unix(struct file *filp); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, @@ -376,9 +374,9 @@ extern int CIFSSMBWrite2(const unsigned int xid, struct 
cifs_io_parms *io_parms, unsigned int *nbytes, struct kvec *iov, const int nvec, const int long_op); extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, __u64 *inode_number, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *search_name, __u64 *inode_number, + const struct nls_table *nls_codepage, + int remap); extern int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, const __u16 netfid, const __u8 lock_type, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 84a53380e124..fe30bb5dd2d8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4559,8 +4559,7 @@ CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - __u64 *inode_number, + const char *search_name, __u64 *inode_number, const struct nls_table *nls_codepage, int remap) { int rc = 0; @@ -4569,7 +4568,7 @@ CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, int name_len, bytes_returned; __u16 params, byte_count; - cFYI(1, "In GetSrvInodeNum for %s", searchName); + cFYI(1, "In GetSrvInodeNum for %s", search_name); if (tcon == NULL) return -ENODEV; @@ -4582,14 +4581,14 @@ GetInodeNumberRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, - searchName, PATH_MAX, nls_codepage, + search_name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(searchName, PATH_MAX); + name_len = strnlen(search_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, name_len); + strncpy(pSMB->FileName, search_name, name_len); } params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index af902864ac03..df071fb2567f 100644 --- 
a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -600,61 +600,54 @@ cgfi_exit: return rc; } -int cifs_get_inode_info(struct inode **pinode, - const unsigned char *full_path, FILE_ALL_INFO *pfindData, - struct super_block *sb, unsigned int xid, const __u16 *pfid) +int +cifs_get_inode_info(struct inode **inode, const char *full_path, + FILE_ALL_INFO *data, struct super_block *sb, int xid, + const __u16 *fid) { int rc = 0, tmprc; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct tcon_link *tlink; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); char *buf = NULL; - bool adjustTZ = false; + bool adjust_tz = false; struct cifs_fattr fattr; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; cFYI(1, "Getting info on %s", full_path); - if ((pfindData == NULL) && (*pinode != NULL)) { - if (CIFS_I(*pinode)->clientCanCacheRead) { + if ((data == NULL) && (*inode != NULL)) { + if (CIFS_I(*inode)->clientCanCacheRead) { cFYI(1, "No need to revalidate cached inode sizes"); goto cgii_exit; } } - /* if file info not passed in then get it from server */ - if (pfindData == NULL) { + /* if inode info is not passed, get it from server */ + if (data == NULL) { + if (!server->ops->query_path_info) { + rc = -ENOSYS; + goto cgii_exit; + } buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { rc = -ENOMEM; goto cgii_exit; } - pfindData = (FILE_ALL_INFO *)buf; - - /* could do find first instead but this returns more info */ - rc = CIFSSMBQPathInfo(xid, pTcon, full_path, pfindData, - 0 /* not legacy */, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - /* BB optimize code so we do not make the above call - when server claims no NT SMB support and the above call - failed at least once - set flag in tcon or mount */ - if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { - rc = SMBQueryInformation(xid, pTcon, 
full_path, - pfindData, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - adjustTZ = true; - } + data = (FILE_ALL_INFO *)buf; + rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, + data, &adjust_tz); } if (!rc) { - cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData, - cifs_sb, adjustTZ); + cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb, + adjust_tz); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; @@ -668,28 +661,17 @@ int cifs_get_inode_info(struct inode **pinode, * Is an i_ino of zero legal? Can we use that to check if the server * supports returning inode numbers? Are there other sanity checks we * can use to ensure that the server is really filling in that field? - * - * We can not use the IndexNumber field by default from Windows or - * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA - * CIFS spec claims that this value is unique within the scope of a - * share, and the windows docs hint that it's actually unique - * per-machine. - * - * There may be higher info levels that work but are there Windows - * server or network appliances for which IndexNumber field is not - * guaranteed unique? 
*/ - if (*pinode == NULL) { + if (*inode == NULL) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - int rc1 = 0; - - rc1 = CIFSGetSrvInodeNumber(xid, pTcon, - full_path, &fattr.cf_uniqueid, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc1 || !fattr.cf_uniqueid) { - cFYI(1, "GetSrvInodeNum rc %d", rc1); + if (server->ops->get_srv_inum) + tmprc = server->ops->get_srv_inum(xid, tcon, + cifs_sb, full_path, &fattr.cf_uniqueid, + data); + else + tmprc = -ENOSYS; + if (tmprc || !fattr.cf_uniqueid) { + cFYI(1, "GetSrvInodeNum rc %d", tmprc); fattr.cf_uniqueid = iunique(sb, ROOT_I); cifs_autodisable_serverino(cifs_sb); } @@ -697,7 +679,7 @@ int cifs_get_inode_info(struct inode **pinode, fattr.cf_uniqueid = iunique(sb, ROOT_I); } } else { - fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid; + fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; } /* query for SFU type info if supported and needed */ @@ -711,8 +693,7 @@ int cifs_get_inode_info(struct inode **pinode, #ifdef CONFIG_CIFS_ACL /* fill in 0777 bits from ACL */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { - rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, - pfid); + rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, full_path, fid); if (rc) { cFYI(1, "%s: Getting ACL failed with error: %d", __func__, rc); @@ -732,12 +713,12 @@ int cifs_get_inode_info(struct inode **pinode, cFYI(1, "CIFSCheckMFSymlink: %d", tmprc); } - if (!*pinode) { - *pinode = cifs_iget(sb, &fattr); - if (!*pinode) + if (!*inode) { + *inode = cifs_iget(sb, &fattr); + if (!*inode) rc = -ENOMEM; } else { - cifs_fattr_to_inode(*pinode, &fattr); + cifs_fattr_to_inode(*inode, &fattr); } cgii_exit: diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 43f3881ad3b8..fa210010358d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -441,6 +441,54 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +static int +cifs_query_path_info(const unsigned int xid, 
struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path, + FILE_ALL_INFO *data, bool *adjustTZ) +{ + int rc; + + /* could do find first instead but this returns more info */ + rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + /* + * BB optimize code so we do not make the above call when server claims + * no NT SMB support and the above call failed at least once - set flag + * in tcon or mount. + */ + if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { + rc = SMBQueryInformation(xid, tcon, full_path, data, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + *adjustTZ = true; + } + return rc; +} + +static int +cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path, + u64 *uniqueid, FILE_ALL_INFO *data) +{ + /* + * We can not use the IndexNumber field by default from Windows or + * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA + * CIFS spec claims that this value is unique within the scope of a + * share, and the windows docs hint that it's actually unique + * per-machine. + * + * There may be higher info levels that work but are there Windows + * server or network appliances for which IndexNumber field is not + * guaranteed unique? 
+ */ + return CIFSGetSrvInodeNumber(xid, tcon, full_path, uniqueid, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -468,6 +516,8 @@ struct smb_version_operations smb1_operations = { .get_dfs_refer = CIFSGetDFSRefer, .qfs_tcon = cifs_qfs_tcon, .is_path_accessible = cifs_is_path_accessible, + .query_path_info = cifs_query_path_info, + .get_srv_inum = cifs_get_srv_inum, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From be4cb9e3d4ef7af1aaf66cebab1391ff91b48beb Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 29 Dec 2011 17:06:33 +0400 Subject: CIFS: Query SMB2 inode info Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/smb2glob.h | 44 +++++++++++++++++++ fs/cifs/smb2inode.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2misc.c | 7 ++- fs/cifs/smb2ops.c | 11 +++++ fs/cifs/smb2pdu.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 111 ++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2proto.h | 7 +++ 8 files changed, 417 insertions(+), 2 deletions(-) create mode 100644 fs/cifs/smb2glob.h create mode 100644 fs/cifs/smb2inode.c (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index daf6837d9e0e..feee94309271 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -17,4 +17,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ - smb2misc.o smb2pdu.o + smb2misc.o smb2pdu.o smb2inode.o diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h new file mode 100644 index 000000000000..33c1d89090c0 --- /dev/null +++ b/fs/cifs/smb2glob.h @@ -0,0 +1,44 @@ +/* + * fs/cifs/smb2glob.h + * + * Definitions for various global variables and structures + * + * 
Copyright (C) International Business Machines Corp., 2002, 2011 + * Etersoft, 2012 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) + * Pavel Shilovsky (pshilovsky@samba.org) 2012 + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + */ +#ifndef _SMB2_GLOB_H +#define _SMB2_GLOB_H + +/* + ***************************************************************** + * Constants go here + ***************************************************************** + */ + +/* + * Identifiers for functions that use the open, operation, close pattern + * in smb2inode.c:smb2_open_op_close() + */ +#define SMB2_OP_SET_DELETE 1 +#define SMB2_OP_SET_INFO 2 +#define SMB2_OP_QUERY_INFO 3 +#define SMB2_OP_QUERY_DIR 4 +#define SMB2_OP_MKDIR 5 +#define SMB2_OP_RENAME 6 +#define SMB2_OP_DELETE 7 + +#endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c new file mode 100644 index 000000000000..1ba5c405315c --- /dev/null +++ b/fs/cifs/smb2inode.c @@ -0,0 +1,124 @@ +/* + * fs/cifs/smb2inode.c + * + * Copyright (C) International Business Machines Corp., 2002, 2011 + * Etersoft, 2012 + * Author(s): Pavel Shilovsky (pshilovsky@samba.org), + * Steve French (sfrench@us.ibm.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include "cifsfs.h" +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include "cifs_unicode.h" +#include "fscache.h" +#include "smb2glob.h" +#include "smb2pdu.h" +#include "smb2proto.h" + +static int +smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path, + __u32 desired_access, __u32 create_disposition, + __u32 file_attributes, __u32 create_options, + void *data, int command) +{ + int rc, tmprc = 0; + u64 persistent_fid, volatile_fid; + __le16 *utf16_path; + + utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, + desired_access, create_disposition, file_attributes, + create_options); + if (rc) { + kfree(utf16_path); + return rc; + } + + switch (command) { + case SMB2_OP_DELETE: + break; + case SMB2_OP_QUERY_INFO: + tmprc = SMB2_query_info(xid, tcon, persistent_fid, + volatile_fid, + (struct smb2_file_all_info *)data); + break; + case SMB2_OP_MKDIR: + /* + * Directories are created through parameters in the + * SMB2_open() call. 
+ */ + break; + default: + cERROR(1, "Invalid command"); + break; + } + + rc = SMB2_close(xid, tcon, persistent_fid, volatile_fid); + if (tmprc) + rc = tmprc; + kfree(utf16_path); + return rc; +} + +static void +move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) +{ + memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src); + dst->CurrentByteOffset = src->CurrentByteOffset; + dst->Mode = src->Mode; + dst->AlignmentRequirement = src->AlignmentRequirement; + dst->IndexNumber1 = 0; /* we don't use it */ +} + +int +smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path, + FILE_ALL_INFO *data, bool *adjust_tz) +{ + int rc; + struct smb2_file_all_info *smb2_data; + + *adjust_tz = false; + + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + GFP_KERNEL); + if (smb2_data == NULL) + return -ENOMEM; + + rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, + smb2_data, SMB2_OP_QUERY_INFO); + if (rc) + goto out; + + move_smb2_info_to_cifs(data, smb2_data); +out: + kfree(smb2_data); + return rc; +} diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index eb73a136641c..a4ff5d547554 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -235,8 +235,13 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) *len = le32_to_cpu( ((struct smb2_create_rsp *)hdr)->CreateContextsLength); break; - case SMB2_READ: case SMB2_QUERY_INFO: + *off = le16_to_cpu( + ((struct smb2_query_info_rsp *)hdr)->OutputBufferOffset); + *len = le32_to_cpu( + ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength); + break; + case SMB2_READ: case SMB2_QUERY_DIRECTORY: case SMB2_IOCTL: case SMB2_CHANGE_NOTIFY: diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1266137406fa..bcf310c8b784 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -181,6 +181,15 @@ smb2_is_path_accessible(const unsigned int xid, struct 
cifs_tcon *tcon, return rc; } +static int +smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path, + u64 *uniqueid, FILE_ALL_INFO *data) +{ + *uniqueid = le64_to_cpu(data->IndexNumber); + return 0; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .check_receive = smb2_check_receive, @@ -199,6 +208,8 @@ struct smb_version_operations smb21_operations = { .tree_connect = SMB2_tcon, .tree_disconnect = SMB2_tdis, .is_path_accessible = smb2_is_path_accessible, + .query_path_info = smb2_query_path_info, + .get_srv_inum = smb2_get_srv_inum, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ef0769c398a5..7ef5324786a6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -961,3 +961,116 @@ close_exit: free_rsp_buf(resp_buftype, rsp); return rc; } + +static int +validate_buf(unsigned int offset, unsigned int buffer_length, + struct smb2_hdr *hdr, unsigned int min_buf_size) + +{ + unsigned int smb_len = be32_to_cpu(hdr->smb2_buf_length); + char *end_of_smb = smb_len + 4 /* RFC1001 length field */ + (char *)hdr; + char *begin_of_buf = 4 /* RFC1001 len field */ + offset + (char *)hdr; + char *end_of_buf = begin_of_buf + buffer_length; + + + if (buffer_length < min_buf_size) { + cERROR(1, "buffer length %d smaller than minimum size %d", + buffer_length, min_buf_size); + return -EINVAL; + } + + /* check if beyond RFC1001 maximum length */ + if ((smb_len > 0x7FFFFF) || (buffer_length > 0x7FFFFF)) { + cERROR(1, "buffer length %d or smb length %d too large", + buffer_length, smb_len); + return -EINVAL; + } + + if ((begin_of_buf > end_of_smb) || (end_of_buf > end_of_smb)) { + cERROR(1, "illegal server response, bad offset to data"); + return -EINVAL; + } + + return 0; +} + +/* + * If SMB buffer fields are valid, copy into temporary buffer to hold result. + * Caller must free buffer. 
+ */ +static int +validate_and_copy_buf(unsigned int offset, unsigned int buffer_length, + struct smb2_hdr *hdr, unsigned int minbufsize, + char *data) + +{ + char *begin_of_buf = 4 /* RFC1001 len field */ + offset + (char *)hdr; + int rc; + + if (!data) + return -EINVAL; + + rc = validate_buf(offset, buffer_length, hdr, minbufsize); + if (rc) + return rc; + + memcpy(data, begin_of_buf, buffer_length); + + return 0; +} + +int +SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + struct smb2_file_all_info *data) +{ + struct smb2_query_info_req *req; + struct smb2_query_info_rsp *rsp = NULL; + struct kvec iov[2]; + int rc = 0; + int resp_buftype; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + + cFYI(1, "Query Info"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); + if (rc) + return rc; + + req->InfoType = SMB2_O_INFO_FILE; + req->FileInfoClass = FILE_ALL_INFORMATION; + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + /* 4 for rfc1002 length field and 1 for Buffer */ + req->InputBufferOffset = + cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + req->OutputBufferLength = + cpu_to_le32(sizeof(struct smb2_file_all_info) + MAX_NAME * 2); + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); + goto qinf_exit; + } + + rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; + + rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), + &rsp->hdr, sizeof(struct smb2_file_all_info), + (char *)data); + +qinf_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 
5cd358ef312e..9151e9040b02 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -448,4 +448,115 @@ struct smb2_close_rsp { __le32 Attributes; } __packed; +/* Possible InfoType values */ +#define SMB2_O_INFO_FILE 0x01 +#define SMB2_O_INFO_FILESYSTEM 0x02 +#define SMB2_O_INFO_SECURITY 0x03 +#define SMB2_O_INFO_QUOTA 0x04 + +struct smb2_query_info_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 41 */ + __u8 InfoType; + __u8 FileInfoClass; + __le32 OutputBufferLength; + __le16 InputBufferOffset; + __u16 Reserved; + __le32 InputBufferLength; + __le32 AdditionalInformation; + __le32 Flags; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __u8 Buffer[1]; +} __packed; + +struct smb2_query_info_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 OutputBufferOffset; + __le32 OutputBufferLength; + __u8 Buffer[1]; +} __packed; + +/* + * PDU infolevel structure definitions + * BB consider moving to a different header + */ + +/* partial list of QUERY INFO levels */ +#define FILE_DIRECTORY_INFORMATION 1 +#define FILE_FULL_DIRECTORY_INFORMATION 2 +#define FILE_BOTH_DIRECTORY_INFORMATION 3 +#define FILE_BASIC_INFORMATION 4 +#define FILE_STANDARD_INFORMATION 5 +#define FILE_INTERNAL_INFORMATION 6 +#define FILE_EA_INFORMATION 7 +#define FILE_ACCESS_INFORMATION 8 +#define FILE_NAME_INFORMATION 9 +#define FILE_RENAME_INFORMATION 10 +#define FILE_LINK_INFORMATION 11 +#define FILE_NAMES_INFORMATION 12 +#define FILE_DISPOSITION_INFORMATION 13 +#define FILE_POSITION_INFORMATION 14 +#define FILE_FULL_EA_INFORMATION 15 +#define FILE_MODE_INFORMATION 16 +#define FILE_ALIGNMENT_INFORMATION 17 +#define FILE_ALL_INFORMATION 18 +#define FILE_ALLOCATION_INFORMATION 19 +#define FILE_END_OF_FILE_INFORMATION 20 +#define FILE_ALTERNATE_NAME_INFORMATION 21 +#define FILE_STREAM_INFORMATION 22 +#define FILE_PIPE_INFORMATION 23 +#define FILE_PIPE_LOCAL_INFORMATION 24 +#define FILE_PIPE_REMOTE_INFORMATION 25 
+#define FILE_MAILSLOT_QUERY_INFORMATION 26 +#define FILE_MAILSLOT_SET_INFORMATION 27 +#define FILE_COMPRESSION_INFORMATION 28 +#define FILE_OBJECT_ID_INFORMATION 29 +/* Number 30 not defined in documents */ +#define FILE_MOVE_CLUSTER_INFORMATION 31 +#define FILE_QUOTA_INFORMATION 32 +#define FILE_REPARSE_POINT_INFORMATION 33 +#define FILE_NETWORK_OPEN_INFORMATION 34 +#define FILE_ATTRIBUTE_TAG_INFORMATION 35 +#define FILE_TRACKING_INFORMATION 36 +#define FILEID_BOTH_DIRECTORY_INFORMATION 37 +#define FILEID_FULL_DIRECTORY_INFORMATION 38 +#define FILE_VALID_DATA_LENGTH_INFORMATION 39 +#define FILE_SHORT_NAME_INFORMATION 40 +#define FILE_SFIO_RESERVE_INFORMATION 44 +#define FILE_SFIO_VOLUME_INFORMATION 45 +#define FILE_HARD_LINK_INFORMATION 46 +#define FILE_NORMALIZED_NAME_INFORMATION 48 +#define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 +#define FILE_STANDARD_LINK_INFORMATION 54 + +/* + * This level 18, although with struct with same name is different from cifs + * level 0x107. Level 0x107 has an extra u64 between AccessFlags and + * CurrentByteOffset. 
+ */ +struct smb2_file_all_info { /* data block encoding of response to level 18 */ + __le64 CreationTime; /* Beginning of FILE_BASIC_INFO equivalent */ + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le32 Attributes; + __u32 Pad1; /* End of FILE_BASIC_INFO_INFO equivalent */ + __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */ + __le64 EndOfFile; /* size ie offset to first free byte in file */ + __le32 NumberOfLinks; /* hard links */ + __u8 DeletePending; + __u8 Directory; + __u16 Pad2; /* End of FILE_STANDARD_INFO equivalent */ + __le64 IndexNumber; + __le32 EASize; + __le32 AccessFlags; + __le64 CurrentByteOffset; + __le32 Mode; + __le32 AlignmentRequirement; + __le32 FileNameLength; + char FileName[1]; +} __packed; /* level 18 Query */ + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 85aa8d5ea41a..1517b4c03c90 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -44,6 +44,10 @@ extern int smb2_check_receive(struct mid_q_entry *mid, extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, unsigned int nvec, struct mid_q_entry **ret_mid); +extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, FILE_ALL_INFO *data, + bool *adjust_tz); /* * SMB2 Worker functions - most of protocol specific implementation details * are contained within these calls. 
@@ -62,5 +66,8 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __u32 file_attributes, __u32 create_options); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); +extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id, + struct smb2_file_all_info *data); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 9224dfc2f92f4faff7b3d9e169255278129b47e8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 27 May 2012 20:39:52 +0400 Subject: CIFS: Move building path to root to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 4 ++-- fs/cifs/cifsglob.h | 13 +++++++++++++ fs/cifs/cifsproto.h | 3 --- fs/cifs/connect.c | 2 +- fs/cifs/inode.c | 32 -------------------------------- fs/cifs/smb1ops.c | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 48 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 7a7cda9f7912..db8a404a51dd 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -549,8 +549,8 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) char *s, *p; char sep; - full_path = cifs_build_path_to_root(vol, cifs_sb, - cifs_sb_master_tcon(cifs_sb)); + full_path = build_path_to_root(vol, cifs_sb, + cifs_sb_master_tcon(cifs_sb)); if (full_path == NULL) return ERR_PTR(-ENOMEM); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2b1234599e72..340dce0ed07b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -164,6 +164,7 @@ struct cifs_ses; struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; +struct smb_vol; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -227,6 +228,9 @@ struct smb_version_operations { int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, u64 *uniqueid, FILE_ALL_INFO *); + /* build a full path to 
the root of the mount */ + char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, + struct cifs_tcon *); }; struct smb_version_values { @@ -803,6 +807,15 @@ convert_delimiter(char *path, char delim) } } +static inline char * +build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon) +{ + if (!vol->ops->build_path_to_root) + return NULL; + return vol->ops->build_path_to_root(vol, cifs_sb, tcon); +} + #ifdef CONFIG_CIFS_STATS #define cifs_stats_inc atomic_inc diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8e93de01c79d..334b867a81ff 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -57,9 +57,6 @@ extern int init_cifs_idmap(void); extern void exit_cifs_idmap(void); extern void cifs_destroy_idmaptrees(void); extern char *build_path_from_dentry(struct dentry *); -extern char *cifs_build_path_to_root(struct smb_vol *vol, - struct cifs_sb_info *cifs_sb, - struct cifs_tcon *tcon); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); extern char *cifs_compose_mount_options(const char *sb_mountdata, const char *fullpath, const struct dfs_info3_param *ref, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 34588fe11c57..7b4bc1e0b08e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3684,7 +3684,7 @@ remote_path_check: goto mount_fail_check; } /* build_path_to_root works only when we have a valid tcon */ - full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon); + full_path = build_path_to_root(volume_info, cifs_sb, tcon); if (full_path == NULL) { rc = -ENOMEM; goto mount_fail_check; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index df071fb2567f..def10064fe9d 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -731,38 +731,6 @@ static const struct inode_operations cifs_ipc_inode_ops = { .lookup = cifs_lookup, }; -char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, - struct cifs_tcon *tcon) -{ - int pplen = vol->prepath ? 
strlen(vol->prepath) : 0; - int dfsplen; - char *full_path = NULL; - - /* if no prefix path, simply set path to the root of share to "" */ - if (pplen == 0) { - full_path = kmalloc(1, GFP_KERNEL); - if (full_path) - full_path[0] = 0; - return full_path; - } - - if (tcon->Flags & SMB_SHARE_IS_IN_DFS) - dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); - else - dfsplen = 0; - - full_path = kmalloc(dfsplen + pplen + 1, GFP_KERNEL); - if (full_path == NULL) - return full_path; - - if (dfsplen) - strncpy(full_path, tcon->treeName, dfsplen); - strncpy(full_path + dfsplen, vol->prepath, pplen); - convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); - full_path[dfsplen + pplen] = 0; /* add trailing null */ - return full_path; -} - static int cifs_find_inode(struct inode *inode, void *opaque) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index fa210010358d..7195fadf1cfa 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -489,6 +489,37 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_MAP_SPECIAL_CHR); } +static char * +cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon) +{ + int pplen = vol->prepath ? 
strlen(vol->prepath) : 0; + int dfsplen; + char *full_path = NULL; + + /* if no prefix path, simply set path to the root of share to "" */ + if (pplen == 0) { + full_path = kzalloc(1, GFP_KERNEL); + return full_path; + } + + if (tcon->Flags & SMB_SHARE_IS_IN_DFS) + dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); + else + dfsplen = 0; + + full_path = kmalloc(dfsplen + pplen + 1, GFP_KERNEL); + if (full_path == NULL) + return full_path; + + if (dfsplen) + strncpy(full_path, tcon->treeName, dfsplen); + strncpy(full_path + dfsplen, vol->prepath, pplen); + convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); + full_path[dfsplen + pplen] = 0; /* add trailing null */ + return full_path; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -518,6 +549,7 @@ struct smb_version_operations smb1_operations = { .is_path_accessible = cifs_is_path_accessible, .query_path_info = cifs_query_path_info, .get_srv_inum = cifs_get_srv_inum, + .build_path_to_root = cifs_build_path_to_root, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 25e266320caca88a4463385b6f4ef696111d2c9a Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 27 May 2012 20:44:23 +0400 Subject: CIFS: Add SMB2 support for build_path_to_root Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index bcf310c8b784..2f9950e43911 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -190,6 +190,23 @@ smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, return 0; } +static char * +smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon) +{ + int pplen = vol->prepath ? 
strlen(vol->prepath) : 0; + char *full_path = NULL; + + /* if no prefix path, simply set path to the root of share to "" */ + if (pplen == 0) { + full_path = kzalloc(2, GFP_KERNEL); + return full_path; + } + + cERROR(1, "prefixpath is not supported for SMB2 now"); + return NULL; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .check_receive = smb2_check_receive, @@ -210,6 +227,7 @@ struct smb_version_operations smb21_operations = { .is_path_accessible = smb2_is_path_accessible, .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, + .build_path_to_root = smb2_build_path_to_root, }; struct smb_version_values smb21_values = { -- cgit v1.2.3 From 45740847e2362f36410e8118ac685876be473039 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 1 Jun 2012 14:26:18 +0400 Subject: CIFS: Setup async request in ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/smb1ops.c | 1 + fs/cifs/transport.c | 4 ++-- 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 340dce0ed07b..5e4d1c56767d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -173,6 +173,9 @@ struct smb_version_operations { /* setup request: allocate mid, sign message */ int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int, struct mid_q_entry **); + /* setup async request: allocate mid, sign message */ + int (*setup_async_request)(struct TCP_Server_Info *, struct kvec *, + unsigned int, struct mid_q_entry **); /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 334b867a81ff..cf7fb185103c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -78,6 +78,8 @@ extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, 
char *in_buf, int flags); extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int, struct mid_q_entry **); +extern int cifs_setup_async_request(struct TCP_Server_Info *, struct kvec *, + unsigned int, struct mid_q_entry **); extern int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 7195fadf1cfa..7bd4973591de 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -524,6 +524,7 @@ struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, .setup_request = cifs_setup_request, + .setup_async_request = cifs_setup_async_request, .check_receive = cifs_check_receive, .add_credits = cifs_add_credits, .set_credits = cifs_set_credits, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bcc02b476f6e..83867ef348df 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -345,7 +345,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) return 0; } -static int +int cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, unsigned int nvec, struct mid_q_entry **ret_mid) { @@ -391,7 +391,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, return rc; mutex_lock(&server->srv_mutex); - rc = cifs_setup_async_request(server, iov, nvec, &mid); + rc = server->ops->setup_async_request(server, iov, nvec, &mid); if (rc) { mutex_unlock(&server->srv_mutex); add_credits(server, 1, optype); -- cgit v1.2.3 From c95b8eeda3efcb419ea0a3f864cf99e32c038c21 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 11 Jul 2012 14:45:28 +0400 Subject: CIFS: Add SMB2 support for async requests Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 1 + fs/cifs/smb2proto.h | 4 ++++ fs/cifs/smb2transport.c | 23 ++++++++++++++++++++++- 3 files changed, 
27 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 2f9950e43911..8672e49d1c4c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -209,6 +209,7 @@ smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, + .setup_async_request = smb2_setup_async_request, .check_receive = smb2_check_receive, .add_credits = smb2_add_credits, .set_credits = smb2_set_credits, diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 1517b4c03c90..1a17955c35c9 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -43,6 +43,10 @@ extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, unsigned int nvec, struct mid_q_entry **ret_mid); +extern int smb2_setup_async_request(struct TCP_Server_Info *server, + struct kvec *iov, unsigned int nvec, + struct mid_q_entry **ret_mid); +extern void smb2_echo_request(struct work_struct *work); extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index b4b6b9a6c0fb..31f5d420b3ea 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -148,4 +148,25 @@ smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, return rc; } -/* BB add missing functions here */ +int +smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, + unsigned int nvec, struct mid_q_entry **ret_mid) +{ + int rc = 0; + struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; + struct mid_q_entry *mid; + + smb2_seq_num_into_buf(server, hdr); + + mid = smb2_mid_entry_alloc(hdr, server); + if (mid == NULL) + return -ENOMEM; + + /* rc = smb2_sign_smb2(iov, nvec, server); + if (rc) { + DeleteMidQEntry(mid); + return rc; + }*/ + *ret_mid = mid; + 
return rc; +} -- cgit v1.2.3 From f6d7617862e106affc59c6933099e45629af5c4e Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 25 May 2012 14:47:16 +0400 Subject: CIFS: Move echo code to osp struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 ++++ fs/cifs/connect.c | 10 ++++++---- fs/cifs/smb1ops.c | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5e4d1c56767d..0c53a8339253 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -234,6 +234,10 @@ struct smb_version_operations { /* build a full path to the root of the mount */ char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, struct cifs_tcon *); + /* check if we can send an echo or not */ + bool (*can_echo)(struct TCP_Server_Info *); + /* send echo request */ + int (*echo)(struct TCP_Server_Info *); }; struct smb_version_values { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7b4bc1e0b08e..a83ed766aa94 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -406,15 +406,17 @@ cifs_echo_request(struct work_struct *work) struct TCP_Server_Info, echo.work); /* - * We cannot send an echo until the NEGOTIATE_PROTOCOL request is - * done, which is indicated by maxBuf != 0. Also, no need to ping if - * we got a response recently + * We cannot send an echo if it is disabled or until the + * NEGOTIATE_PROTOCOL request is done, which is indicated by + * server->ops->need_neg() == false. Also, no need to ping if + * we got a response recently. */ if (!server->ops->need_neg || server->ops->need_neg(server) || + (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; - rc = CIFSSMBEcho(server); + rc = server->ops->echo ? 
server->ops->echo(server) : -ENOSYS; if (rc) cFYI(1, "Unable to send echo request to server: %s", server->hostname); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 7bd4973591de..c9326b4ec6cd 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -551,6 +551,7 @@ struct smb_version_operations smb1_operations = { .query_path_info = cifs_query_path_info, .get_srv_inum = cifs_get_srv_inum, .build_path_to_root = cifs_build_path_to_root, + .echo = CIFSSMBEcho, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 9094fad1ed90caebd25b1bdec3c8982d079356ee Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 12 Jul 2012 18:30:44 +0400 Subject: CIFS: Add echo request support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/connect.c | 3 --- fs/cifs/smb2ops.c | 8 ++++++++ fs/cifs/smb2pdu.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 12 ++++++++++++ fs/cifs/smb2proto.h | 1 + 6 files changed, 73 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0c53a8339253..ae9a1e900c15 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -73,6 +73,9 @@ /* (max path length + 1 for null) * 2 for unicode */ #define MAX_NAME 514 +/* SMB echo "timeout" -- FIXME: tunable? */ +#define SMB_ECHO_INTERVAL (60 * HZ) + #include "cifspdu.h" #ifndef XATTR_DOS_ATTRIB diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a83ed766aa94..5ab173fd6339 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -56,9 +56,6 @@ #define CIFS_PORT 445 #define RFC1001_PORT 139 -/* SMB echo "timeout" -- FIXME: tunable? */ -#define SMB_ECHO_INTERVAL (60 * HZ) - extern mempool_t *cifs_req_poolp; /* FIXME: should these be tunable? 
*/ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 8672e49d1c4c..483bd0ba2ecb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -207,6 +207,12 @@ smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, return NULL; } +static bool +smb2_can_echo(struct TCP_Server_Info *server) +{ + return server->echoes; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -226,6 +232,8 @@ struct smb_version_operations smb21_operations = { .tree_connect = SMB2_tcon, .tree_disconnect = SMB2_tdis, .is_path_accessible = smb2_is_path_accessible, + .can_echo = smb2_can_echo, + .echo = SMB2_echo, .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, .build_path_to_root = smb2_build_path_to_root, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7ef5324786a6..373b6945161f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1074,3 +1074,52 @@ qinf_exit: free_rsp_buf(resp_buftype, rsp); return rc; } + +/* + * This is a no-op for now. We're not really interested in the reply, but + * rather in the fact that the server sent one and that server->lstrp + * gets updated. + * + * FIXME: maybe we should consider checking that the reply matches request? 
+ */ +static void +smb2_echo_callback(struct mid_q_entry *mid) +{ + struct TCP_Server_Info *server = mid->callback_data; + struct smb2_echo_rsp *smb2 = (struct smb2_echo_rsp *)mid->resp_buf; + unsigned int credits_received = 1; + + if (mid->mid_state == MID_RESPONSE_RECEIVED) + credits_received = le16_to_cpu(smb2->hdr.CreditRequest); + + DeleteMidQEntry(mid); + add_credits(server, credits_received, CIFS_ECHO_OP); +} + +int +SMB2_echo(struct TCP_Server_Info *server) +{ + struct smb2_echo_req *req; + int rc = 0; + struct kvec iov; + + cFYI(1, "In echo request"); + + rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); + if (rc) + return rc; + + req->hdr.CreditRequest = cpu_to_le16(1); + + iov.iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov.iov_len = get_rfc1002_length(req) + 4; + + rc = cifs_call_async(server, &iov, 1, NULL, smb2_echo_callback, server, + CIFS_ECHO_OP); + if (rc) + cFYI(1, "Echo request failed: %d", rc); + + cifs_small_buf_release(req); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 9151e9040b02..59aae608d366 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -448,6 +448,18 @@ struct smb2_close_rsp { __le32 Attributes; } __packed; +struct smb2_echo_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __u16 Reserved; +} __packed; + +struct smb2_echo_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __u16 Reserved; +} __packed; + /* Possible InfoType values */ #define SMB2_O_INFO_FILE 0x01 #define SMB2_O_INFO_FILESYSTEM 0x02 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 1a17955c35c9..902bbe2b5ad3 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -73,5 +73,6 @@ extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct smb2_file_all_info *data); +extern int SMB2_echo(struct TCP_Server_Info *server); 
#endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 44c581866e2ae4bbc3c8eea5a3e3c7a0f639e12d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 28 May 2012 14:16:31 +0400 Subject: CIFS: Move clear/print_stats code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 60 +++++----------------------------------------- fs/cifs/cifsglob.h | 48 +++++++++++++++++++++---------------- fs/cifs/cifssmb.c | 54 ++++++++++++++++++++--------------------- fs/cifs/misc.c | 2 +- fs/cifs/smb1ops.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 129 insertions(+), 103 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 8aa8693bb65c..d9ea6ede6a7a 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -282,24 +282,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, struct cifs_tcon, tcon_list); atomic_set(&tcon->num_smbs_sent, 0); - atomic_set(&tcon->num_writes, 0); - atomic_set(&tcon->num_reads, 0); - atomic_set(&tcon->num_oplock_brks, 0); - atomic_set(&tcon->num_opens, 0); - atomic_set(&tcon->num_posixopens, 0); - atomic_set(&tcon->num_posixmkdirs, 0); - atomic_set(&tcon->num_closes, 0); - atomic_set(&tcon->num_deletes, 0); - atomic_set(&tcon->num_mkdirs, 0); - atomic_set(&tcon->num_rmdirs, 0); - atomic_set(&tcon->num_renames, 0); - atomic_set(&tcon->num_t2renames, 0); - atomic_set(&tcon->num_ffirst, 0); - atomic_set(&tcon->num_fnext, 0); - atomic_set(&tcon->num_fclose, 0); - atomic_set(&tcon->num_hardlinks, 0); - atomic_set(&tcon->num_symlinks, 0); - atomic_set(&tcon->num_locks, 0); + if (server->ops->clear_stats) + server->ops->clear_stats(tcon); } } } @@ -358,42 +342,10 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n%d) %s", i, tcon->treeName); if (tcon->need_reconnect) seq_puts(m, "\tDISCONNECTED "); - seq_printf(m, "\nSMBs: %d Oplock Breaks: %d", - atomic_read(&tcon->num_smbs_sent), - 
atomic_read(&tcon->num_oplock_brks)); - seq_printf(m, "\nReads: %d Bytes: %lld", - atomic_read(&tcon->num_reads), - (long long)(tcon->bytes_read)); - seq_printf(m, "\nWrites: %d Bytes: %lld", - atomic_read(&tcon->num_writes), - (long long)(tcon->bytes_written)); - seq_printf(m, "\nFlushes: %d", - atomic_read(&tcon->num_flushes)); - seq_printf(m, "\nLocks: %d HardLinks: %d " - "Symlinks: %d", - atomic_read(&tcon->num_locks), - atomic_read(&tcon->num_hardlinks), - atomic_read(&tcon->num_symlinks)); - seq_printf(m, "\nOpens: %d Closes: %d " - "Deletes: %d", - atomic_read(&tcon->num_opens), - atomic_read(&tcon->num_closes), - atomic_read(&tcon->num_deletes)); - seq_printf(m, "\nPosix Opens: %d " - "Posix Mkdirs: %d", - atomic_read(&tcon->num_posixopens), - atomic_read(&tcon->num_posixmkdirs)); - seq_printf(m, "\nMkdirs: %d Rmdirs: %d", - atomic_read(&tcon->num_mkdirs), - atomic_read(&tcon->num_rmdirs)); - seq_printf(m, "\nRenames: %d T2 Renames %d", - atomic_read(&tcon->num_renames), - atomic_read(&tcon->num_t2renames)); - seq_printf(m, "\nFindFirst: %d FNext %d " - "FClose %d", - atomic_read(&tcon->num_ffirst), - atomic_read(&tcon->num_fnext), - atomic_read(&tcon->num_fclose)); + seq_printf(m, "\nSMBs: %d", + atomic_read(&tcon->num_smbs_sent)); + if (server->ops->print_stats) + server->ops->print_stats(m, tcon); } } } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ae9a1e900c15..0896328418aa 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -197,6 +197,8 @@ struct smb_version_operations { /* find mid corresponding to the response message */ struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *); void (*dump_detail)(void *); + void (*clear_stats)(struct cifs_tcon *); + void (*print_stats)(struct seq_file *m, struct cifs_tcon *); /* verify the message */ int (*check_message)(char *, unsigned int); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); @@ -566,27 +568,31 @@ struct cifs_tcon { enum statusEnum tidStatus; #ifdef 
CONFIG_CIFS_STATS atomic_t num_smbs_sent; - atomic_t num_writes; - atomic_t num_reads; - atomic_t num_flushes; - atomic_t num_oplock_brks; - atomic_t num_opens; - atomic_t num_closes; - atomic_t num_deletes; - atomic_t num_mkdirs; - atomic_t num_posixopens; - atomic_t num_posixmkdirs; - atomic_t num_rmdirs; - atomic_t num_renames; - atomic_t num_t2renames; - atomic_t num_ffirst; - atomic_t num_fnext; - atomic_t num_fclose; - atomic_t num_hardlinks; - atomic_t num_symlinks; - atomic_t num_locks; - atomic_t num_acl_get; - atomic_t num_acl_set; + union { + struct { + atomic_t num_writes; + atomic_t num_reads; + atomic_t num_flushes; + atomic_t num_oplock_brks; + atomic_t num_opens; + atomic_t num_closes; + atomic_t num_deletes; + atomic_t num_mkdirs; + atomic_t num_posixopens; + atomic_t num_posixmkdirs; + atomic_t num_rmdirs; + atomic_t num_renames; + atomic_t num_t2renames; + atomic_t num_ffirst; + atomic_t num_fnext; + atomic_t num_fclose; + atomic_t num_hardlinks; + atomic_t num_symlinks; + atomic_t num_locks; + atomic_t num_acl_get; + atomic_t num_acl_set; + } cifs_stats; + } stats; #ifdef CONFIG_CIFS_STATS2 unsigned long long time_writes; unsigned long long time_reads; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index fe30bb5dd2d8..cabc7a01f5df 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -893,7 +893,7 @@ PsxDelete: cFYI(1, "Posix delete returned %d", rc); cifs_buf_release(pSMB); - cifs_stats_inc(&tcon->num_deletes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes); if (rc == -EAGAIN) goto PsxDelete; @@ -936,7 +936,7 @@ DelFileRetry: pSMB->ByteCount = cpu_to_le16(name_len + 1); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_deletes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes); if (rc) cFYI(1, "Error in RMFile = %d", rc); @@ -981,7 +981,7 @@ RmDirRetry: pSMB->ByteCount = cpu_to_le16(name_len + 1); rc = SendReceive(xid, tcon->ses, (struct 
smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_rmdirs); + cifs_stats_inc(&tcon->stats.cifs_stats.num_rmdirs); if (rc) cFYI(1, "Error in RMDir = %d", rc); @@ -1024,7 +1024,7 @@ MkDirRetry: pSMB->ByteCount = cpu_to_le16(name_len + 1); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_mkdirs); + cifs_stats_inc(&tcon->stats.cifs_stats.num_mkdirs); if (rc) cFYI(1, "Error in Mkdir = %d", rc); @@ -1147,9 +1147,9 @@ psx_create_err: cifs_buf_release(pSMB); if (posix_flags & SMB_O_DIRECTORY) - cifs_stats_inc(&tcon->num_posixmkdirs); + cifs_stats_inc(&tcon->stats.cifs_stats.num_posixmkdirs); else - cifs_stats_inc(&tcon->num_posixopens); + cifs_stats_inc(&tcon->stats.cifs_stats.num_posixopens); if (rc == -EAGAIN) goto PsxCreat; @@ -1270,7 +1270,7 @@ OldOpenRetry: /* long_op set to 1 to allow for oplock break timeouts */ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *)pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_opens); + cifs_stats_inc(&tcon->stats.cifs_stats.num_opens); if (rc) { cFYI(1, "Error in Open = %d", rc); } else { @@ -1383,7 +1383,7 @@ openRetry: /* long_op set to 1 to allow for oplock break timeouts */ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *)pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_opens); + cifs_stats_inc(&tcon->stats.cifs_stats.num_opens); if (rc) { cFYI(1, "Error in Open = %d", rc); } else { @@ -1650,7 +1650,7 @@ cifs_async_readv(struct cifs_readdata *rdata) rdata, 0); if (rc == 0) - cifs_stats_inc(&tcon->num_reads); + cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); else kref_put(&rdata->refcount, cifs_readdata_release); @@ -1720,7 +1720,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4; rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, &resp_buf_type, 
CIFS_LOG_ERROR); - cifs_stats_inc(&tcon->num_reads); + cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); pSMBr = (READ_RSP *)iov[0].iov_base; if (rc) { cERROR(1, "Send error in read = %d", rc); @@ -1872,7 +1872,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, long_op); - cifs_stats_inc(&tcon->num_writes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { cFYI(1, "Send error in write = %d", rc); } else { @@ -2123,7 +2123,7 @@ cifs_async_writev(struct cifs_writedata *wdata) NULL, cifs_writev_callback, wdata, 0); if (rc == 0) - cifs_stats_inc(&tcon->num_writes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); else kref_put(&wdata->refcount, cifs_writedata_release); @@ -2213,7 +2213,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, long_op); - cifs_stats_inc(&tcon->num_writes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { cFYI(1, "Send error Write2 = %d", rc); } else if (resp_buf_type == 0) { @@ -2279,7 +2279,7 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, iov[1].iov_base = (char *)buf; iov[1].iov_len = (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); - cifs_stats_inc(&tcon->num_locks); + cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); if (rc) cFYI(1, "Send error in cifs_lockv = %d", rc); @@ -2348,7 +2348,7 @@ CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, flags); /* SMB buffer freed by function above */ } - cifs_stats_inc(&tcon->num_locks); + cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); if (rc) cFYI(1, "Send error in Lock = %d", rc); @@ -2511,7 +2511,7 @@ CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) 
pSMB->LastWriteTime = 0xFFFFFFFF; pSMB->ByteCount = 0; rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); - cifs_stats_inc(&tcon->num_closes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_closes); if (rc) { if (rc != -EINTR) { /* EINTR is expected when user ctl-c to kill app */ @@ -2540,7 +2540,7 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) pSMB->FileID = (__u16) smb_file_id; pSMB->ByteCount = 0; rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); - cifs_stats_inc(&tcon->num_flushes); + cifs_stats_inc(&tcon->stats.cifs_stats.num_flushes); if (rc) cERROR(1, "Send error in Flush = %d", rc); @@ -2603,7 +2603,7 @@ renameRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_renames); + cifs_stats_inc(&tcon->stats.cifs_stats.num_renames); if (rc) cFYI(1, "Send error in rename = %d", rc); @@ -2684,7 +2684,7 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&pTcon->num_t2renames); + cifs_stats_inc(&pTcon->stats.cifs_stats.num_t2renames); if (rc) cFYI(1, "Send error in Rename (by file handle) = %d", rc); @@ -2841,7 +2841,7 @@ createSymLinkRetry: pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_symlinks); + cifs_stats_inc(&tcon->stats.cifs_stats.num_symlinks); if (rc) cFYI(1, "Send error in SetPathInfo create symlink = %d", rc); @@ -2927,7 +2927,7 @@ createHardLinkRetry: pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_hardlinks); + cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks); if (rc) cFYI(1, 
"Send error in SetPathInfo (hard link) = %d", rc); @@ -2999,7 +2999,7 @@ winCreateHardLinkRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_hardlinks); + cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks); if (rc) cFYI(1, "Send error in hard link (NT rename) = %d", rc); @@ -3417,7 +3417,7 @@ queryAclRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_acl_get); + cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get); if (rc) { cFYI(1, "Send error in Query POSIX ACL = %d", rc); } else { @@ -3728,7 +3728,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0); - cifs_stats_inc(&tcon->num_acl_get); + cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get); if (rc) { cFYI(1, "Send error in QuerySecDesc = %d", rc); } else { /* decode response */ @@ -4330,7 +4330,7 @@ findFirstRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_ffirst); + cifs_stats_inc(&tcon->stats.cifs_stats.num_ffirst); if (rc) {/* BB add logic to retry regular search if Unix search rejected unexpectedly by server */ @@ -4457,7 +4457,7 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cifs_stats_inc(&tcon->num_fnext); + cifs_stats_inc(&tcon->stats.cifs_stats.num_fnext); if (rc) { if (rc == -EBADF) { psrch_inf->endOfSearch = true; @@ -4548,7 +4548,7 @@ CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, if (rc) cERROR(1, "Send error in FindClose = %d", rc); - cifs_stats_inc(&tcon->num_fclose); + cifs_stats_inc(&tcon->stats.cifs_stats.num_fclose); /* Since session is dead, search handle closed on server already */ if 
(rc == -EAGAIN) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index ad2538a64c70..ce41fee07e5b 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -461,7 +461,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (tcon->tid != buf->Tid) continue; - cifs_stats_inc(&tcon->num_oplock_brks); + cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); spin_lock(&cifs_file_list_lock); list_for_each(tmp2, &tcon->openFileList) { netfile = list_entry(tmp2, struct cifsFileInfo, diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c9326b4ec6cd..581740998735 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -520,6 +520,72 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, return full_path; } +static void +cifs_clear_stats(struct cifs_tcon *tcon) +{ +#ifdef CONFIG_CIFS_STATS + atomic_set(&tcon->stats.cifs_stats.num_writes, 0); + atomic_set(&tcon->stats.cifs_stats.num_reads, 0); + atomic_set(&tcon->stats.cifs_stats.num_flushes, 0); + atomic_set(&tcon->stats.cifs_stats.num_oplock_brks, 0); + atomic_set(&tcon->stats.cifs_stats.num_opens, 0); + atomic_set(&tcon->stats.cifs_stats.num_posixopens, 0); + atomic_set(&tcon->stats.cifs_stats.num_posixmkdirs, 0); + atomic_set(&tcon->stats.cifs_stats.num_closes, 0); + atomic_set(&tcon->stats.cifs_stats.num_deletes, 0); + atomic_set(&tcon->stats.cifs_stats.num_mkdirs, 0); + atomic_set(&tcon->stats.cifs_stats.num_rmdirs, 0); + atomic_set(&tcon->stats.cifs_stats.num_renames, 0); + atomic_set(&tcon->stats.cifs_stats.num_t2renames, 0); + atomic_set(&tcon->stats.cifs_stats.num_ffirst, 0); + atomic_set(&tcon->stats.cifs_stats.num_fnext, 0); + atomic_set(&tcon->stats.cifs_stats.num_fclose, 0); + atomic_set(&tcon->stats.cifs_stats.num_hardlinks, 0); + atomic_set(&tcon->stats.cifs_stats.num_symlinks, 0); + atomic_set(&tcon->stats.cifs_stats.num_locks, 0); + atomic_set(&tcon->stats.cifs_stats.num_acl_get, 0); + atomic_set(&tcon->stats.cifs_stats.num_acl_set, 0); +#endif +} + +static void 
+cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon) +{ +#ifdef CONFIG_CIFS_STATS + seq_printf(m, " Oplocks breaks: %d", + atomic_read(&tcon->stats.cifs_stats.num_oplock_brks)); + seq_printf(m, "\nReads: %d Bytes: %llu", + atomic_read(&tcon->stats.cifs_stats.num_reads), + (long long)(tcon->bytes_read)); + seq_printf(m, "\nWrites: %d Bytes: %llu", + atomic_read(&tcon->stats.cifs_stats.num_writes), + (long long)(tcon->bytes_written)); + seq_printf(m, "\nFlushes: %d", + atomic_read(&tcon->stats.cifs_stats.num_flushes)); + seq_printf(m, "\nLocks: %d HardLinks: %d Symlinks: %d", + atomic_read(&tcon->stats.cifs_stats.num_locks), + atomic_read(&tcon->stats.cifs_stats.num_hardlinks), + atomic_read(&tcon->stats.cifs_stats.num_symlinks)); + seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d", + atomic_read(&tcon->stats.cifs_stats.num_opens), + atomic_read(&tcon->stats.cifs_stats.num_closes), + atomic_read(&tcon->stats.cifs_stats.num_deletes)); + seq_printf(m, "\nPosix Opens: %d Posix Mkdirs: %d", + atomic_read(&tcon->stats.cifs_stats.num_posixopens), + atomic_read(&tcon->stats.cifs_stats.num_posixmkdirs)); + seq_printf(m, "\nMkdirs: %d Rmdirs: %d", + atomic_read(&tcon->stats.cifs_stats.num_mkdirs), + atomic_read(&tcon->stats.cifs_stats.num_rmdirs)); + seq_printf(m, "\nRenames: %d T2 Renames %d", + atomic_read(&tcon->stats.cifs_stats.num_renames), + atomic_read(&tcon->stats.cifs_stats.num_t2renames)); + seq_printf(m, "\nFindFirst: %d FNext %d FClose %d", + atomic_read(&tcon->stats.cifs_stats.num_ffirst), + atomic_read(&tcon->stats.cifs_stats.num_fnext), + atomic_read(&tcon->stats.cifs_stats.num_fclose)); +#endif +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -537,6 +603,8 @@ struct smb_version_operations smb1_operations = { .find_mid = cifs_find_mid, .check_message = checkSMB, .dump_detail = cifs_dump_detail, + .clear_stats = cifs_clear_stats, + .print_stats = cifs_print_stats, 
.is_oplock_break = is_valid_oplock_break, .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, -- cgit v1.2.3 From d60622eb5a23904facf4a4efac60f5bfa810d7d4 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 28 May 2012 15:19:39 +0400 Subject: CIFS: Allow SMB2 statistics to be tracked Since there are only 19 command codes, it also is easier to track by exact command code than it was for cifs. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 9 ++++++ fs/cifs/smb2ops.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.c | 4 +-- 3 files changed, 91 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0896328418aa..12b1176b87b0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -28,6 +28,9 @@ #include "cifsacl.h" #include #include +#ifdef CONFIG_CIFS_SMB2 +#include "smb2pdu.h" +#endif /* * The sizes of various internal tables and strings @@ -592,6 +595,12 @@ struct cifs_tcon { atomic_t num_acl_get; atomic_t num_acl_set; } cifs_stats; +#ifdef CONFIG_CIFS_SMB2 + struct { + atomic_t smb2_com_sent[NUMBER_OF_SMB2_COMMANDS]; + atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS]; + } smb2_stats; +#endif /* CONFIG_CIFS_SMB2 */ } stats; #ifdef CONFIG_CIFS_STATS2 unsigned long long time_writes; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 483bd0ba2ecb..1018c5c6b5be 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -213,6 +213,85 @@ smb2_can_echo(struct TCP_Server_Info *server) return server->echoes; } +static void +smb2_clear_stats(struct cifs_tcon *tcon) +{ +#ifdef CONFIG_CIFS_STATS + int i; + for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++) { + atomic_set(&tcon->stats.smb2_stats.smb2_com_sent[i], 0); + atomic_set(&tcon->stats.smb2_stats.smb2_com_failed[i], 0); + } +#endif +} + +static void +smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) +{ +#ifdef CONFIG_CIFS_STATS + atomic_t *sent = 
tcon->stats.smb2_stats.smb2_com_sent; + atomic_t *failed = tcon->stats.smb2_stats.smb2_com_failed; + seq_printf(m, "\nNegotiates: %d sent %d failed", + atomic_read(&sent[SMB2_NEGOTIATE_HE]), + atomic_read(&failed[SMB2_NEGOTIATE_HE])); + seq_printf(m, "\nSessionSetups: %d sent %d failed", + atomic_read(&sent[SMB2_SESSION_SETUP_HE]), + atomic_read(&failed[SMB2_SESSION_SETUP_HE])); +#define SMB2LOGOFF 0x0002 /* trivial request/resp */ + seq_printf(m, "\nLogoffs: %d sent %d failed", + atomic_read(&sent[SMB2_LOGOFF_HE]), + atomic_read(&failed[SMB2_LOGOFF_HE])); + seq_printf(m, "\nTreeConnects: %d sent %d failed", + atomic_read(&sent[SMB2_TREE_CONNECT_HE]), + atomic_read(&failed[SMB2_TREE_CONNECT_HE])); + seq_printf(m, "\nTreeDisconnects: %d sent %d failed", + atomic_read(&sent[SMB2_TREE_DISCONNECT_HE]), + atomic_read(&failed[SMB2_TREE_DISCONNECT_HE])); + seq_printf(m, "\nCreates: %d sent %d failed", + atomic_read(&sent[SMB2_CREATE_HE]), + atomic_read(&failed[SMB2_CREATE_HE])); + seq_printf(m, "\nCloses: %d sent %d failed", + atomic_read(&sent[SMB2_CLOSE_HE]), + atomic_read(&failed[SMB2_CLOSE_HE])); + seq_printf(m, "\nFlushes: %d sent %d failed", + atomic_read(&sent[SMB2_FLUSH_HE]), + atomic_read(&failed[SMB2_FLUSH_HE])); + seq_printf(m, "\nReads: %d sent %d failed", + atomic_read(&sent[SMB2_READ_HE]), + atomic_read(&failed[SMB2_READ_HE])); + seq_printf(m, "\nWrites: %d sent %d failed", + atomic_read(&sent[SMB2_WRITE_HE]), + atomic_read(&failed[SMB2_WRITE_HE])); + seq_printf(m, "\nLocks: %d sent %d failed", + atomic_read(&sent[SMB2_LOCK_HE]), + atomic_read(&failed[SMB2_LOCK_HE])); + seq_printf(m, "\nIOCTLs: %d sent %d failed", + atomic_read(&sent[SMB2_IOCTL_HE]), + atomic_read(&failed[SMB2_IOCTL_HE])); + seq_printf(m, "\nCancels: %d sent %d failed", + atomic_read(&sent[SMB2_CANCEL_HE]), + atomic_read(&failed[SMB2_CANCEL_HE])); + seq_printf(m, "\nEchos: %d sent %d failed", + atomic_read(&sent[SMB2_ECHO_HE]), + atomic_read(&failed[SMB2_ECHO_HE])); + seq_printf(m, 
"\nQueryDirectories: %d sent %d failed", + atomic_read(&sent[SMB2_QUERY_DIRECTORY_HE]), + atomic_read(&failed[SMB2_QUERY_DIRECTORY_HE])); + seq_printf(m, "\nChangeNotifies: %d sent %d failed", + atomic_read(&sent[SMB2_CHANGE_NOTIFY_HE]), + atomic_read(&failed[SMB2_CHANGE_NOTIFY_HE])); + seq_printf(m, "\nQueryInfos: %d sent %d failed", + atomic_read(&sent[SMB2_QUERY_INFO_HE]), + atomic_read(&failed[SMB2_QUERY_INFO_HE])); + seq_printf(m, "\nSetInfos: %d sent %d failed", + atomic_read(&sent[SMB2_SET_INFO_HE]), + atomic_read(&failed[SMB2_SET_INFO_HE])); + seq_printf(m, "\nOplockBreaks: %d sent %d failed", + atomic_read(&sent[SMB2_OPLOCK_BREAK_HE]), + atomic_read(&failed[SMB2_OPLOCK_BREAK_HE])); +#endif +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -225,6 +304,8 @@ struct smb_version_operations smb21_operations = { .find_mid = smb2_find_mid, .check_message = smb2_check_message, .dump_detail = smb2_dump_detail, + .clear_stats = smb2_clear_stats, + .print_stats = smb2_print_stats, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .sess_setup = SMB2_sess_setup, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 373b6945161f..e4eb1d3fb7d9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -282,10 +282,8 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, if (tcon != NULL) { #ifdef CONFIG_CIFS_STATS2 - /* uint16_t com_code = le16_to_cpu(smb2_command); cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]); - */ #endif cifs_stats_inc(&tcon->num_smbs_sent); } @@ -677,7 +675,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code) { - /* cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[code]); */ + cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_failed[code]); } #define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */) -- cgit v1.2.3 
From 29e20f9c65fae245d6fd4fce31cc5d01cde3d93f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 13 Jul 2012 13:58:14 +0400 Subject: CIFS: Make CAP_* checks protocol independent Since both CIFS and SMB2 use ses->capabilities (server->capabilities) field but flags are different we should make such checks protocol independent. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 14 ++++++++++++-- fs/cifs/connect.c | 6 +++--- fs/cifs/dir.c | 3 +-- fs/cifs/file.c | 33 ++++++++++++++++----------------- fs/cifs/inode.c | 26 ++++++++++++-------------- fs/cifs/link.c | 6 +++--- fs/cifs/readdir.c | 16 ++++++++-------- fs/cifs/smb1ops.c | 3 +++ fs/cifs/smb2ops.c | 3 +++ fs/cifs/smb2pdu.c | 2 ++ fs/cifs/smb2pdu.h | 3 +++ 11 files changed, 66 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 12b1176b87b0..bcdf4d4420f1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -258,6 +258,9 @@ struct smb_version_values { size_t max_header_size; size_t read_rsp_size; __le16 lock_cmd; + unsigned int cap_unix; + unsigned int cap_nt_find; + unsigned int cap_large_files; }; #define HEADER_SIZE(server) (server->vals->header_size) @@ -408,7 +411,7 @@ struct TCP_Server_Info { unsigned int max_vcs; /* maximum number of smb sessions, at least those that can be specified uniquely with vcnumbers */ - int capabilities; /* allow selective disabling of caps by smb sess */ + unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ @@ -532,7 +535,7 @@ struct cifs_ses { __u64 Suid; /* remote smb uid */ uid_t linux_uid; /* overriding owner of files on the mount */ uid_t cred_uid; /* owner of credentials */ - int capabilities; + unsigned int capabilities; char 
serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for TCP names - will ipv6 and sctp addresses fit? */ char *user_name; /* must not be null except during init of sess @@ -554,6 +557,13 @@ struct cifs_ses { which do not negotiate NTLM or POSIX dialects, but instead negotiate one of the older LANMAN dialects */ #define CIFS_SES_LANMAN 8 + +static inline bool +cap_unix(struct cifs_ses *ses) +{ + return ses->server->vals->cap_unix & ses->capabilities; +} + /* * there is one of these for each connection to a resource on a particular * session diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5ab173fd6339..6df6fa14cba8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3634,7 +3634,7 @@ try_mount_again: } /* tell server which Unix caps we support */ - if (tcon->ses->capabilities & CAP_UNIX) { + if (cap_unix(tcon->ses)) { /* reset of caps checks mount to see if unix extensions disabled for just this mount */ reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info); @@ -3993,7 +3993,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, ses->flags = 0; ses->capabilities = server->capabilities; if (linuxExtEnabled == 0) - ses->capabilities &= (~CAP_UNIX); + ses->capabilities &= (~server->vals->cap_unix); cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", server->sec_mode, server->capabilities, server->timeAdj); @@ -4100,7 +4100,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) goto out; } - if (ses->capabilities & CAP_UNIX) + if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: kfree(vol_info->username); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2caba0b54acb..cbe709ad6663 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -182,8 +182,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, goto out; } - if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && - !tcon->broken_posix_open && + if (tcon->unix_ext && cap_unix(tcon->ses) && 
!tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = cifs_posix_open(full_path, &newinode, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 93b3b1358409..07e9d41cade7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -385,9 +385,8 @@ int cifs_open(struct inode *inode, struct file *file) oplock = 0; if (!tcon->broken_posix_open && tcon->unix_ext && - (tcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_POSIX_PATH_OPS_CAP & - le64_to_cpu(tcon->fsUnixInfo.Capability))) { + cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { /* can not refresh inode info since size could be stale */ rc = cifs_posix_open(full_path, &inode, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, @@ -509,10 +508,9 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) else oplock = 0; - if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && + if (tcon->unix_ext && cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & - le64_to_cpu(tcon->fsUnixInfo.Capability))) { - + le64_to_cpu(tcon->fsUnixInfo.Capability))) { /* * O_CREAT, O_EXCL and O_TRUNC already had their effect on the * original open. Must mask them off for a reopen. 
@@ -1071,7 +1069,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - if ((tcon->ses->capabilities & CAP_UNIX) && + if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) return cifs_push_posix_locks(cfile); @@ -1419,7 +1417,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) netfid = cfile->netfid; cinode = CIFS_I(file->f_path.dentry->d_inode); - if ((tcon->ses->capabilities & CAP_UNIX) && + if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) posix_lck = true; @@ -2745,7 +2743,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, unsigned int current_read_size; unsigned int rsize; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; unsigned int xid; char *current_offset; struct cifsFileInfo *open_file; @@ -2765,7 +2763,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, return rc; } open_file = file->private_data; - pTcon = tlink_tcon(open_file->tlink); + tcon = tlink_tcon(open_file->tlink); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -2779,11 +2777,12 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, read_size > total_read; total_read += bytes_read, current_offset += bytes_read) { current_read_size = min_t(uint, read_size - total_read, rsize); - - /* For windows me and 9x we do not want to request more - than it negotiated since it will refuse the read then */ - if ((pTcon->ses) && - !(pTcon->ses->capabilities & CAP_LARGE_FILES)) { + /* + * For windows me and 9x we do not want to request more than it + * negotiated since it will refuse the read then. 
+ */ + if ((tcon->ses) && !(tcon->ses->capabilities & + tcon->ses->server->vals->cap_large_files)) { current_read_size = min_t(uint, current_read_size, CIFSMaxBufSize); } @@ -2796,7 +2795,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, } io_parms.netfid = open_file->netfid; io_parms.pid = pid; - io_parms.tcon = pTcon; + io_parms.tcon = tcon; io_parms.offset = *poffset; io_parms.length = current_read_size; rc = CIFSSMBRead(xid, &io_parms, &bytes_read, @@ -2810,7 +2809,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, return rc; } } else { - cifs_stats_bytes_read(pTcon, total_read); + cifs_stats_bytes_read(tcon, total_read); *poffset += bytes_read; } } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index def10064fe9d..35cb6a374a45 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1149,9 +1149,8 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } - if ((tcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_POSIX_PATH_OPS_CAP & - le64_to_cpu(tcon->fsUnixInfo.Capability))) { + if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -1226,7 +1225,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; char *full_path = NULL; struct inode *newinode = NULL; struct cifs_fattr fattr; @@ -1237,7 +1236,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); xid = get_xid(); @@ -1247,9 +1246,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) goto 
mkdir_out; } - if ((pTcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_POSIX_PATH_OPS_CAP & - le64_to_cpu(pTcon->fsUnixInfo.Capability))) { + if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { u32 oplock = 0; FILE_UNIX_BASIC_INFO *pInfo = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); @@ -1259,7 +1257,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) } mode &= ~current_umask(); - rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, + rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, NULL /* netfid */, pInfo, &oplock, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -1303,14 +1301,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) } mkdir_retry_old: /* BB add setting the equivalent of mode via CreateX w/ACLs */ - rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, + rc = CIFSSMBMkDir(xid, tcon, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { cFYI(1, "cifs_mkdir returned 0x%x", rc); d_drop(direntry); } else { mkdir_get_info: - if (pTcon->unix_ext) + if (tcon->unix_ext) rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); else @@ -1328,7 +1326,7 @@ mkdir_get_info: if (inode->i_mode & S_ISGID) mode |= S_ISGID; - if (pTcon->unix_ext) { + if (tcon->unix_ext) { struct cifs_unix_set_info_args args = { .mode = mode, .ctime = NO_CHANGE_64, @@ -1346,7 +1344,7 @@ mkdir_get_info: args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -1361,7 +1359,7 @@ mkdir_get_info: cifsInode = CIFS_I(newinode); dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; pInfo.Attributes = cpu_to_le32(dosattrs); - tmprc = CIFSSMBSetPathInfo(xid, pTcon, + tmprc = CIFSSMBSetPathInfo(xid, tcon, 
full_path, &pInfo, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & diff --git a/fs/cifs/link.c b/fs/cifs/link.c index f78971511f57..09e4b3ae4564 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -495,8 +495,8 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) * but there doesn't seem to be any harm in allowing the client to * read them. */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) - && !(tcon->ses->capabilities & CAP_UNIX)) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) && + !cap_unix(tcon->ses)) { rc = -EACCES; goto out; } @@ -518,7 +518,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if ((rc != 0) && (tcon->ses->capabilities & CAP_UNIX)) + if ((rc != 0) && cap_unix(tcon->ses)) rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, cifs_sb->local_nls); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index da30d96a7495..d87f82678bc7 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -228,7 +228,7 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) struct cifsFileInfo *cifsFile; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); struct tcon_link *tlink = NULL; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; if (file->private_data == NULL) { tlink = cifs_sb_tlink(cifs_sb); @@ -242,10 +242,10 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) } file->private_data = cifsFile; cifsFile->tlink = cifs_get_tlink(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); } else { cifsFile = file->private_data; - pTcon = tlink_tcon(cifsFile->tlink); + tcon = tlink_tcon(cifsFile->tlink); } cifsFile->invalidHandle = true; @@ -262,11 +262,11 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) ffirst_retry: /* test for Unix extensions */ /* but now check for them on the share/mount not on the SMB session */ -/* if 
(pTcon->ses->capabilities & CAP_UNIX) { */ - if (pTcon->unix_ext) + /* if (cap_unix(tcon->ses) { */ + if (tcon->unix_ext) cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX; - else if ((pTcon->ses->capabilities & - (CAP_NT_SMBS | CAP_NT_FIND)) == 0) { + else if ((tcon->ses->capabilities & + tcon->ses->server->vals->cap_nt_find) == 0) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD; } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; @@ -278,7 +278,7 @@ ffirst_retry: if (backup_cred(cifs_sb)) search_flags |= CIFS_SEARCH_BACKUP_SEARCH; - rc = CIFSFindFirst(xid, pTcon, full_path, cifs_sb->local_nls, + rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls, &cifsFile->netfid, search_flags, &cifsFile->srch_inf, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 581740998735..c40356d24c5c 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -632,4 +632,7 @@ struct smb_version_values smb1_values = { .max_header_size = MAX_CIFS_HDR_SIZE, .read_rsp_size = sizeof(READ_RSP), .lock_cmd = cpu_to_le16(SMB_COM_LOCKING_ANDX), + .cap_unix = CAP_UNIX, + .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, + .cap_large_files = CAP_LARGE_FILES, }; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1018c5c6b5be..410cf925ea26 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -325,4 +325,7 @@ struct smb_version_values smb21_values = { .header_size = sizeof(struct smb2_hdr), .max_header_size = MAX_SMB2_HDR_SIZE, .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, }; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e4eb1d3fb7d9..62b3f17d0613 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -428,6 +428,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* BB Do we need to validate the SecurityMode? 
*/ server->sec_mode = le16_to_cpu(rsp->SecurityMode); server->capabilities = le32_to_cpu(rsp->Capabilities); + /* Internal types */ + server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES; security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, &rsp->hdr); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 59aae608d366..f37a1b41b402 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -167,6 +167,9 @@ struct smb2_negotiate_req { #define SMB2_GLOBAL_CAP_DFS 0x00000001 #define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ #define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ +/* Internal types */ +#define SMB2_NT_FIND 0x00100000 +#define SMB2_LARGE_FILES 0x00200000 struct smb2_negotiate_rsp { struct smb2_hdr hdr; -- cgit v1.2.3 From 5559b50acdcdcad7e362882d3261bf934c9436f6 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi Date: Tue, 24 Jul 2012 21:18:20 +0530 Subject: nfsd4: fix cr_principal comparison check in same_creds This fixes a wrong check for same cr_principal in same_creds Introduced by 8fbba96e5b327665265ad02b7f331b68536828bf "nfsd4: stricter cred comparison for setclientid/exchange_id". Cc: stable@vger.kernel.org Signed-off-by: Vivek Trivedi Signed-off-by: Namjae Jeon Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e404fca08260..fe96015fbfcb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1221,7 +1221,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2) return true; } -static int +static bool same_creds(struct svc_cred *cr1, struct svc_cred *cr2) { if ((cr1->cr_flavor != cr2->cr_flavor) @@ -1233,7 +1233,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return true; if (!cr1->cr_principal || !cr2->cr_principal) return false; - return 0 == strcmp(cr1->cr_principal, cr1->cr_principal); + return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } static void gen_clid(struct nfs4_client *clp) -- cgit v1.2.3 From a6d88f293ecd1b7444e128777f4a893e7a998852 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 25 May 2012 18:38:50 +0400 Subject: NFSd: fix locking in nfsd_forget_delegations() This patch takes recall_lock in nfsd_forget_delegations() to protect the nfsd_process_n_delegations() call. Also, it looks like it would be better to collect delegations on a local on-stack list and then unhash the collected list. This split simplifies locking, because delegation traversal is protected by recall_lock, while delegation unhashing is protected by client_mutex. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fe96015fbfcb..d10ad8bc47aa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4575,7 +4575,7 @@ void nfsd_forget_openowners(u64 num) printk(KERN_INFO "NFSD: Forgot %d open owners", count); } -int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +int nfsd_process_n_delegations(u64 num, struct list_head *list) { int i, count = 0; struct nfs4_file *fp, *fnext; @@ -4584,7 +4584,7 @@ int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegatio for (i = 0; i < FILE_HASH_SIZE; i++) { list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { - deleg_func(dp); + list_move(&dp->dl_recall_lru, list); if (++count == num) return count; } @@ -4597,9 +4597,16 @@ int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegatio void nfsd_forget_delegations(u64 num) { unsigned int count; + LIST_HEAD(victims); + struct nfs4_delegation *dp, *dnext; + + spin_lock(&recall_lock); + count = nfsd_process_n_delegations(num, &victims); + spin_unlock(&recall_lock); nfs4_lock_state(); - count = nfsd_process_n_delegations(num, unhash_delegation); + list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) + unhash_delegation(dp); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d delegations", count); @@ -4608,12 +4615,16 @@ void nfsd_forget_delegations(u64 num) void nfsd_recall_delegations(u64 num) { unsigned int count; + LIST_HEAD(victims); + struct nfs4_delegation *dp, *dnext; - nfs4_lock_state(); spin_lock(&recall_lock); - count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); + count = nfsd_process_n_delegations(num, &victims); + list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) { + list_del(&dp->dl_recall_lru); + 
nfsd_break_one_deleg(dp); + } spin_unlock(&recall_lock); - nfs4_unlock_state(); printk(KERN_INFO "NFSD: Recalled %d delegations", count); } -- cgit v1.2.3 From a007c4c3e943ecc054a806c259d95420a188754b Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Tue, 12 Jun 2012 16:54:16 -0400 Subject: nfsd: add get_uint for u32's I don't think there's a practical difference for the range of values these interfaces should see, but it would be safer to be unambiguous. Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 6 +++--- include/linux/sunrpc/cache.h | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ba233499b9a5..1114463bb856 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -398,7 +398,7 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int migrated, i, err; /* listsize */ - err = get_int(mesg, &fsloc->locations_count); + err = get_uint(mesg, &fsloc->locations_count); if (err) return err; if (fsloc->locations_count > MAX_FS_LOCATIONS) @@ -456,7 +456,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { - err = get_int(mesg, &f->pseudoflavor); + err = get_uint(mesg, &f->pseudoflavor); if (err) return err; /* @@ -465,7 +465,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) * problem at export time instead of when a client fails * to authenticate. 
*/ - err = get_int(mesg, &f->flags); + err = get_uint(mesg, &f->flags); if (err) return err; /* Only some flags are allowed to differ between flavors: */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index af42596a82f9..f792794f6634 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -230,6 +230,22 @@ static inline int get_int(char **bpp, int *anint) return 0; } +static inline int get_uint(char **bpp, unsigned int *anint) +{ + char buf[50]; + int len = qword_get(bpp, buf, sizeof(buf)); + + if (len < 0) + return -EINVAL; + if (len == 0) + return -ENOENT; + + if (kstrtouint(buf, 0, anint)) + return -EINVAL; + + return 0; +} + /* * timestamps kept in the cache are expressed in seconds * since boot. This is the best for measuring differences in -- cgit v1.2.3 From 19f7e2ca44dfc3c1b3f499fc46801f98d403500f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 3 Jul 2012 16:46:41 +0400 Subject: NFSd: introduce nfsd_destroy() helper Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 8 ++------ fs/nfsd/nfsd.h | 9 +++++++++ fs/nfsd/nfssvc.c | 14 +++----------- 3 files changed, 14 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c55298ed5772..fa49cff5ee65 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -673,9 +673,7 @@ static ssize_t __write_ports_addfd(char *buf) err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); + nfsd_destroy(net); return err; } @@ -744,9 +742,7 @@ out_close: svc_xprt_put(xprt); } out_err: - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); + nfsd_destroy(net); return err; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 6d425c2f9fcd..7b248a24d5c5 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -75,6 +75,15 @@ int nfsd_set_nrthreads(int n, int *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); +static inline void nfsd_destroy(struct net *net) +{ + int destroy = (nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nfsd_serv, net); + svc_destroy(nfsd_serv); +} + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern struct svc_version nfsd_acl_version2; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ee709fc8f58b..8621e36ce8dd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -427,11 +427,7 @@ int nfsd_set_nrthreads(int n, int *nthreads) if (err) break; } - - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); - + nfsd_destroy(net); return err; } @@ -478,9 +474,7 @@ out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown(); out_destroy: - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); /* Release server */ + 
nfsd_destroy(net); /* Release server */ out: mutex_unlock(&nfsd_mutex); return error; @@ -682,9 +676,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file) mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); + nfsd_destroy(net); mutex_unlock(&nfsd_mutex); return ret; } -- cgit v1.2.3 From 57c8b13e3cd0f94944c9691ce7f58e5fcef8a12d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 3 Jul 2012 16:46:41 +0400 Subject: NFSd: set nfsd_serv to NULL after service destruction In nfsd_destroy(): if (destroy) svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); svc_shutdown_net(nfsd_serv, net) calls nfsd_last_thread(), which sets nfsd_serv to NULL, causing a NULL dereference on the following line. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 ++ fs/nfsd/nfssvc.c | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7b248a24d5c5..2244222368ab 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -82,6 +82,8 @@ static inline void nfsd_destroy(struct net *net) if (destroy) svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); + if (destroy) + nfsd_serv = NULL; } #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 8621e36ce8dd..240473cb708f 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -254,8 +254,6 @@ static void nfsd_shutdown(void) static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { - /* When last nfsd thread exits we need to do some clean-up */ - nfsd_serv = NULL; nfsd_shutdown(); svc_rpcb_cleanup(serv, net); @@ -332,6 +330,7 @@ static int nfsd_get_default_max_blksize(void) int nfsd_create_serv(void) { int error; + struct net *net = current->nsproxy->net_ns; 
WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nfsd_serv) { @@ -346,7 +345,7 @@ int nfsd_create_serv(void) if (nfsd_serv == NULL) return -ENOMEM; - error = svc_bind(nfsd_serv, current->nsproxy->net_ns); + error = svc_bind(nfsd_serv, net); if (error < 0) { svc_destroy(nfsd_serv); return error; @@ -557,12 +556,13 @@ nfsd(void *vrqstp) nfsdstats.th_cnt --; out: - if (rqstp->rq_server->sv_nrthreads == 1) - svc_shutdown_net(rqstp->rq_server, &init_net); + rqstp->rq_server = NULL; /* Release the thread */ svc_exit_thread(rqstp); + nfsd_destroy(&init_net); + /* Release module */ mutex_unlock(&nfsd_mutex); module_put_and_exit(0); -- cgit v1.2.3 From 362a20c5e27614739c46707d1c5f55c214d164ce Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 1 Aug 2011 18:11:57 +0200 Subject: btrfs: allow cross-subvolume file clone Lift the EXDEV condition and allow different root trees for files being cloned, then pass the source inode's root when searching for extents. Cloning is not allowed to cross vfsmounts, i.e. when two subvolumes from one filesystem are mounted separately. 
Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0e92e5763005..7011871c45b8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2340,6 +2340,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out_drop_write; } + ret = -EXDEV; + if (src_file->f_path.mnt != file->f_path.mnt) + goto out_fput; + src = src_file->f_dentry->d_inode; ret = -EINVAL; @@ -2360,7 +2364,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out_fput; ret = -EXDEV; - if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) + if (src->i_sb != inode->i_sb) goto out_fput; ret = -ENOMEM; @@ -2434,13 +2438,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * note the key will change type as we walk through the * tree. */ - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, + 0, 0); if (ret < 0) goto out; nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); + ret = btrfs_next_leaf(BTRFS_I(src)->root, path); if (ret < 0) goto out; if (ret > 0) -- cgit v1.2.3 From e679376911d016b670c8cfc1645c178f77e8d1d3 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 11:18:10 +0200 Subject: Btrfs: add helper for tree enumeration Often no exact match is wanted but just the next lower or higher item. There's a lot of duplicated code throughout btrfs to deal with the corner cases. This patch adds a helper function that can facilitate searching. 
Signed-off-by: Arne Jansen --- fs/btrfs/ctree.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 3 +++ 2 files changed, 77 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8206b3900587..c82a9e4a953e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2721,6 +2721,80 @@ done: return ret; } +/* + * helper to use instead of search slot if no exact match is needed but + * instead the next or previous item should be returned. + * When find_higher is true, the next higher item is returned, the next lower + * otherwise. + * When return_any and find_higher are both true, and no higher item is found, + * return the next lower instead. + * When return_any is true and find_higher is false, and no lower item is found, + * return the next higher instead. + * It returns 0 if any item is found, 1 if none is found (tree empty), and + * < 0 on error + */ +int btrfs_search_slot_for_read(struct btrfs_root *root, + struct btrfs_key *key, struct btrfs_path *p, + int find_higher, int return_any) +{ + int ret; + struct extent_buffer *leaf; + +again: + ret = btrfs_search_slot(NULL, root, key, p, 0, 0); + if (ret <= 0) + return ret; + /* + * a return value of 1 means the path is at the position where the + * item should be inserted. Normally this is the next bigger item, + * but in case the previous item is the last in a leaf, path points + * to the first free slot in the previous leaf, i.e. at an invalid + * item. 
+ */ + leaf = p->nodes[0]; + + if (find_higher) { + if (p->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, p); + if (ret <= 0) + return ret; + if (!return_any) + return 1; + /* + * no higher item found, return the next + * lower instead + */ + return_any = 0; + find_higher = 0; + btrfs_release_path(p); + goto again; + } + } else { + if (p->slots[0] == 0) { + ret = btrfs_prev_leaf(root, p); + if (ret < 0) + return ret; + if (!ret) { + p->slots[0] = btrfs_header_nritems(leaf) - 1; + return 0; + } + if (!return_any) + return 1; + /* + * no lower item found, return the next + * higher instead + */ + return_any = 0; + find_higher = 1; + btrfs_release_path(p); + goto again; + } else { + --p->slots[0]; + } + } + return 0; +} + /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa5c45b39075..8cfde9326dd6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2711,6 +2711,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, u64 time_seq); +int btrfs_search_slot_for_read(struct btrfs_root *root, + struct btrfs_key *key, struct btrfs_path *p, + int find_higher, int return_any); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, -- cgit v1.2.3 From 764a1b1acecedfe204cb2e80d8e2cc7c6df1b0b8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 25 Jul 2012 14:59:54 -0400 Subject: cifs: ensure that we always do cifsFileInfo_get under the spinlock The readpages bug is a regression that was introduced in 6993f74a5. This also fixes a couple of similar bugs in the uncached read and write codepaths. 
Also, prevent this sort of thing in the future by having cifsFileInfo_get take the spinlock itself, and adding a _locked variant for use in places that are already holding the lock. The _put code has always done that so this makes for a less confusing interface. Cc: # 3.5.x Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 6 +++--- fs/cifs/file.c | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bcdf4d4420f1..497da5ce704c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -765,13 +765,13 @@ struct cifs_io_parms { * Take a reference on the file private data. Must be called with * cifs_file_list_lock held. */ -static inline -struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file) +static inline void +cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) { ++cifs_file->count; - return cifs_file; } +struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); /* diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 07e9d41cade7..9154192b0683 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -284,6 +284,15 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, static void cifs_del_lock_waiters(struct cifsLockInfo *lock); +struct cifsFileInfo * +cifsFileInfo_get(struct cifsFileInfo *cifs_file) +{ + spin_lock(&cifs_file_list_lock); + cifsFileInfo_get_locked(cifs_file); + spin_unlock(&cifs_file_list_lock); + return cifs_file; +} + /* * Release a reference on the file private data. This may involve closing * the filehandle out on the server. 
Must be called without holding @@ -1562,7 +1571,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, if (!open_file->invalidHandle) { /* found a good file */ /* lock it so it will not be closed on us */ - cifsFileInfo_get(open_file); + cifsFileInfo_get_locked(open_file); spin_unlock(&cifs_file_list_lock); return open_file; } /* else might as well continue, and look for @@ -1614,7 +1623,7 @@ refind_writable: if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ - cifsFileInfo_get(open_file); + cifsFileInfo_get_locked(open_file); spin_unlock(&cifs_file_list_lock); return open_file; } else { @@ -1631,7 +1640,7 @@ refind_writable: if (inv_file) { any_available = false; - cifsFileInfo_get(inv_file); + cifsFileInfo_get_locked(inv_file); } spin_unlock(&cifs_file_list_lock); @@ -3082,8 +3091,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, break; } - spin_lock(&cifs_file_list_lock); - spin_unlock(&cifs_file_list_lock); rdata->cfile = cifsFileInfo_get(open_file); rdata->mapping = mapping; rdata->offset = offset; -- cgit v1.2.3 From 2b0ce2c2909368d124a78a88e5c7106fdcba6221 Mon Sep 17 00:00:00 2001 From: Mitch Harder Date: Tue, 24 Jul 2012 11:58:43 -0600 Subject: Btrfs: Check INCOMPAT flags on remount and add helper function In support of the recently added capability to remount with lzo compression, provide a helper function to check the compression INCOMPAT flags when remounting with lzo compression, and set the flags if necessary. Also, implement the new helper function when defragmenting with explicit lzo compression and when setting the default subvolume. 
Signed-off-by: Mitch Harder Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 17 +++++++++++++++++ fs/btrfs/ioctl.c | 16 ++-------------- fs/btrfs/super.c | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 00f9a50f986d..0f369da5cd97 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3288,6 +3288,23 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *function, unsigned int line, int errno); +#define btrfs_set_fs_incompat(__fs_info, opt) \ + __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + +static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, + u64 flag) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (!(features & flag)) { + features |= flag; + btrfs_set_super_incompat_flags(disk_super, features); + } +} + #define btrfs_abort_transaction(trans, root, errno) \ do { \ __btrfs_abort_transaction(trans, root, __func__, \ diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e54b663fd3aa..3f3cbe928a1a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1053,11 +1053,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, u64 newer_than, unsigned long max_to_defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_super_block *disk_super; struct file_ra_state *ra = NULL; unsigned long last_index; u64 isize = i_size_read(inode); - u64 features; u64 last_len = 0; u64 skip = 0; u64 defrag_end = 0; @@ -1244,11 +1242,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, mutex_unlock(&inode->i_mutex); } - disk_super = root->fs_info->super_copy; - features = btrfs_super_incompat_flags(disk_super); if (range->compress_type == BTRFS_COMPRESS_LZO) { - features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; - btrfs_set_super_incompat_flags(disk_super, features); + 
btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); } ret = defrag_count; @@ -2784,8 +2779,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) struct btrfs_path *path; struct btrfs_key location; struct btrfs_disk_key disk_key; - struct btrfs_super_block *disk_super; - u64 features; u64 objectid = 0; u64 dir_id; @@ -2836,12 +2829,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - disk_super = root->fs_info->super_copy; - features = btrfs_super_incompat_flags(disk_super); - if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { - features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; - btrfs_set_super_incompat_flags(disk_super, features); - } + btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); btrfs_end_transaction(trans, root); return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 26da344231ac..75ee2c7791f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -401,6 +401,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) compress_type = "lzo"; info->compress_type = BTRFS_COMPRESS_LZO; btrfs_set_opt(info->mount_opt, COMPRESS); + btrfs_set_fs_incompat(info, COMPRESS_LZO); } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; info->compress_type = BTRFS_COMPRESS_NONE; -- cgit v1.2.3 From e9fbcb42201c862fd6ab45c48ead4f47bb2dea9d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jul 2012 15:57:13 -0400 Subject: Btrfs: call the ordered free operation without any locks held Each ordered operation has a free callback, and this was called with the worker spinlock held. Josef made the free callback also call iput, which we can't do with the spinlock. This drops the spinlock for the free operation and grabs it again before moving through the rest of the list. We'll circle back around to this and find a cleaner way that doesn't bounce the lock around so much. 
Signed-off-by: Chris Mason cc: stable@kernel.org --- fs/btrfs/async-thread.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 42704149b723..58b7d14b08ee 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers, work->ordered_func(work); - /* now take the lock again and call the freeing code */ + /* now take the lock again and drop our item from the list */ spin_lock(&workers->order_lock); list_del(&work->order_list); + spin_unlock(&workers->order_lock); + + /* + * we don't want to call the ordered free functions + * with the lock held though + */ work->ordered_free(work); + spin_lock(&workers->order_lock); } spin_unlock(&workers->order_lock); -- cgit v1.2.3 From cd1cfc49153ba2bef247e500d8bd4d135193ece9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jul 2012 16:03:32 -0400 Subject: Btrfs: add a barrier before a waitqueue_active check We were missing wakeups on the delayed ref waitqueue due to races on waitqueue_active. 
Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 44f06201f376..4e1b153b7c47 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5294,6 +5294,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; + smp_mb(); if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) wake_up(&root->fs_info->tree_mod_seq_wait); -- cgit v1.2.3 From 91cb916ca26feb99c78c131a1643af3d10fefd96 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Sun, 3 Jun 2012 14:23:23 +0200 Subject: Btrfs: make iref_to_path non static Make iref_to_path non static (needed in send) and rename it to btrfs_iref_to_path Signed-off-by: Alexander Block --- fs/btrfs/backref.c | 10 +++++----- fs/btrfs/backref.h | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a383c18e74e8..e99fe0e31da2 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1125,10 +1125,10 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, * required for the path to fit into the buffer. in that case, the returned * value will be smaller than dest. callers must check this! 
*/ -static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, - struct btrfs_inode_ref *iref, - struct extent_buffer *eb_in, u64 parent, - char *dest, u32 size) +char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, + struct btrfs_inode_ref *iref, + struct extent_buffer *eb_in, u64 parent, + char *dest, u32 size) { u32 len; int slot; @@ -1543,7 +1543,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, ipath->fspath->bytes_left - s_ptr : 0; fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; - fspath = iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, + fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, inum, fspath_min, bytes_left); if (IS_ERR(fspath)) return PTR_ERR(fspath); diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index c18d8ac7b795..1a765792fbf2 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -21,6 +21,7 @@ #include "ioctl.h" #include "ulist.h" +#include "extent_io.h" #define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0) @@ -60,6 +61,9 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 delayed_ref_seq, u64 time_seq, struct ulist **roots); +char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, + struct btrfs_inode_ref *iref, struct extent_buffer *eb, + u64 parent, char *dest, u32 size); struct btrfs_data_container *init_data_container(u32 total_bytes); struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, -- cgit v1.2.3 From 8ea05e3a4262b9e6871c349fa3486bcfc72ffd1a Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Wed, 25 Jul 2012 17:35:53 +0200 Subject: Btrfs: introduce subvol uuids and times This patch introduces uuids for subvolumes. Each subvolume has its own uuid. In case it was snapshotted, it also contains parent_uuid. In case it was received, it also contains received_uuid. 
It also introduces subvolume ctime/otime/stime/rtime. The first two are comparable to the times found in inodes. otime is the origin/creation time and ctime is the change time. stime/rtime are only valid on received subvolumes. stime is the time of the subvolume when it was sent. rtime is the time of the subvolume when it was received. In addition to the times, we have a transid for each time. They are updated at the same place as the times. btrfs receive uses stransid and rtransid to find out if a received subvolume changed in the meantime. If an older kernel mounts a filesystem with the extended fields, all fields become invalid. The next mount with a new kernel will detect this and reset the fields. Signed-off-by: Alexander Block Reviewed-by: David Sterba Reviewed-by: Arne Jansen Reviewed-by: Jan Schmidt Reviewed-by: Alex Lyakas --- fs/btrfs/check-integrity.c | 7 +-- fs/btrfs/ctree.h | 47 ++++++++++++++++++++ fs/btrfs/disk-io.c | 8 ++-- fs/btrfs/inode.c | 4 ++ fs/btrfs/ioctl.c | 100 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ioctl.h | 17 +++++++ fs/btrfs/root-tree.c | 107 ++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/transaction.c | 17 +++++++ 8 files changed, 292 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index da6e9364a5e3..9197e2e33407 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1032,6 +1032,7 @@ continue_with_current_leaf_stack_frame: struct btrfs_disk_key *disk_key; u8 type; u32 item_offset; + u32 item_size; if (disk_item_offset + sizeof(struct btrfs_item) > sf->block_ctx->len) { @@ -1047,6 +1048,7 @@ leaf_item_out_of_bounce_error: disk_item_offset, sizeof(struct btrfs_item)); item_offset = le32_to_cpu(disk_item.offset); + item_size = le32_to_cpu(disk_item.size); disk_key = &disk_item.key; type = disk_key->type; @@ -1057,14 +1059,13 @@ leaf_item_out_of_bounce_error: root_item_offset = item_offset + offsetof(struct btrfs_leaf, 
items); - if (root_item_offset + - sizeof(struct btrfs_root_item) > + if (root_item_offset + item_size > sf->block_ctx->len) goto leaf_item_out_of_bounce_error; btrfsic_read_from_block_data( sf->block_ctx, &root_item, root_item_offset, - sizeof(struct btrfs_root_item)); + item_size); next_bytenr = le64_to_cpu(root_item.bytenr); sf->error = diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8cfde9326dd6..d5f6d7458676 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -709,6 +709,36 @@ struct btrfs_root_item { struct btrfs_disk_key drop_progress; u8 drop_level; u8 level; + + /* + * The following fields appear after subvol_uuids+subvol_times + * were introduced. + */ + + /* + * This generation number is used to test if the new fields are valid + * and up to date while reading the root item. Everytime the root item + * is written out, the "generation" field is copied into this field. If + * anyone ever mounted the fs with an older kernel, we will have + * mismatching generation values here and thus must invalidate the + * new fields. See btrfs_update_root and btrfs_find_last_root for + * details. + * the offset of generation_v2 is also used as the start for the memset + * when invalidating the fields. + */ + __le64 generation_v2; + u8 uuid[BTRFS_UUID_SIZE]; + u8 parent_uuid[BTRFS_UUID_SIZE]; + u8 received_uuid[BTRFS_UUID_SIZE]; + __le64 ctransid; /* updated when an inode changes */ + __le64 otransid; /* trans when created */ + __le64 stransid; /* trans when sent. non-zero for received subvol */ + __le64 rtransid; /* trans when received. 
non-zero for received subvol */ + struct btrfs_timespec ctime; + struct btrfs_timespec otime; + struct btrfs_timespec stime; + struct btrfs_timespec rtime; + __le64 reserved[8]; /* for future */ } __attribute__ ((__packed__)); /* @@ -1416,6 +1446,8 @@ struct btrfs_root { dev_t anon_dev; int force_cow; + + spinlock_t root_times_lock; }; struct btrfs_ioctl_defrag_range_args { @@ -2189,6 +2221,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, last_snapshot, 64); +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, + generation_v2, 64); +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, + ctransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, + otransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, + stransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, + rtransid, 64); static inline bool btrfs_root_readonly(struct btrfs_root *root) { @@ -2822,6 +2864,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); +void btrfs_read_root_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot, + struct btrfs_root_item *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); @@ -2829,6 +2874,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root); void btrfs_set_root_node(struct btrfs_root_item *item, struct extent_buffer *node); void btrfs_check_and_init_root_item(struct btrfs_root_item *item); +void btrfs_update_root_times(struct btrfs_trans_handle *trans, + struct btrfs_root *root); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle 
*trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2936ca49b3b4..c39eb71fae31 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_running = 0; root->root_key.objectid = objectid; root->anon_dev = 0; + + spin_lock_init(&root->root_times_lock); } static int __must_check find_and_setup_root(struct btrfs_root *tree_root, @@ -1326,6 +1328,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, u64 generation; u32 blocksize; int ret = 0; + int slot; root = btrfs_alloc_root(fs_info); if (!root) @@ -1352,9 +1355,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret == 0) { l = path->nodes[0]; - read_extent_buffer(l, &root->root_item, - btrfs_item_ptr_offset(l, path->slots[0]), - sizeof(root->root_item)); + slot = path->slots[0]; + btrfs_read_root_item(tree_root, l, slot, &root->root_item); memcpy(&root->root_key, location, sizeof(*location)); } btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a7d1921ac76b..4ffc87389545 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2734,6 +2734,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, */ if (!btrfs_is_free_space_inode(root, inode) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + btrfs_update_root_times(trans, root); + ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) btrfs_set_inode_last_trans(trans, inode); @@ -4723,6 +4725,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, trace_btrfs_inode_new(inode); btrfs_set_inode_last_trans(trans, inode); + btrfs_update_root_times(trans, root); + return inode; fail: if (dir) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7011871c45b8..99fe2ce7f721 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -41,6 +41,7 @@ 
#include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -346,11 +347,13 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_root *new_root; struct dentry *parent = dentry->d_parent; struct inode *dir; + struct timespec cur_time = CURRENT_TIME; int ret; int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; + uuid_le new_uuid; ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); if (ret) @@ -389,8 +392,9 @@ static noinline int create_subvol(struct btrfs_root *root, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); + memset(&root_item, 0, sizeof(root_item)); + inode_item = &root_item.inode; - memset(inode_item, 0, sizeof(*inode_item)); inode_item->generation = cpu_to_le64(1); inode_item->size = cpu_to_le64(3); inode_item->nlink = cpu_to_le32(1); @@ -408,8 +412,15 @@ static noinline int create_subvol(struct btrfs_root *root, btrfs_set_root_used(&root_item, leaf->len); btrfs_set_root_last_snapshot(&root_item, 0); - memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); - root_item.drop_level = 0; + btrfs_set_root_generation_v2(&root_item, + btrfs_root_generation(&root_item)); + uuid_le_gen(&new_uuid); + memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); + root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); + root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); + root_item.ctime = root_item.otime; + btrfs_set_root_ctransid(&root_item, trans->transid); + btrfs_set_root_otransid(&root_item, trans->transid); btrfs_tree_unlock(leaf); free_extent_buffer(leaf); @@ -3395,6 +3406,87 @@ out: return ret; } +static long btrfs_ioctl_set_received_subvol(struct file *file, + void __user *arg) +{ + struct btrfs_ioctl_received_subvol_args *sa = NULL; + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root_item *root_item = &root->root_item; + struct btrfs_trans_handle *trans; + struct timespec ct = 
CURRENT_TIME; + int ret = 0; + + ret = mnt_want_write_file(file); + if (ret < 0) + return ret; + + down_write(&root->fs_info->subvol_sem); + + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { + ret = -EINVAL; + goto out; + } + + if (btrfs_root_readonly(root)) { + ret = -EROFS; + goto out; + } + + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out; + } + + sa = memdup_user(arg, sizeof(*sa)); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + sa = NULL; + goto out; + } + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + + sa->rtransid = trans->transid; + sa->rtime.sec = ct.tv_sec; + sa->rtime.nsec = ct.tv_nsec; + + memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); + btrfs_set_root_stransid(root_item, sa->stransid); + btrfs_set_root_rtransid(root_item, sa->rtransid); + root_item->stime.sec = cpu_to_le64(sa->stime.sec); + root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); + root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); + root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); + + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + if (ret < 0) { + btrfs_end_transaction(trans, root); + trans = NULL; + goto out; + } else { + ret = btrfs_commit_transaction(trans, root); + if (ret < 0) + goto out; + } + + ret = copy_to_user(arg, sa, sizeof(*sa)); + if (ret) + ret = -EFAULT; + +out: + kfree(sa); + up_write(&root->fs_info->subvol_sem); + mnt_drop_write_file(file); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3477,6 +3569,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_balance_ctl(root, arg); case BTRFS_IOC_BALANCE_PROGRESS: return btrfs_ioctl_balance_progress(root, argp); + case BTRFS_IOC_SET_RECEIVED_SUBVOL: + return btrfs_ioctl_set_received_subvol(file, argp); case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(root, argp, 0); case 
BTRFS_IOC_GET_AND_RESET_DEV_STATS: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index e440aa653c30..0c505d7ff8ed 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -295,6 +295,21 @@ struct btrfs_ioctl_get_dev_stats { __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ }; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -359,6 +374,8 @@ struct btrfs_ioctl_get_dev_stats { struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 24fb8ce4e071..6bb465cca20f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -16,11 +16,54 @@ * Boston, MA 021110-1307, USA. */ +#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" #include "print-tree.h" +/* + * Read a root item from the tree. In case we detect a root item smaller then + * sizeof(root_item), we know it's an old version of the root structure and + * initialize all new fields to zero. The same happens if we detect mismatching + * generation numbers as then we know the root was once mounted with an older + * kernel that was not aware of the root item structure change. 
+ */ +void btrfs_read_root_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot, + struct btrfs_root_item *item) +{ + uuid_le uuid; + int len; + int need_reset = 0; + + len = btrfs_item_size_nr(eb, slot); + read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot), + min_t(int, len, (int)sizeof(*item))); + if (len < sizeof(*item)) + need_reset = 1; + if (!need_reset && btrfs_root_generation(item) + != btrfs_root_generation_v2(item)) { + if (btrfs_root_generation_v2(item) != 0) { + printk(KERN_WARNING "btrfs: mismatching " + "generation and generation_v2 " + "found in root item. This root " + "was probably mounted with an " + "older kernel. Resetting all " + "new fields.\n"); + } + need_reset = 1; + } + if (need_reset) { + memset(&item->generation_v2, 0, + sizeof(*item) - offsetof(struct btrfs_root_item, + generation_v2)); + + uuid_le_gen(&uuid); + memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE); + } +} + /* * lookup the root with the highest offset for a given objectid. The key we do * find is copied into 'key'. 
If we find something return 0, otherwise 1, < 0 @@ -61,10 +104,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, goto out; } if (item) - read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), - sizeof(*item)); + btrfs_read_root_item(root, l, slot, item); if (key) memcpy(key, &found_key, sizeof(found_key)); + ret = 0; out: btrfs_free_path(path); @@ -91,16 +134,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int slot; unsigned long ptr; + int old_len; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = btrfs_search_slot(trans, root, key, path, 0, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + if (ret < 0) + goto out_abort; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); @@ -113,16 +155,56 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root l = path->nodes[0]; slot = path->slots[0]; ptr = btrfs_item_ptr_offset(l, slot); + old_len = btrfs_item_size_nr(l, slot); + + /* + * If this is the first time we update the root item which originated + * from an older kernel, we need to enlarge the item size to make room + * for the added fields. + */ + if (old_len < sizeof(*item)) { + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, key, path, + -1, 1); + if (ret < 0) + goto out_abort; + ret = btrfs_del_item(trans, root, path); + if (ret < 0) + goto out_abort; + btrfs_release_path(path); + ret = btrfs_insert_empty_item(trans, root, path, + key, sizeof(*item)); + if (ret < 0) + goto out_abort; + l = path->nodes[0]; + slot = path->slots[0]; + ptr = btrfs_item_ptr_offset(l, slot); + } + + /* + * Update generation_v2 so at the next mount we know the new root + * fields are valid. 
+ */ + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_free_path(path); return ret; + +out_abort: + btrfs_abort_transaction(trans, root, ret); + goto out; } int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) { + /* + * Make sure generation v1 and v2 match. See update_root for details. + */ + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); return btrfs_insert_item(trans, root, key, item, sizeof(*item)); } @@ -454,3 +536,16 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) root_item->byte_limit = 0; } } + +void btrfs_update_root_times(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root_item *item = &root->root_item; + struct timespec ct = CURRENT_TIME; + + spin_lock(&root->root_times_lock); + item->ctransid = trans->transid; + item->ctime.sec = cpu_to_le64(ct.tv_sec); + item->ctime.nsec = cpu_to_le64(ct.tv_nsec); + spin_unlock(&root->root_times_lock); +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b72b068183ec..a21f3085a334 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -926,11 +927,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; + struct timespec cur_time = CURRENT_TIME; int ret; u64 to_reserve = 0; u64 index = 0; u64 objectid; u64 root_flags; + uuid_le new_uuid; rsv = trans->block_rsv; @@ -1016,6 +1019,20 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; btrfs_set_root_flags(new_root_item, root_flags); + btrfs_set_root_generation_v2(new_root_item, + 
trans->transid); + uuid_le_gen(&new_uuid); + memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); + memcpy(new_root_item->parent_uuid, root->root_item.uuid, + BTRFS_UUID_SIZE); + new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); + new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); + btrfs_set_root_otransid(new_root_item, trans->transid); + memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); + memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); + btrfs_set_root_stransid(new_root_item, 0); + btrfs_set_root_rtransid(new_root_item, 0); + old = btrfs_lock_root_node(root); ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); if (ret) { -- cgit v1.2.3 From 7069830a9e381e33d44ded45095f764844c71d24 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Tue, 5 Jun 2012 21:07:48 +0200 Subject: Btrfs: add btrfs_compare_trees function This function is used to find the differences between two trees. The tree compare skips whole subtrees if it detects shared tree blocks and thus is pretty fast. 
Signed-off-by: Alexander Block Reviewed-by: David Sterba Reviewed-by: Arne Jansen Reviewed-by: Jan Schmidt Reviewed-by: Alex Lyakas --- fs/btrfs/ctree.c | 425 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 15 ++ 2 files changed, 440 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c82a9e4a953e..4c10fd19d481 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5005,6 +5005,431 @@ out: return ret; } +static void tree_move_down(struct btrfs_root *root, + struct btrfs_path *path, + int *level, int root_level) +{ + path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], + path->slots[*level]); + path->slots[*level - 1] = 0; + (*level)--; +} + +static int tree_move_next_or_upnext(struct btrfs_root *root, + struct btrfs_path *path, + int *level, int root_level) +{ + int ret = 0; + int nritems; + nritems = btrfs_header_nritems(path->nodes[*level]); + + path->slots[*level]++; + + while (path->slots[*level] == nritems) { + if (*level == root_level) + return -1; + + /* move upnext */ + path->slots[*level] = 0; + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + (*level)++; + path->slots[*level]++; + + nritems = btrfs_header_nritems(path->nodes[*level]); + ret = 1; + } + return ret; +} + +/* + * Returns 1 if it had to move up and next. 0 is returned if it moved only next + * or down. 
+ */ +static int tree_advance(struct btrfs_root *root, + struct btrfs_path *path, + int *level, int root_level, + int allow_down, + struct btrfs_key *key) +{ + int ret; + + if (*level == 0 || !allow_down) { + ret = tree_move_next_or_upnext(root, path, level, root_level); + } else { + tree_move_down(root, path, level, root_level); + ret = 0; + } + if (ret >= 0) { + if (*level == 0) + btrfs_item_key_to_cpu(path->nodes[*level], key, + path->slots[*level]); + else + btrfs_node_key_to_cpu(path->nodes[*level], key, + path->slots[*level]); + } + return ret; +} + +static int tree_compare_item(struct btrfs_root *left_root, + struct btrfs_path *left_path, + struct btrfs_path *right_path, + char *tmp_buf) +{ + int cmp; + int len1, len2; + unsigned long off1, off2; + + len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]); + len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]); + if (len1 != len2) + return 1; + + off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]); + off2 = btrfs_item_ptr_offset(right_path->nodes[0], + right_path->slots[0]); + + read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1); + + cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1); + if (cmp) + return 1; + return 0; +} + +#define ADVANCE 1 +#define ADVANCE_ONLY_NEXT -1 + +/* + * This function compares two trees and calls the provided callback for + * every changed/new/deleted item it finds. + * If shared tree blocks are encountered, whole subtrees are skipped, making + * the compare pretty fast on snapshotted subvolumes. + * + * This currently works on commit roots only. As commit roots are read only, + * we don't do any locking. The commit roots are protected with transactions. + * Transactions are ended and rejoined when a commit is tried in between. + * + * This function checks for modifications done to the trees while comparing. + * If it detects a change, it aborts immediately. 
+ */ +int btrfs_compare_trees(struct btrfs_root *left_root, + struct btrfs_root *right_root, + btrfs_changed_cb_t changed_cb, void *ctx) +{ + int ret; + int cmp; + struct btrfs_trans_handle *trans = NULL; + struct btrfs_path *left_path = NULL; + struct btrfs_path *right_path = NULL; + struct btrfs_key left_key; + struct btrfs_key right_key; + char *tmp_buf = NULL; + int left_root_level; + int right_root_level; + int left_level; + int right_level; + int left_end_reached; + int right_end_reached; + int advance_left; + int advance_right; + u64 left_blockptr; + u64 right_blockptr; + u64 left_start_ctransid; + u64 right_start_ctransid; + u64 ctransid; + + left_path = btrfs_alloc_path(); + if (!left_path) { + ret = -ENOMEM; + goto out; + } + right_path = btrfs_alloc_path(); + if (!right_path) { + ret = -ENOMEM; + goto out; + } + + tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS); + if (!tmp_buf) { + ret = -ENOMEM; + goto out; + } + + left_path->search_commit_root = 1; + left_path->skip_locking = 1; + right_path->search_commit_root = 1; + right_path->skip_locking = 1; + + spin_lock(&left_root->root_times_lock); + left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); + spin_unlock(&left_root->root_times_lock); + + spin_lock(&right_root->root_times_lock); + right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); + spin_unlock(&right_root->root_times_lock); + + trans = btrfs_join_transaction(left_root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + + /* + * Strategy: Go to the first items of both trees. 
Then do + * + * If both trees are at level 0 + * Compare keys of current items + * If left < right treat left item as new, advance left tree + * and repeat + * If left > right treat right item as deleted, advance right tree + * and repeat + * If left == right do deep compare of items, treat as changed if + * needed, advance both trees and repeat + * If both trees are at the same level but not at level 0 + * Compare keys of current nodes/leafs + * If left < right advance left tree and repeat + * If left > right advance right tree and repeat + * If left == right compare blockptrs of the next nodes/leafs + * If they match advance both trees but stay at the same level + * and repeat + * If they don't match advance both trees while allowing to go + * deeper and repeat + * If tree levels are different + * Advance the tree that needs it and repeat + * + * Advancing a tree means: + * If we are at level 0, try to go to the next slot. If that's not + * possible, go one level up and repeat. Stop when we found a level + * where we could go to the next slot. We may at this point be on a + * node or a leaf. + * + * If we are not at level 0 and not on shared tree blocks, go one + * level deeper. + * + * If we are not at level 0 and on shared tree blocks, go one slot to + * the right if possible or go up and right. 
+ */ + + left_level = btrfs_header_level(left_root->commit_root); + left_root_level = left_level; + left_path->nodes[left_level] = left_root->commit_root; + extent_buffer_get(left_path->nodes[left_level]); + + right_level = btrfs_header_level(right_root->commit_root); + right_root_level = right_level; + right_path->nodes[right_level] = right_root->commit_root; + extent_buffer_get(right_path->nodes[right_level]); + + if (left_level == 0) + btrfs_item_key_to_cpu(left_path->nodes[left_level], + &left_key, left_path->slots[left_level]); + else + btrfs_node_key_to_cpu(left_path->nodes[left_level], + &left_key, left_path->slots[left_level]); + if (right_level == 0) + btrfs_item_key_to_cpu(right_path->nodes[right_level], + &right_key, right_path->slots[right_level]); + else + btrfs_node_key_to_cpu(right_path->nodes[right_level], + &right_key, right_path->slots[right_level]); + + left_end_reached = right_end_reached = 0; + advance_left = advance_right = 0; + + while (1) { + /* + * We need to make sure the transaction does not get committed + * while we do anything on commit roots. This means, we need to + * join and leave transactions for every item that we process. 
+ */ + if (trans && btrfs_should_end_transaction(trans, left_root)) { + btrfs_release_path(left_path); + btrfs_release_path(right_path); + + ret = btrfs_end_transaction(trans, left_root); + trans = NULL; + if (ret < 0) + goto out; + } + /* now rejoin the transaction */ + if (!trans) { + trans = btrfs_join_transaction(left_root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + + spin_lock(&left_root->root_times_lock); + ctransid = btrfs_root_ctransid(&left_root->root_item); + spin_unlock(&left_root->root_times_lock); + if (ctransid != left_start_ctransid) + left_start_ctransid = 0; + + spin_lock(&right_root->root_times_lock); + ctransid = btrfs_root_ctransid(&right_root->root_item); + spin_unlock(&right_root->root_times_lock); + if (ctransid != right_start_ctransid) + right_start_ctransid = 0; + + if (!left_start_ctransid || !right_start_ctransid) { + WARN(1, KERN_WARNING + "btrfs: btrfs_compare_tree detected " + "a change in one of the trees while " + "iterating. 
This is probably a " + "bug.\n"); + ret = -EIO; + goto out; + } + + /* + * the commit root may have changed, so start again + * where we stopped + */ + left_path->lowest_level = left_level; + right_path->lowest_level = right_level; + ret = btrfs_search_slot(NULL, left_root, + &left_key, left_path, 0, 0); + if (ret < 0) + goto out; + ret = btrfs_search_slot(NULL, right_root, + &right_key, right_path, 0, 0); + if (ret < 0) + goto out; + } + + if (advance_left && !left_end_reached) { + ret = tree_advance(left_root, left_path, &left_level, + left_root_level, + advance_left != ADVANCE_ONLY_NEXT, + &left_key); + if (ret < 0) + left_end_reached = ADVANCE; + advance_left = 0; + } + if (advance_right && !right_end_reached) { + ret = tree_advance(right_root, right_path, &right_level, + right_root_level, + advance_right != ADVANCE_ONLY_NEXT, + &right_key); + if (ret < 0) + right_end_reached = ADVANCE; + advance_right = 0; + } + + if (left_end_reached && right_end_reached) { + ret = 0; + goto out; + } else if (left_end_reached) { + if (right_level == 0) { + ret = changed_cb(left_root, right_root, + left_path, right_path, + &right_key, + BTRFS_COMPARE_TREE_DELETED, + ctx); + if (ret < 0) + goto out; + } + advance_right = ADVANCE; + continue; + } else if (right_end_reached) { + if (left_level == 0) { + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, + BTRFS_COMPARE_TREE_NEW, + ctx); + if (ret < 0) + goto out; + } + advance_left = ADVANCE; + continue; + } + + if (left_level == 0 && right_level == 0) { + cmp = btrfs_comp_cpu_keys(&left_key, &right_key); + if (cmp < 0) { + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, + BTRFS_COMPARE_TREE_NEW, + ctx); + if (ret < 0) + goto out; + advance_left = ADVANCE; + } else if (cmp > 0) { + ret = changed_cb(left_root, right_root, + left_path, right_path, + &right_key, + BTRFS_COMPARE_TREE_DELETED, + ctx); + if (ret < 0) + goto out; + advance_right = ADVANCE; + } else { + ret = 
tree_compare_item(left_root, left_path, + right_path, tmp_buf); + if (ret) { + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, + BTRFS_COMPARE_TREE_CHANGED, + ctx); + if (ret < 0) + goto out; + } + advance_left = ADVANCE; + advance_right = ADVANCE; + } + } else if (left_level == right_level) { + cmp = btrfs_comp_cpu_keys(&left_key, &right_key); + if (cmp < 0) { + advance_left = ADVANCE; + } else if (cmp > 0) { + advance_right = ADVANCE; + } else { + left_blockptr = btrfs_node_blockptr( + left_path->nodes[left_level], + left_path->slots[left_level]); + right_blockptr = btrfs_node_blockptr( + right_path->nodes[right_level], + right_path->slots[right_level]); + if (left_blockptr == right_blockptr) { + /* + * As we're on a shared block, don't + * allow to go deeper. + */ + advance_left = ADVANCE_ONLY_NEXT; + advance_right = ADVANCE_ONLY_NEXT; + } else { + advance_left = ADVANCE; + advance_right = ADVANCE; + } + } + } else if (left_level < right_level) { + advance_right = ADVANCE; + } else { + advance_left = ADVANCE; + } + } + +out: + btrfs_free_path(left_path); + btrfs_free_path(right_path); + kfree(tmp_buf); + + if (trans) { + if (!ret) + ret = btrfs_end_transaction(trans, left_root); + else + btrfs_end_transaction(trans, left_root); + } + + return ret; +} + /* * this is similar to btrfs_next_leaf, but does not try to preserve * and fixup the path. 
It looks for and returns the next key in the diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d5f6d7458676..2fbbe738caed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2722,6 +2722,21 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans); +enum btrfs_compare_tree_result { + BTRFS_COMPARE_TREE_NEW, + BTRFS_COMPARE_TREE_DELETED, + BTRFS_COMPARE_TREE_CHANGED, +}; +typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, + struct btrfs_root *right_root, + struct btrfs_path *left_path, + struct btrfs_path *right_path, + struct btrfs_key *key, + enum btrfs_compare_tree_result result, + void *ctx); +int btrfs_compare_trees(struct btrfs_root *left_root, + struct btrfs_root *right_root, + btrfs_changed_cb_t cb, void *ctx); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, -- cgit v1.2.3 From 31db9f7c23fbf7e95026143f79645de6507b583b Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Wed, 25 Jul 2012 23:19:24 +0200 Subject: Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive This patch introduces the BTRFS_IOC_SEND ioctl that is required for send. It allows btrfs-progs to implement full and incremental sends. Patches for btrfs-progs will follow. 
Signed-off-by: Alexander Block Reviewed-by: David Sterba Reviewed-by: Arne Jansen Reviewed-by: Jan Schmidt Reviewed-by: Alex Lyakas --- fs/btrfs/Makefile | 2 +- fs/btrfs/ioctl.c | 3 + fs/btrfs/ioctl.h | 10 + fs/btrfs/send.c | 4570 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/send.h | 133 ++ 5 files changed, 4717 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/send.c create mode 100644 fs/btrfs/send.h (limited to 'fs') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0c4fa2befae7..f740644bb5a5 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o ulist.o + reada.o backref.o ulist.o send.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 99fe2ce7f721..bca6997fdb80 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -54,6 +54,7 @@ #include "inode-map.h" #include "backref.h" #include "rcu-string.h" +#include "send.h" /* Mask out flags that are inappropriate for the given type of inode. 
*/ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -3571,6 +3572,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_balance_progress(root, argp); case BTRFS_IOC_SET_RECEIVED_SUBVOL: return btrfs_ioctl_set_received_subvol(file, argp); + case BTRFS_IOC_SEND: + return btrfs_ioctl_send(file, argp); case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(root, argp, 0); case BTRFS_IOC_GET_AND_RESET_DEV_STATS: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 0c505d7ff8ed..27097e8bfa39 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -310,6 +310,15 @@ struct btrfs_ioctl_received_subvol_args { __u64 reserved[16]; /* in */ }; +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -376,6 +385,7 @@ struct btrfs_ioctl_received_subvol_args { struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c new file mode 100644 index 000000000000..5394cb75012a --- /dev/null +++ b/fs/btrfs/send.c @@ -0,0 +1,4570 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "send.h" +#include "backref.h" +#include "locking.h" +#include "disk-io.h" +#include "btrfs_inode.h" +#include "transaction.h" + +static int g_verbose = 0; + +#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__) + +/* + * A fs_path is a helper to dynamically build path names with unknown size. + * It reallocates the internal buffer on demand. + * It allows fast adding of path elements on the right side (normal path) and + * fast adding to the left side (reversed path). A reversed path can also be + * unreversed if needed. 
+ */ +struct fs_path { + union { + struct { + char *start; + char *end; + char *prepared; + + char *buf; + int buf_len; + int reversed:1; + int virtual_mem:1; + char inline_buf[]; + }; + char pad[PAGE_SIZE]; + }; +}; +#define FS_PATH_INLINE_SIZE \ + (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf)) + + +/* reused for each extent */ +struct clone_root { + struct btrfs_root *root; + u64 ino; + u64 offset; + + u64 found_refs; +}; + +#define SEND_CTX_MAX_NAME_CACHE_SIZE 128 +#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) + +struct send_ctx { + struct file *send_filp; + loff_t send_off; + char *send_buf; + u32 send_size; + u32 send_max_size; + u64 total_send_size; + u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; + + struct vfsmount *mnt; + + struct btrfs_root *send_root; + struct btrfs_root *parent_root; + struct clone_root *clone_roots; + int clone_roots_cnt; + + /* current state of the compare_tree call */ + struct btrfs_path *left_path; + struct btrfs_path *right_path; + struct btrfs_key *cmp_key; + + /* + * infos of the currently processed inode. In case of deleted inodes, + * these are the values from the deleted inode. 
+ */ + u64 cur_ino; + u64 cur_inode_gen; + int cur_inode_new; + int cur_inode_new_gen; + int cur_inode_deleted; + int cur_inode_first_ref_orphan; + u64 cur_inode_size; + u64 cur_inode_mode; + + u64 send_progress; + + struct list_head new_refs; + struct list_head deleted_refs; + + struct radix_tree_root name_cache; + struct list_head name_cache_list; + int name_cache_size; + + struct file *cur_inode_filp; + char *read_buf; +}; + +struct name_cache_entry { + struct list_head list; + struct list_head use_list; + u64 ino; + u64 gen; + u64 parent_ino; + u64 parent_gen; + int ret; + int need_later_update; + int name_len; + char name[]; +}; + +static void fs_path_reset(struct fs_path *p) +{ + if (p->reversed) { + p->start = p->buf + p->buf_len - 1; + p->end = p->start; + *p->start = 0; + } else { + p->start = p->buf; + p->end = p->start; + *p->start = 0; + } +} + +static struct fs_path *fs_path_alloc(struct send_ctx *sctx) +{ + struct fs_path *p; + + p = kmalloc(sizeof(*p), GFP_NOFS); + if (!p) + return NULL; + p->reversed = 0; + p->virtual_mem = 0; + p->buf = p->inline_buf; + p->buf_len = FS_PATH_INLINE_SIZE; + fs_path_reset(p); + return p; +} + +static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) +{ + struct fs_path *p; + + p = fs_path_alloc(sctx); + if (!p) + return NULL; + p->reversed = 1; + fs_path_reset(p); + return p; +} + +static void fs_path_free(struct send_ctx *sctx, struct fs_path *p) +{ + if (!p) + return; + if (p->buf != p->inline_buf) { + if (p->virtual_mem) + vfree(p->buf); + else + kfree(p->buf); + } + kfree(p); +} + +static int fs_path_len(struct fs_path *p) +{ + return p->end - p->start; +} + +static int fs_path_ensure_buf(struct fs_path *p, int len) +{ + char *tmp_buf; + int path_len; + int old_buf_len; + + len++; + + if (p->buf_len >= len) + return 0; + + path_len = p->end - p->start; + old_buf_len = p->buf_len; + len = PAGE_ALIGN(len); + + if (p->buf == p->inline_buf) { + tmp_buf = kmalloc(len, GFP_NOFS); + if (!tmp_buf) { + tmp_buf 
= vmalloc(len); + if (!tmp_buf) + return -ENOMEM; + p->virtual_mem = 1; + } + memcpy(tmp_buf, p->buf, p->buf_len); + p->buf = tmp_buf; + p->buf_len = len; + } else { + if (p->virtual_mem) { + tmp_buf = vmalloc(len); + if (!tmp_buf) + return -ENOMEM; + memcpy(tmp_buf, p->buf, p->buf_len); + vfree(p->buf); + } else { + tmp_buf = krealloc(p->buf, len, GFP_NOFS); + if (!tmp_buf) { + tmp_buf = vmalloc(len); + if (!tmp_buf) + return -ENOMEM; + memcpy(tmp_buf, p->buf, p->buf_len); + kfree(p->buf); + p->virtual_mem = 1; + } + } + p->buf = tmp_buf; + p->buf_len = len; + } + if (p->reversed) { + tmp_buf = p->buf + old_buf_len - path_len - 1; + p->end = p->buf + p->buf_len - 1; + p->start = p->end - path_len; + memmove(p->start, tmp_buf, path_len + 1); + } else { + p->start = p->buf; + p->end = p->start + path_len; + } + return 0; +} + +static int fs_path_prepare_for_add(struct fs_path *p, int name_len) +{ + int ret; + int new_len; + + new_len = p->end - p->start + name_len; + if (p->start != p->end) + new_len++; + ret = fs_path_ensure_buf(p, new_len); + if (ret < 0) + goto out; + + if (p->reversed) { + if (p->start != p->end) + *--p->start = '/'; + p->start -= name_len; + p->prepared = p->start; + } else { + if (p->start != p->end) + *p->end++ = '/'; + p->prepared = p->end; + p->end += name_len; + *p->end = 0; + } + +out: + return ret; +} + +static int fs_path_add(struct fs_path *p, const char *name, int name_len) +{ + int ret; + + ret = fs_path_prepare_for_add(p, name_len); + if (ret < 0) + goto out; + memcpy(p->prepared, name, name_len); + p->prepared = NULL; + +out: + return ret; +} + +static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) +{ + int ret; + + ret = fs_path_prepare_for_add(p, p2->end - p2->start); + if (ret < 0) + goto out; + memcpy(p->prepared, p2->start, p2->end - p2->start); + p->prepared = NULL; + +out: + return ret; +} + +static int fs_path_add_from_extent_buffer(struct fs_path *p, + struct extent_buffer *eb, + unsigned long off, int len) +{ 
+ int ret; + + ret = fs_path_prepare_for_add(p, len); + if (ret < 0) + goto out; + + read_extent_buffer(eb, p->prepared, off, len); + p->prepared = NULL; + +out: + return ret; +} + +static void fs_path_remove(struct fs_path *p) +{ + BUG_ON(p->reversed); + while (p->start != p->end && *p->end != '/') + p->end--; + *p->end = 0; +} + +static int fs_path_copy(struct fs_path *p, struct fs_path *from) +{ + int ret; + + p->reversed = from->reversed; + fs_path_reset(p); + + ret = fs_path_add_path(p, from); + + return ret; +} + + +static void fs_path_unreverse(struct fs_path *p) +{ + char *tmp; + int len; + + if (!p->reversed) + return; + + tmp = p->start; + len = p->end - p->start; + p->start = p->buf; + p->end = p->start + len; + memmove(p->start, tmp, len + 1); + p->reversed = 0; +} + +static struct btrfs_path *alloc_path_for_send(void) +{ + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return NULL; + path->search_commit_root = 1; + path->skip_locking = 1; + return path; +} + +static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) +{ + int ret; + mm_segment_t old_fs; + u32 pos = 0; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + + while (pos < len) { + ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos, + &sctx->send_off); + /* TODO handle that correctly */ + /*if (ret == -ERESTARTSYS) { + continue; + }*/ + if (ret < 0) + goto out; + if (ret == 0) { + ret = -EIO; + goto out; + } + pos += ret; + } + + ret = 0; + +out: + set_fs(old_fs); + return ret; +} + +static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) +{ + struct btrfs_tlv_header *hdr; + int total_len = sizeof(*hdr) + len; + int left = sctx->send_max_size - sctx->send_size; + + if (unlikely(left < total_len)) + return -EOVERFLOW; + + hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); + hdr->tlv_type = cpu_to_le16(attr); + hdr->tlv_len = cpu_to_le16(len); + memcpy(hdr + 1, data, len); + sctx->send_size += total_len; + + 
return 0; +} + +#if 0 +static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value) +{ + return tlv_put(sctx, attr, &value, sizeof(value)); +} + +static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value) +{ + __le16 tmp = cpu_to_le16(value); + return tlv_put(sctx, attr, &tmp, sizeof(tmp)); +} + +static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value) +{ + __le32 tmp = cpu_to_le32(value); + return tlv_put(sctx, attr, &tmp, sizeof(tmp)); +} +#endif + +static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value) +{ + __le64 tmp = cpu_to_le64(value); + return tlv_put(sctx, attr, &tmp, sizeof(tmp)); +} + +static int tlv_put_string(struct send_ctx *sctx, u16 attr, + const char *str, int len) +{ + if (len == -1) + len = strlen(str); + return tlv_put(sctx, attr, str, len); +} + +static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, + const u8 *uuid) +{ + return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); +} + +#if 0 +static int tlv_put_timespec(struct send_ctx *sctx, u16 attr, + struct timespec *ts) +{ + struct btrfs_timespec bts; + bts.sec = cpu_to_le64(ts->tv_sec); + bts.nsec = cpu_to_le32(ts->tv_nsec); + return tlv_put(sctx, attr, &bts, sizeof(bts)); +} +#endif + +static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, + struct extent_buffer *eb, + struct btrfs_timespec *ts) +{ + struct btrfs_timespec bts; + read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts)); + return tlv_put(sctx, attr, &bts, sizeof(bts)); +} + + +#define TLV_PUT(sctx, attrtype, attrlen, data) \ + do { \ + ret = tlv_put(sctx, attrtype, attrlen, data); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) + +#define TLV_PUT_INT(sctx, attrtype, bits, value) \ + do { \ + ret = tlv_put_u##bits(sctx, attrtype, value); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) + +#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data) +#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data) +#define 
TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data) +#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data) +#define TLV_PUT_STRING(sctx, attrtype, str, len) \ + do { \ + ret = tlv_put_string(sctx, attrtype, str, len); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) +#define TLV_PUT_PATH(sctx, attrtype, p) \ + do { \ + ret = tlv_put_string(sctx, attrtype, p->start, \ + p->end - p->start); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while(0) +#define TLV_PUT_UUID(sctx, attrtype, uuid) \ + do { \ + ret = tlv_put_uuid(sctx, attrtype, uuid); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) +#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \ + do { \ + ret = tlv_put_timespec(sctx, attrtype, ts); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) +#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ + do { \ + ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) + +static int send_header(struct send_ctx *sctx) +{ + struct btrfs_stream_header hdr; + + strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); + hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); + + return write_buf(sctx, &hdr, sizeof(hdr)); +} + +/* + * For each command/item we want to send to userspace, we call this function. 
+ */ +static int begin_cmd(struct send_ctx *sctx, int cmd) +{ + struct btrfs_cmd_header *hdr; + + if (!sctx->send_buf) { + WARN_ON(1); + return -EINVAL; + } + + BUG_ON(sctx->send_size); + + sctx->send_size += sizeof(*hdr); + hdr = (struct btrfs_cmd_header *)sctx->send_buf; + hdr->cmd = cpu_to_le16(cmd); + + return 0; +} + +static int send_cmd(struct send_ctx *sctx) +{ + int ret; + struct btrfs_cmd_header *hdr; + u32 crc; + + hdr = (struct btrfs_cmd_header *)sctx->send_buf; + hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); + hdr->crc = 0; + + crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); + hdr->crc = cpu_to_le32(crc); + + ret = write_buf(sctx, sctx->send_buf, sctx->send_size); + + sctx->total_send_size += sctx->send_size; + sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; + sctx->send_size = 0; + + return ret; +} + +/* + * Sends a move instruction to user space + */ +static int send_rename(struct send_ctx *sctx, + struct fs_path *from, struct fs_path *to) +{ + int ret; + +verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Sends a link instruction to user space + */ +static int send_link(struct send_ctx *sctx, + struct fs_path *path, struct fs_path *lnk) +{ + int ret; + +verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Sends an unlink instruction to user space + */ +static int send_unlink(struct send_ctx *sctx, struct fs_path *path) +{ + int ret; + +verbose_printk("btrfs: send_unlink 
%s\n", path->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Sends a rmdir instruction to user space + */ +static int send_rmdir(struct send_ctx *sctx, struct fs_path *path) +{ + int ret; + +verbose_printk("btrfs: send_rmdir %s\n", path->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Helper function to retrieve some fields from an inode item. + */ +static int get_inode_info(struct btrfs_root *root, + u64 ino, u64 *size, u64 *gen, + u64 *mode, u64 *uid, u64 *gid) +{ + int ret; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_path *path; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret) { + ret = -ENOENT; + goto out; + } + + ii = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + if (size) + *size = btrfs_inode_size(path->nodes[0], ii); + if (gen) + *gen = btrfs_inode_generation(path->nodes[0], ii); + if (mode) + *mode = btrfs_inode_mode(path->nodes[0], ii); + if (uid) + *uid = btrfs_inode_uid(path->nodes[0], ii); + if (gid) + *gid = btrfs_inode_gid(path->nodes[0], ii); + +out: + btrfs_free_path(path); + return ret; +} + +typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, + struct fs_path *p, + void *ctx); + +/* + * Helper function to iterate the entries in ONE btrfs_inode_ref. + * The iterate callback may return a non zero value to stop iteration. This can + * be a negative value for error codes or 1 to simply stop it. + * + * path must point to the INODE_REF when called. 
+ */ +static int iterate_inode_ref(struct send_ctx *sctx, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *found_key, int resolve, + iterate_inode_ref_t iterate, void *ctx) +{ + struct extent_buffer *eb; + struct btrfs_item *item; + struct btrfs_inode_ref *iref; + struct btrfs_path *tmp_path; + struct fs_path *p; + u32 cur; + u32 len; + u32 total; + int slot; + u32 name_len; + char *start; + int ret = 0; + int num; + int index; + + p = fs_path_alloc_reversed(sctx); + if (!p) + return -ENOMEM; + + tmp_path = alloc_path_for_send(); + if (!tmp_path) { + fs_path_free(sctx, p); + return -ENOMEM; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item = btrfs_item_nr(eb, slot); + iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); + cur = 0; + len = 0; + total = btrfs_item_size(eb, item); + + num = 0; + while (cur < total) { + fs_path_reset(p); + + name_len = btrfs_inode_ref_name_len(eb, iref); + index = btrfs_inode_ref_index(eb, iref); + if (resolve) { + start = btrfs_iref_to_path(root, tmp_path, iref, eb, + found_key->offset, p->buf, + p->buf_len); + if (IS_ERR(start)) { + ret = PTR_ERR(start); + goto out; + } + if (start < p->buf) { + /* overflow , try again with larger buffer */ + ret = fs_path_ensure_buf(p, + p->buf_len + p->buf - start); + if (ret < 0) + goto out; + start = btrfs_iref_to_path(root, tmp_path, iref, + eb, found_key->offset, p->buf, + p->buf_len); + if (IS_ERR(start)) { + ret = PTR_ERR(start); + goto out; + } + BUG_ON(start < p->buf); + } + p->start = start; + } else { + ret = fs_path_add_from_extent_buffer(p, eb, + (unsigned long)(iref + 1), name_len); + if (ret < 0) + goto out; + } + + + len = sizeof(*iref) + name_len; + iref = (struct btrfs_inode_ref *)((char *)iref + len); + cur += len; + + ret = iterate(num, found_key->offset, index, p, ctx); + if (ret) + goto out; + + num++; + } + +out: + btrfs_free_path(tmp_path); + fs_path_free(sctx, p); + return ret; +} + +typedef int (*iterate_dir_item_t)(int num, struct 
btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx); + +/* + * Helper function to iterate the entries in ONE btrfs_dir_item. + * The iterate callback may return a non zero value to stop iteration. This can + * be a negative value for error codes or 1 to simply stop it. + * + * path must point to the dir item when called. + */ +static int iterate_dir_item(struct send_ctx *sctx, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *found_key, + iterate_dir_item_t iterate, void *ctx) +{ + int ret = 0; + struct extent_buffer *eb; + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_path *tmp_path = NULL; + struct btrfs_key di_key; + char *buf = NULL; + char *buf2 = NULL; + int buf_len; + int buf_virtual = 0; + u32 name_len; + u32 data_len; + u32 cur; + u32 len; + u32 total; + int slot; + int num; + u8 type; + + buf_len = PAGE_SIZE; + buf = kmalloc(buf_len, GFP_NOFS); + if (!buf) { + ret = -ENOMEM; + goto out; + } + + tmp_path = alloc_path_for_send(); + if (!tmp_path) { + ret = -ENOMEM; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item = btrfs_item_nr(eb, slot); + di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); + cur = 0; + len = 0; + total = btrfs_item_size(eb, item); + + num = 0; + while (cur < total) { + name_len = btrfs_dir_name_len(eb, di); + data_len = btrfs_dir_data_len(eb, di); + type = btrfs_dir_type(eb, di); + btrfs_dir_item_key_to_cpu(eb, di, &di_key); + + if (name_len + data_len > buf_len) { + buf_len = PAGE_ALIGN(name_len + data_len); + if (buf_virtual) { + buf2 = vmalloc(buf_len); + if (!buf2) { + ret = -ENOMEM; + goto out; + } + vfree(buf); + } else { + buf2 = krealloc(buf, buf_len, GFP_NOFS); + if (!buf2) { + buf2 = vmalloc(buf_len); + if (!buf2) { + ret = -ENOMEM; + goto out; + } + kfree(buf); + buf_virtual = 1; + } + } + + buf = buf2; + buf2 = NULL; + } + + read_extent_buffer(eb, buf, (unsigned long)(di + 1), + name_len + data_len); 
+ + len = sizeof(*di) + name_len + data_len; + di = (struct btrfs_dir_item *)((char *)di + len); + cur += len; + + ret = iterate(num, &di_key, buf, name_len, buf + name_len, + data_len, type, ctx); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + goto out; + } + + num++; + } + +out: + btrfs_free_path(tmp_path); + if (buf_virtual) + vfree(buf); + else + kfree(buf); + return ret; +} + +static int __copy_first_ref(int num, u64 dir, int index, + struct fs_path *p, void *ctx) +{ + int ret; + struct fs_path *pt = ctx; + + ret = fs_path_copy(pt, p); + if (ret < 0) + return ret; + + /* we want the first only */ + return 1; +} + +/* + * Retrieve the first path of an inode. If an inode has more then one + * ref/hardlink, this is ignored. + */ +static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, + u64 ino, struct fs_path *path) +{ + int ret; + struct btrfs_key key, found_key; + struct btrfs_path *p; + + p = alloc_path_for_send(); + if (!p) + return -ENOMEM; + + fs_path_reset(path); + + key.objectid = ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + ret = btrfs_search_slot_for_read(root, &key, p, 1, 0); + if (ret < 0) + goto out; + if (ret) { + ret = 1; + goto out; + } + btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); + if (found_key.objectid != ino || + found_key.type != BTRFS_INODE_REF_KEY) { + ret = -ENOENT; + goto out; + } + + ret = iterate_inode_ref(sctx, root, p, &found_key, 1, + __copy_first_ref, path); + if (ret < 0) + goto out; + ret = 0; + +out: + btrfs_free_path(p); + return ret; +} + +struct backref_ctx { + struct send_ctx *sctx; + + /* number of total found references */ + u64 found; + + /* + * used for clones found in send_root. clones found behind cur_objectid + * and cur_offset are not considered as allowed clones. 
+ */ + u64 cur_objectid; + u64 cur_offset; + + /* may be truncated in case it's the last extent in a file */ + u64 extent_len; + + /* Just to check for bugs in backref resolving */ + int found_in_send_root; +}; + +static int __clone_root_cmp_bsearch(const void *key, const void *elt) +{ + u64 root = (u64)key; + struct clone_root *cr = (struct clone_root *)elt; + + if (root < cr->root->objectid) + return -1; + if (root > cr->root->objectid) + return 1; + return 0; +} + +static int __clone_root_cmp_sort(const void *e1, const void *e2) +{ + struct clone_root *cr1 = (struct clone_root *)e1; + struct clone_root *cr2 = (struct clone_root *)e2; + + if (cr1->root->objectid < cr2->root->objectid) + return -1; + if (cr1->root->objectid > cr2->root->objectid) + return 1; + return 0; +} + +/* + * Called for every backref that is found for the current extent. + */ +static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) +{ + struct backref_ctx *bctx = ctx_; + struct clone_root *found; + int ret; + u64 i_size; + + /* First check if the root is in the list of accepted clone sources */ + found = bsearch((void *)root, bctx->sctx->clone_roots, + bctx->sctx->clone_roots_cnt, + sizeof(struct clone_root), + __clone_root_cmp_bsearch); + if (!found) + return 0; + + if (found->root == bctx->sctx->send_root && + ino == bctx->cur_objectid && + offset == bctx->cur_offset) { + bctx->found_in_send_root = 1; + } + + /* + * There are inodes that have extents that lie behind it's i_size. Don't + * accept clones from these extents. + */ + ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL); + if (ret < 0) + return ret; + + if (offset + bctx->extent_len > i_size) + return 0; + + /* + * Make sure we don't consider clones from send_root that are + * behind the current inode/offset. + */ + if (found->root == bctx->sctx->send_root) { + /* + * TODO for the moment we don't accept clones from the inode + * that is currently send. 
We may change this when + * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same + * file. + */ + if (ino >= bctx->cur_objectid) + return 0; + /*if (ino > ctx->cur_objectid) + return 0; + if (offset + ctx->extent_len > ctx->cur_offset) + return 0;*/ + + bctx->found++; + found->found_refs++; + found->ino = ino; + found->offset = offset; + return 0; + } + + bctx->found++; + found->found_refs++; + if (ino < found->ino) { + found->ino = ino; + found->offset = offset; + } else if (found->ino == ino) { + /* + * same extent found more then once in the same file. + */ + if (found->offset > offset + bctx->extent_len) + found->offset = offset; + } + + return 0; +} + +/* + * path must point to the extent item when called. + */ +static int find_extent_clone(struct send_ctx *sctx, + struct btrfs_path *path, + u64 ino, u64 data_offset, + u64 ino_size, + struct clone_root **found) +{ + int ret; + int extent_type; + u64 logical; + u64 num_bytes; + u64 extent_item_pos; + struct btrfs_file_extent_item *fi; + struct extent_buffer *eb = path->nodes[0]; + struct backref_ctx backref_ctx; + struct clone_root *cur_clone_root; + struct btrfs_key found_key; + struct btrfs_path *tmp_path; + u32 i; + + tmp_path = alloc_path_for_send(); + if (!tmp_path) + return -ENOMEM; + + if (data_offset >= ino_size) { + /* + * There may be extents that lie behind the file's size. + * I at least had this in combination with snapshotting while + * writing large files. 
+ */ + ret = 0; + goto out; + } + + fi = btrfs_item_ptr(eb, path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(eb, fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + ret = -ENOENT; + goto out; + } + + num_bytes = btrfs_file_extent_num_bytes(eb, fi); + logical = btrfs_file_extent_disk_bytenr(eb, fi); + if (logical == 0) { + ret = -ENOENT; + goto out; + } + logical += btrfs_file_extent_offset(eb, fi); + + ret = extent_from_logical(sctx->send_root->fs_info, + logical, tmp_path, &found_key); + btrfs_release_path(tmp_path); + + if (ret < 0) + goto out; + if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + ret = -EIO; + goto out; + } + + /* + * Setup the clone roots. + */ + for (i = 0; i < sctx->clone_roots_cnt; i++) { + cur_clone_root = sctx->clone_roots + i; + cur_clone_root->ino = (u64)-1; + cur_clone_root->offset = 0; + cur_clone_root->found_refs = 0; + } + + backref_ctx.sctx = sctx; + backref_ctx.found = 0; + backref_ctx.cur_objectid = ino; + backref_ctx.cur_offset = data_offset; + backref_ctx.found_in_send_root = 0; + backref_ctx.extent_len = num_bytes; + + /* + * The last extent of a file may be too large due to page alignment. + * We need to adjust extent_len in this case so that the checks in + * __iterate_backrefs work. + */ + if (data_offset + num_bytes >= ino_size) + backref_ctx.extent_len = ino_size - data_offset; + + /* + * Now collect all backrefs. + */ + extent_item_pos = logical - found_key.objectid; + ret = iterate_extent_inodes(sctx->send_root->fs_info, + found_key.objectid, extent_item_pos, 1, + __iterate_backrefs, &backref_ctx); + if (ret < 0) + goto out; + + if (!backref_ctx.found_in_send_root) { + /* found a bug in backref code? */ + ret = -EIO; + printk(KERN_ERR "btrfs: ERROR did not find backref in " + "send_root. 
inode=%llu, offset=%llu, " + "logical=%llu\n", + ino, data_offset, logical); + goto out; + } + +verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " + "ino=%llu, " + "num_bytes=%llu, logical=%llu\n", + data_offset, ino, num_bytes, logical); + + if (!backref_ctx.found) + verbose_printk("btrfs: no clones found\n"); + + cur_clone_root = NULL; + for (i = 0; i < sctx->clone_roots_cnt; i++) { + if (sctx->clone_roots[i].found_refs) { + if (!cur_clone_root) + cur_clone_root = sctx->clone_roots + i; + else if (sctx->clone_roots[i].root == sctx->send_root) + /* prefer clones from send_root over others */ + cur_clone_root = sctx->clone_roots + i; + break; + } + + } + + if (cur_clone_root) { + *found = cur_clone_root; + ret = 0; + } else { + ret = -ENOENT; + } + +out: + btrfs_free_path(tmp_path); + return ret; +} + +static int read_symlink(struct send_ctx *sctx, + struct btrfs_root *root, + u64 ino, + struct fs_path *dest) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_file_extent_item *ei; + u8 type; + u8 compression; + unsigned long off; + int len; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret); + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], ei); + compression = btrfs_file_extent_compression(path->nodes[0], ei); + BUG_ON(type != BTRFS_FILE_EXTENT_INLINE); + BUG_ON(compression); + + off = btrfs_file_extent_inline_start(ei); + len = btrfs_file_extent_inline_len(path->nodes[0], ei); + + ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); + if (ret < 0) + goto out; + +out: + btrfs_free_path(path); + return ret; +} + +/* + * Helper function to generate a file name that is unique in the root of + * send_root and parent_root. 
This is used to generate names for orphan inodes. + */ +static int gen_unique_name(struct send_ctx *sctx, + u64 ino, u64 gen, + struct fs_path *dest) +{ + int ret = 0; + struct btrfs_path *path; + struct btrfs_dir_item *di; + char tmp[64]; + int len; + u64 idx = 0; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + while (1) { + len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu", + ino, gen, idx); + if (len >= sizeof(tmp)) { + /* should really not happen */ + ret = -EOVERFLOW; + goto out; + } + + di = btrfs_lookup_dir_item(NULL, sctx->send_root, + path, BTRFS_FIRST_FREE_OBJECTID, + tmp, strlen(tmp), 0); + btrfs_release_path(path); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + if (di) { + /* not unique, try again */ + idx++; + continue; + } + + if (!sctx->parent_root) { + /* unique */ + ret = 0; + break; + } + + di = btrfs_lookup_dir_item(NULL, sctx->parent_root, + path, BTRFS_FIRST_FREE_OBJECTID, + tmp, strlen(tmp), 0); + btrfs_release_path(path); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + if (di) { + /* not unique, try again */ + idx++; + continue; + } + /* unique */ + break; + } + + ret = fs_path_add(dest, tmp, strlen(tmp)); + +out: + btrfs_free_path(path); + return ret; +} + +enum inode_state { + inode_state_no_change, + inode_state_will_create, + inode_state_did_create, + inode_state_will_delete, + inode_state_did_delete, +}; + +static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) +{ + int ret; + int left_ret; + int right_ret; + u64 left_gen; + u64 right_gen; + + ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, + NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + left_ret = ret; + + if (!sctx->parent_root) { + right_ret = -ENOENT; + } else { + ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, + NULL, NULL, NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + right_ret = ret; + } + + if (!left_ret && !right_ret) { + if (left_gen == gen && right_gen == 
gen) + ret = inode_state_no_change; + else if (left_gen == gen) { + if (ino < sctx->send_progress) + ret = inode_state_did_create; + else + ret = inode_state_will_create; + } else if (right_gen == gen) { + if (ino < sctx->send_progress) + ret = inode_state_did_delete; + else + ret = inode_state_will_delete; + } else { + ret = -ENOENT; + } + } else if (!left_ret) { + if (left_gen == gen) { + if (ino < sctx->send_progress) + ret = inode_state_did_create; + else + ret = inode_state_will_create; + } else { + ret = -ENOENT; + } + } else if (!right_ret) { + if (right_gen == gen) { + if (ino < sctx->send_progress) + ret = inode_state_did_delete; + else + ret = inode_state_will_delete; + } else { + ret = -ENOENT; + } + } else { + ret = -ENOENT; + } + +out: + return ret; +} + +static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) +{ + int ret; + + ret = get_cur_inode_state(sctx, ino, gen); + if (ret < 0) + goto out; + + if (ret == inode_state_no_change || + ret == inode_state_did_create || + ret == inode_state_will_delete) + ret = 1; + else + ret = 0; + +out: + return ret; +} + +/* + * Helper function to lookup a dir item in a dir. 
+ */ +static int lookup_dir_item_inode(struct btrfs_root *root, + u64 dir, const char *name, int name_len, + u64 *found_inode, + u8 *found_type) +{ + int ret = 0; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path *path; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + di = btrfs_lookup_dir_item(NULL, root, path, + dir, name, name_len, 0); + if (!di) { + ret = -ENOENT; + goto out; + } + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + *found_inode = key.objectid; + *found_type = btrfs_dir_type(path->nodes[0], di); + +out: + btrfs_free_path(path); + return ret; +} + +static int get_first_ref(struct send_ctx *sctx, + struct btrfs_root *root, u64 ino, + u64 *dir, u64 *dir_gen, struct fs_path *name) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_path *path; + struct btrfs_inode_ref *iref; + int len; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); + if (ret < 0) + goto out; + if (!ret) + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (ret || found_key.objectid != key.objectid || + found_key.type != key.type) { + ret = -ENOENT; + goto out; + } + + iref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + len = btrfs_inode_ref_name_len(path->nodes[0], iref); + ret = fs_path_add_from_extent_buffer(name, path->nodes[0], + (unsigned long)(iref + 1), len); + if (ret < 0) + goto out; + btrfs_release_path(path); + + ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, + NULL); + if (ret < 0) + goto out; + + *dir = found_key.offset; + +out: + btrfs_free_path(path); + return ret; +} + +static int is_first_ref(struct send_ctx *sctx, + struct btrfs_root *root, + u64 ino, u64 dir, + const char *name, int name_len) +{ + int 
ret; + struct fs_path *tmp_name; + u64 tmp_dir; + u64 tmp_dir_gen; + + tmp_name = fs_path_alloc(sctx); + if (!tmp_name) + return -ENOMEM; + + ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); + if (ret < 0) + goto out; + + if (name_len != fs_path_len(tmp_name)) { + ret = 0; + goto out; + } + + ret = memcmp(tmp_name->start, name, name_len); + if (ret) + ret = 0; + else + ret = 1; + +out: + fs_path_free(sctx, tmp_name); + return ret; +} + +static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, + const char *name, int name_len, + u64 *who_ino, u64 *who_gen) +{ + int ret = 0; + u64 other_inode = 0; + u8 other_type = 0; + + if (!sctx->parent_root) + goto out; + + ret = is_inode_existent(sctx, dir, dir_gen); + if (ret <= 0) + goto out; + + ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, + &other_inode, &other_type); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + ret = 0; + goto out; + } + + if (other_inode > sctx->send_progress) { + ret = get_inode_info(sctx->parent_root, other_inode, NULL, + who_gen, NULL, NULL, NULL); + if (ret < 0) + goto out; + + ret = 1; + *who_ino = other_inode; + } else { + ret = 0; + } + +out: + return ret; +} + +static int did_overwrite_ref(struct send_ctx *sctx, + u64 dir, u64 dir_gen, + u64 ino, u64 ino_gen, + const char *name, int name_len) +{ + int ret = 0; + u64 gen; + u64 ow_inode; + u8 other_type; + + if (!sctx->parent_root) + goto out; + + ret = is_inode_existent(sctx, dir, dir_gen); + if (ret <= 0) + goto out; + + /* check if the ref was overwritten by another ref */ + ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, + &ow_inode, &other_type); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + /* was never and will never be overwritten */ + ret = 0; + goto out; + } + + ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, + NULL); + if (ret < 0) + goto out; + + if (ow_inode == ino && gen == ino_gen) { + ret = 0; + goto 
out; + } + + /* we know that it is or will be overwritten. check this now */ + if (ow_inode < sctx->send_progress) + ret = 1; + else + ret = 0; + +out: + return ret; +} + +static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) +{ + int ret = 0; + struct fs_path *name = NULL; + u64 dir; + u64 dir_gen; + + if (!sctx->parent_root) + goto out; + + name = fs_path_alloc(sctx); + if (!name) + return -ENOMEM; + + ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name); + if (ret < 0) + goto out; + + ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, + name->start, fs_path_len(name)); + if (ret < 0) + goto out; + +out: + fs_path_free(sctx, name); + return ret; +} + +static int name_cache_insert(struct send_ctx *sctx, + struct name_cache_entry *nce) +{ + int ret = 0; + struct name_cache_entry **ncea; + + ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); + if (ncea) { + if (!ncea[0]) + ncea[0] = nce; + else if (!ncea[1]) + ncea[1] = nce; + else + BUG(); + } else { + ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS); + if (!ncea) + return -ENOMEM; + + ncea[0] = nce; + ncea[1] = NULL; + ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea); + if (ret < 0) + return ret; + } + list_add_tail(&nce->list, &sctx->name_cache_list); + sctx->name_cache_size++; + + return ret; +} + +static void name_cache_delete(struct send_ctx *sctx, + struct name_cache_entry *nce) +{ + struct name_cache_entry **ncea; + + ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); + BUG_ON(!ncea); + + if (ncea[0] == nce) + ncea[0] = NULL; + else if (ncea[1] == nce) + ncea[1] = NULL; + else + BUG(); + + if (!ncea[0] && !ncea[1]) { + radix_tree_delete(&sctx->name_cache, nce->ino); + kfree(ncea); + } + + list_del(&nce->list); + + sctx->name_cache_size--; +} + +static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, + u64 ino, u64 gen) +{ + struct name_cache_entry **ncea; + + ncea = radix_tree_lookup(&sctx->name_cache, ino); + if (!ncea) + return 
NULL; + + if (ncea[0] && ncea[0]->gen == gen) + return ncea[0]; + else if (ncea[1] && ncea[1]->gen == gen) + return ncea[1]; + return NULL; +} + +static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) +{ + list_del(&nce->list); + list_add_tail(&nce->list, &sctx->name_cache_list); +} + +static void name_cache_clean_unused(struct send_ctx *sctx) +{ + struct name_cache_entry *nce; + + if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE) + return; + + while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) { + nce = list_entry(sctx->name_cache_list.next, + struct name_cache_entry, list); + name_cache_delete(sctx, nce); + kfree(nce); + } +} + +static void name_cache_free(struct send_ctx *sctx) +{ + struct name_cache_entry *nce; + struct name_cache_entry *tmp; + + list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) { + name_cache_delete(sctx, nce); + } +} + +static int __get_cur_name_and_parent(struct send_ctx *sctx, + u64 ino, u64 gen, + u64 *parent_ino, + u64 *parent_gen, + struct fs_path *dest) +{ + int ret; + int nce_ret; + struct btrfs_path *path = NULL; + struct name_cache_entry *nce = NULL; + + nce = name_cache_search(sctx, ino, gen); + if (nce) { + if (ino < sctx->send_progress && nce->need_later_update) { + name_cache_delete(sctx, nce); + kfree(nce); + nce = NULL; + } else { + name_cache_used(sctx, nce); + *parent_ino = nce->parent_ino; + *parent_gen = nce->parent_gen; + ret = fs_path_add(dest, nce->name, nce->name_len); + if (ret < 0) + goto out; + ret = nce->ret; + goto out; + } + } + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + ret = is_inode_existent(sctx, ino, gen); + if (ret < 0) + goto out; + + if (!ret) { + ret = gen_unique_name(sctx, ino, gen, dest); + if (ret < 0) + goto out; + ret = 1; + goto out_cache; + } + + if (ino < sctx->send_progress) + ret = get_first_ref(sctx, sctx->send_root, ino, + parent_ino, parent_gen, dest); + else + ret = get_first_ref(sctx, sctx->parent_root, 
ino, + parent_ino, parent_gen, dest); + if (ret < 0) + goto out; + + ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, + dest->start, dest->end - dest->start); + if (ret < 0) + goto out; + if (ret) { + fs_path_reset(dest); + ret = gen_unique_name(sctx, ino, gen, dest); + if (ret < 0) + goto out; + ret = 1; + } + +out_cache: + nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); + if (!nce) { + ret = -ENOMEM; + goto out; + } + + nce->ino = ino; + nce->gen = gen; + nce->parent_ino = *parent_ino; + nce->parent_gen = *parent_gen; + nce->name_len = fs_path_len(dest); + nce->ret = ret; + strcpy(nce->name, dest->start); + memset(&nce->use_list, 0, sizeof(nce->use_list)); + + if (ino < sctx->send_progress) + nce->need_later_update = 0; + else + nce->need_later_update = 1; + + nce_ret = name_cache_insert(sctx, nce); + if (nce_ret < 0) + ret = nce_ret; + name_cache_clean_unused(sctx); + +out: + btrfs_free_path(path); + return ret; +} + +/* + * Magic happens here. This function returns the first ref to an inode as it + * would look like while receiving the stream at this point in time. + * We walk the path up to the root. For every inode in between, we check if it + * was already processed/sent. If yes, we continue with the parent as found + * in send_root. If not, we continue with the parent as found in parent_root. + * If we encounter an inode that was deleted at this point in time, we use the + * inodes "orphan" name instead of the real name and stop. Same with new inodes + * that were not created yet and overwritten inodes/refs. + * + * When do we have have orphan inodes: + * 1. When an inode is freshly created and thus no valid refs are available yet + * 2. When a directory lost all it's refs (deleted) but still has dir items + * inside which were not processed yet (pending for move/delete). If anyone + * tried to get the path to the dir items, it would get a path inside that + * orphan directory. + * 3. 
+ *    When an inode is moved around or gets new links, it may overwrite the ref
+ *    of an unprocessed inode. If in that case the first ref would be
+ *    overwritten, the overwritten inode gets "orphanized". Later when we
+ *    process this overwritten inode, it is restored at a new place by moving
+ *    the orphan inode.
+ *
+ * sctx->send_progress tells this function at which point in time receiving
+ * would be.
+ *
+ * The path is collected in dest component by component, starting at ino and
+ * walking up through the parents. dest is switched to reversed mode for that
+ * and unreversed again only if 0 is returned.
+ */
+static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
+			struct fs_path *dest)
+{
+	struct fs_path *component;
+	u64 up_ino = 0;
+	u64 up_gen = 0;
+	int ret = 0;
+
+	component = fs_path_alloc(sctx);
+	if (!component) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	dest->reversed = 1;
+	fs_path_reset(dest);
+
+	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+		int is_last;
+
+		fs_path_reset(component);
+
+		ret = __get_cur_name_and_parent(sctx, ino, gen,
+				&up_ino, &up_gen, component);
+		if (ret < 0)
+			goto out;
+		/* a positive result marks the last component to add */
+		is_last = ret;
+
+		ret = fs_path_add_path(dest, component);
+		if (ret < 0)
+			goto out;
+		if (is_last)
+			break;
+
+		/* continue with the parent directory */
+		ino = up_ino;
+		gen = up_gen;
+	}
+
+out:
+	fs_path_free(sctx, component);
+	if (!ret)
+		fs_path_unreverse(dest);
+	return ret;
+}
+
+/*
+ * Called for regular files when sending extents data. Opens a struct file
+ * to read from the file.
+ */ +static int open_cur_inode_file(struct send_ctx *sctx) +{ + int ret = 0; + struct btrfs_key key; + struct vfsmount *mnt; + struct inode *inode; + struct dentry *dentry; + struct file *filp; + int new = 0; + + if (sctx->cur_inode_filp) + goto out; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root, + &new); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + + dentry = d_obtain_alias(inode); + inode = NULL; + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + mnt = mntget(sctx->mnt); + filp = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE, current_cred()); + dentry = NULL; + mnt = NULL; + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto out; + } + sctx->cur_inode_filp = filp; + +out: + /* + * no xxxput required here as every vfs op + * does it by itself on failure + */ + return ret; +} + +/* + * Closes the struct file that was created in open_cur_inode_file + */ +static int close_cur_inode_file(struct send_ctx *sctx) +{ + int ret = 0; + + if (!sctx->cur_inode_filp) + goto out; + + ret = filp_close(sctx->cur_inode_filp, NULL); + sctx->cur_inode_filp = NULL; + +out: + return ret; +} + +/* + * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace + */ +static int send_subvol_begin(struct send_ctx *sctx) +{ + int ret; + struct btrfs_root *send_root = sctx->send_root; + struct btrfs_root *parent_root = sctx->parent_root; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; + char *name = NULL; + int namelen; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); + if (!name) { + btrfs_free_path(path); + return -ENOMEM; + } + + key.objectid = send_root->objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = 0; + + ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, + &key, path, 1, 0); + if 
(ret < 0) + goto out; + if (ret) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type != BTRFS_ROOT_BACKREF_KEY || + key.objectid != send_root->objectid) { + ret = -ENOENT; + goto out; + } + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + namelen = btrfs_root_ref_name_len(leaf, ref); + read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); + btrfs_release_path(path); + + if (ret < 0) + goto out; + + if (parent_root) { + ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); + if (ret < 0) + goto out; + } else { + ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); + if (ret < 0) + goto out; + } + + TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); + TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, + sctx->send_root->root_item.uuid); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, + sctx->send_root->root_item.ctransid); + if (parent_root) { + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, + sctx->parent_root->root_item.uuid); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, + sctx->parent_root->root_item.ctransid); + } + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + btrfs_free_path(path); + kfree(name); + return ret; +} + +static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) +{ + int ret = 0; + struct fs_path *p; + +verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, ino, gen, p); + if (ret < 0) + goto out; + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} + +static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) +{ + int ret = 0; + struct fs_path *p; + +verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); + + p = 
fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, ino, gen, p); + if (ret < 0) + goto out; + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} + +static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) +{ + int ret = 0; + struct fs_path *p; + +verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, ino, gen, p); + if (ret < 0) + goto out; + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); + TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} + +static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) +{ + int ret = 0; + struct fs_path *p = NULL; + struct btrfs_inode_item *ii; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + struct btrfs_key key; + int slot; + +verbose_printk("btrfs: send_utimes %llu\n", ino); + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + path = alloc_path_for_send(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + eb = path->nodes[0]; + slot = path->slots[0]; + ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + + ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, ino, gen, p); + if (ret < 0) + goto out; + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, + btrfs_inode_atime(ii)); 
+ TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, + btrfs_inode_mtime(ii)); + TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, + btrfs_inode_ctime(ii)); + /* TODO otime? */ + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + btrfs_free_path(path); + return ret; +} + +/* + * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have + * a valid path yet because we did not process the refs yet. So, the inode + * is created as orphan. + * Returns -ENOTSUPP for an inode type that none of the commands covers. + */ +static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path, + struct btrfs_key *key) +{ + int ret = 0; + struct extent_buffer *eb = path->nodes[0]; + struct btrfs_inode_item *ii; + struct fs_path *p; + int slot = path->slots[0]; + int cmd; + u64 mode; + +verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + mode = btrfs_inode_mode(eb, ii); + + if (S_ISREG(mode)) + cmd = BTRFS_SEND_C_MKFILE; + else if (S_ISDIR(mode)) + cmd = BTRFS_SEND_C_MKDIR; + else if (S_ISLNK(mode)) + cmd = BTRFS_SEND_C_SYMLINK; + else if (S_ISCHR(mode) || S_ISBLK(mode)) + cmd = BTRFS_SEND_C_MKNOD; + else if (S_ISFIFO(mode)) + cmd = BTRFS_SEND_C_MKFIFO; + else if (S_ISSOCK(mode)) + cmd = BTRFS_SEND_C_MKSOCK; + else { + /* Terminate the message so it is not merged with the next one. */ + printk(KERN_WARNING "btrfs: unexpected inode type %o\n", + (int)(mode & S_IFMT)); + ret = -ENOTSUPP; + goto out; + } + + ret = begin_cmd(sctx, cmd); + if (ret < 0) + goto out; + + ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino); + + if (S_ISLNK(mode)) { + fs_path_reset(p); + ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p); + if (ret < 0) + goto out; + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); + } else if (S_ISCHR(mode) || S_ISBLK(mode) || + S_ISFIFO(mode) || S_ISSOCK(mode)) { +
TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii)); + } + + ret = send_cmd(sctx); + if (ret < 0) + goto out; + + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} + +struct recorded_ref { + struct list_head list; + char *dir_path; + char *name; + struct fs_path *full_path; + u64 dir; + u64 dir_gen; + int dir_path_len; + int name_len; +}; + +/* + * We need to process new refs before deleted refs, but compare_tree gives us + * everything mixed. So we first record all refs and later process them. + * This function is a helper to record one ref. + * On success the new entry keeps @path in full_path; name and dir_path point + * into that buffer, and __free_recorded_refs() releases it together with the + * entry. Returns -ENOMEM if the entry cannot be allocated. + */ +static int record_ref(struct list_head *head, u64 dir, + u64 dir_gen, struct fs_path *path) +{ + struct recorded_ref *ref; + char *tmp; + + ref = kmalloc(sizeof(*ref), GFP_NOFS); + if (!ref) + return -ENOMEM; + + ref->dir = dir; + ref->dir_gen = dir_gen; + ref->full_path = path; + + tmp = strrchr(ref->full_path->start, '/'); + if (!tmp) { + ref->name_len = ref->full_path->end - ref->full_path->start; + ref->name = ref->full_path->start; + ref->dir_path_len = 0; + ref->dir_path = ref->full_path->start; + } else { + tmp++; + ref->name_len = ref->full_path->end - tmp; + ref->name = tmp; + ref->dir_path = ref->full_path->start; + ref->dir_path_len = ref->full_path->end - + ref->full_path->start - 1 - ref->name_len; + } + + list_add_tail(&ref->list, head); + return 0; +} + +static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) +{ + struct recorded_ref *cur; + struct recorded_ref *tmp; + + list_for_each_entry_safe(cur, tmp, head, list) { + fs_path_free(sctx, cur->full_path); + kfree(cur); + } + INIT_LIST_HEAD(head); +} + +static void free_recorded_refs(struct send_ctx *sctx) +{ + __free_recorded_refs(sctx, &sctx->new_refs); + __free_recorded_refs(sctx, &sctx->deleted_refs); +} + +/* + * Renames/moves a file/dir to its orphan name. Used when the first + * ref of an unprocessed inode gets overwritten and for all non-empty + * directories.
+ */ +static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, + struct fs_path *path) +{ + int ret; + struct fs_path *orphan; + + orphan = fs_path_alloc(sctx); + if (!orphan) + return -ENOMEM; + + ret = gen_unique_name(sctx, ino, gen, orphan); + if (ret < 0) + goto out; + + ret = send_rename(sctx, path, orphan); + +out: + fs_path_free(sctx, orphan); + return ret; +} + +/* + * Returns 1 if a directory can be removed at this point in time. + * We check this by iterating all dir index items of the directory in the + * parent root and checking if the inode behind each dir item was already + * processed. Returns 0 as soon as an item refers to an inode number above + * @send_progress, and a negative errno if the path allocation or the tree + * search fails. + */ +static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) +{ + int ret = 0; + struct btrfs_root *root = sctx->parent_root; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_key loc; + struct btrfs_dir_item *di; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = dir; + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = 0; + + while (1) { + ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); + if (ret < 0) + goto out; + if (!ret) { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + } + if (ret || found_key.objectid != key.objectid || + found_key.type != key.type) { + break; + } + + di = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); + + if (loc.objectid > send_progress) { + ret = 0; + goto out; + } + + btrfs_release_path(path); + key.offset = found_key.offset + 1; + } + + ret = 1; + +out: + btrfs_free_path(path); + return ret; +} + +struct finish_unordered_dir_ctx { + struct send_ctx *sctx; + struct fs_path *cur_path; + struct fs_path *dir_path; + u64 dir_ino; + int need_delete; + int delete_pass; +}; + +int __finish_unordered_dir(int num, struct btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx) +{ + int ret = 0; + struct finish_unordered_dir_ctx
*fctx = ctx; + struct send_ctx *sctx = fctx->sctx; + u64 di_gen; + u64 di_mode; + int is_orphan = 0; + + if (di_key->objectid >= fctx->dir_ino) + goto out; + + fs_path_reset(fctx->cur_path); + + ret = get_inode_info(sctx->send_root, di_key->objectid, + NULL, &di_gen, &di_mode, NULL, NULL); + if (ret < 0) + goto out; + + ret = is_first_ref(sctx, sctx->send_root, di_key->objectid, + fctx->dir_ino, name, name_len); + if (ret < 0) + goto out; + if (ret) { + is_orphan = 1; + ret = gen_unique_name(sctx, di_key->objectid, di_gen, + fctx->cur_path); + } else { + ret = get_cur_path(sctx, di_key->objectid, di_gen, + fctx->cur_path); + } + if (ret < 0) + goto out; + + ret = fs_path_add(fctx->dir_path, name, name_len); + if (ret < 0) + goto out; + + if (!fctx->delete_pass) { + if (S_ISDIR(di_mode)) { + ret = send_rename(sctx, fctx->cur_path, + fctx->dir_path); + } else { + ret = send_link(sctx, fctx->dir_path, + fctx->cur_path); + if (is_orphan) + fctx->need_delete = 1; + } + } else if (!S_ISDIR(di_mode)) { + ret = send_unlink(sctx, fctx->cur_path); + } else { + ret = 0; + } + + fs_path_remove(fctx->dir_path); + +out: + return ret; +} + +/* + * Go through all dir items and see if we find refs which could not be created + * in the past because the dir did not exist at that time. + * Returns a negative errno on failure. + */ +static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen) +{ + int ret = 0; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *eb; + struct finish_unordered_dir_ctx fctx; + int slot; + + /* + * Zero fctx before the first 'goto out': the exit path frees + * fctx.cur_path and fctx.dir_path, which must not be uninitialized + * when the path allocation below fails. + */ + memset(&fctx, 0, sizeof(fctx)); + + path = alloc_path_for_send(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + fctx.sctx = sctx; + fctx.cur_path = fs_path_alloc(sctx); + fctx.dir_path = fs_path_alloc(sctx); + if (!fctx.cur_path || !fctx.dir_path) { + ret = -ENOMEM; + goto out; + } + fctx.dir_ino = dir; + + ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path); + if (ret < 0) + goto out; + + /* + * We do two passes.
The first links in the new refs and the second + * deletes orphans if required. Deletion of orphans is not required for + * directory inodes, as we always have only one ref and use rename + * instead of link for those. + */ + +again: + key.objectid = dir; + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = 0; + while (1) { + ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, + 1, 0); + if (ret < 0) + goto out; + if (ret) { + /* + * No item at or after key: do not read a key from the + * path; treat it as the end of this pass. + */ + ret = 0; + btrfs_release_path(path); + break; + } + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + + if (found_key.objectid != key.objectid || + found_key.type != key.type) { + btrfs_release_path(path); + break; + } + + ret = iterate_dir_item(sctx, sctx->send_root, path, + &found_key, __finish_unordered_dir, + &fctx); + if (ret < 0) + goto out; + + key.offset = found_key.offset + 1; + btrfs_release_path(path); + } + + if (!fctx.delete_pass && fctx.need_delete) { + fctx.delete_pass = 1; + goto again; + } + +out: + btrfs_free_path(path); + fs_path_free(sctx, fctx.cur_path); + fs_path_free(sctx, fctx.dir_path); + return ret; +} + +/* + * This does all the move/link/unlink/rmdir magic. + */ +static int process_recorded_refs(struct send_ctx *sctx) +{ + int ret = 0; + struct recorded_ref *cur; + struct ulist *check_dirs = NULL; + struct ulist_iterator uit; + struct ulist_node *un; + struct fs_path *valid_path = NULL; + u64 ow_inode; + u64 ow_gen; + int did_overwrite = 0; + int is_orphan = 0; + +verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); + + valid_path = fs_path_alloc(sctx); + if (!valid_path) { + ret = -ENOMEM; + goto out; + } + + check_dirs = ulist_alloc(GFP_NOFS); + if (!check_dirs) { + ret = -ENOMEM; + goto out; + } + + /* + * First, check if the first ref of the current inode was overwritten + * before. If yes, we know that the current inode was already orphanized + * and thus use the orphan name. If not, we can use get_cur_path to + * get the path of the first ref as it would look like while receiving at + * this point in time.
+ * New inodes are always orphan at the beginning, so force to use the + * orphan name in this case. + * The first ref is stored in valid_path and will be updated if it + * gets moved around. + */ + if (!sctx->cur_inode_new) { + ret = did_overwrite_first_ref(sctx, sctx->cur_ino, + sctx->cur_inode_gen); + if (ret < 0) + goto out; + if (ret) + did_overwrite = 1; + } + if (sctx->cur_inode_new || did_overwrite) { + ret = gen_unique_name(sctx, sctx->cur_ino, + sctx->cur_inode_gen, valid_path); + if (ret < 0) + goto out; + is_orphan = 1; + } else { + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, + valid_path); + if (ret < 0) + goto out; + } + + list_for_each_entry(cur, &sctx->new_refs, list) { + /* + * Check if this new ref would overwrite the first ref of + * another unprocessed inode. If yes, orphanize the + * overwritten inode. If we find an overwritten ref that is + * not the first ref, simply unlink it. + */ + ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, + cur->name, cur->name_len, + &ow_inode, &ow_gen); + if (ret < 0) + goto out; + if (ret) { + ret = is_first_ref(sctx, sctx->parent_root, + ow_inode, cur->dir, cur->name, + cur->name_len); + if (ret < 0) + goto out; + if (ret) { + ret = orphanize_inode(sctx, ow_inode, ow_gen, + cur->full_path); + if (ret < 0) + goto out; + } else { + ret = send_unlink(sctx, cur->full_path); + if (ret < 0) + goto out; + } + } + + /* + * link/move the ref to the new place. If we have an orphan + * inode, move it and update valid_path. If not, link or move + * it depending on the inode mode. + */ + if (is_orphan && !sctx->cur_inode_first_ref_orphan) { + ret = send_rename(sctx, valid_path, cur->full_path); + if (ret < 0) + goto out; + is_orphan = 0; + ret = fs_path_copy(valid_path, cur->full_path); + if (ret < 0) + goto out; + } else { + if (S_ISDIR(sctx->cur_inode_mode)) { + /* + * Dirs can't be linked, so move it. For moved + * dirs, we always have one new and one deleted + * ref. 
The deleted ref is ignored later. + */ + ret = send_rename(sctx, valid_path, + cur->full_path); + if (ret < 0) + goto out; + ret = fs_path_copy(valid_path, cur->full_path); + if (ret < 0) + goto out; + } else { + ret = send_link(sctx, cur->full_path, + valid_path); + if (ret < 0) + goto out; + } + } + ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, + GFP_NOFS); + if (ret < 0) + goto out; + } + + if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { + /* + * Check if we can already rmdir the directory. If not, + * orphanize it. For every dir item inside that gets deleted + * later, we do this check again and rmdir it then if possible. + * See the use of check_dirs for more details. + */ + ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); + if (ret < 0) + goto out; + if (ret) { + ret = send_rmdir(sctx, valid_path); + if (ret < 0) + goto out; + } else if (!is_orphan) { + ret = orphanize_inode(sctx, sctx->cur_ino, + sctx->cur_inode_gen, valid_path); + if (ret < 0) + goto out; + is_orphan = 1; + } + + list_for_each_entry(cur, &sctx->deleted_refs, list) { + ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, + GFP_NOFS); + if (ret < 0) + goto out; + } + } else if (!S_ISDIR(sctx->cur_inode_mode)) { + /* + * We have a non dir inode. Go through all deleted refs and + * unlink them if they were not already overwritten by other + * inodes. + */ + list_for_each_entry(cur, &sctx->deleted_refs, list) { + ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, + sctx->cur_ino, sctx->cur_inode_gen, + cur->name, cur->name_len); + if (ret < 0) + goto out; + if (!ret) { + /* + * In case the inode was moved to a directory + * that was not created yet (see + * __record_new_ref), we can not unlink the ref + * as it will be needed later when the parent + * directory is created, so that we can move in + * the inode to the new dir. 
+ */ + if (!is_orphan && + sctx->cur_inode_first_ref_orphan) { + ret = orphanize_inode(sctx, + sctx->cur_ino, + sctx->cur_inode_gen, + cur->full_path); + if (ret < 0) + goto out; + ret = gen_unique_name(sctx, + sctx->cur_ino, + sctx->cur_inode_gen, + valid_path); + if (ret < 0) + goto out; + is_orphan = 1; + + } else { + ret = send_unlink(sctx, cur->full_path); + if (ret < 0) + goto out; + } + } + ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, + GFP_NOFS); + if (ret < 0) + goto out; + } + + /* + * If the inode is still orphan, unlink the orphan. This may + * happen when a previous inode did overwrite the first ref + * of this inode and no new refs were added for the current + * inode. + * However, we cannot delete the orphan in case the inode resides + * in a directory that was not created yet (see + * __record_new_ref). + */ + if (is_orphan && !sctx->cur_inode_first_ref_orphan) { + ret = send_unlink(sctx, valid_path); + if (ret < 0) + goto out; + } + } + + /* + * We did collect all parent dirs where cur_inode was once located. We + * now go through all these dirs and check if they are pending for + * deletion and if it's finally possible to perform the rmdir now. + * We also update the inode stats of the parent dirs here.
+ */ + ULIST_ITER_INIT(&uit); + while ((un = ulist_next(check_dirs, &uit))) { + if (un->val > sctx->cur_ino) + continue; + + ret = get_cur_inode_state(sctx, un->val, un->aux); + if (ret < 0) + goto out; + + if (ret == inode_state_did_create || + ret == inode_state_no_change) { + /* TODO delayed utimes */ + ret = send_utimes(sctx, un->val, un->aux); + if (ret < 0) + goto out; + } else if (ret == inode_state_did_delete) { + ret = can_rmdir(sctx, un->val, sctx->cur_ino); + if (ret < 0) + goto out; + if (ret) { + ret = get_cur_path(sctx, un->val, un->aux, + valid_path); + if (ret < 0) + goto out; + ret = send_rmdir(sctx, valid_path); + if (ret < 0) + goto out; + } + } + } + + /* + * Current inode is now at its new position, so we must increase + * send_progress. + */ + sctx->send_progress = sctx->cur_ino + 1; + + /* + * We may have a directory here that has pending refs which could not + * be created before (because the dir did not exist before, see + * __record_new_ref). finish_outoforder_dir will link/move the pending + * refs. + */ + if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) { + ret = finish_outoforder_dir(sctx, sctx->cur_ino, + sctx->cur_inode_gen); + if (ret < 0) + goto out; + } + + ret = 0; + +out: + free_recorded_refs(sctx); + ulist_free(check_dirs); + fs_path_free(sctx, valid_path); + return ret; +} + +static int __record_new_ref(int num, u64 dir, int index, + struct fs_path *name, + void *ctx) +{ + int ret = 0; + struct send_ctx *sctx = ctx; + struct fs_path *p; + u64 gen; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, + NULL); + if (ret < 0) + goto out; + + /* + * The parent may be non-existent at this point in time. This happens + * if the ino of the parent dir is higher than the current ino. In this + * case, we cannot process this ref until the parent dir is finally + * created.
If we reach the parent dir later, process_recorded_refs + * will go through all dir items and process the refs that could not be + * processed before. In case this is the first ref, we set + * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to + * keep an orphan of the inode so that it later can be used for + * link/move + */ + ret = is_inode_existent(sctx, dir, gen); + if (ret < 0) + goto out; + if (!ret) { + ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir, + name->start, fs_path_len(name)); + if (ret < 0) + goto out; + if (ret) + sctx->cur_inode_first_ref_orphan = 1; + /* + * No ref was recorded, so p is still ours. The exit path only + * frees it when ret is non-zero, so release it here. + */ + fs_path_free(sctx, p); + ret = 0; + goto out; + } + + ret = get_cur_path(sctx, dir, gen, p); + if (ret < 0) + goto out; + ret = fs_path_add_path(p, name); + if (ret < 0) + goto out; + + /* On success the recorded ref keeps p. */ + ret = record_ref(&sctx->new_refs, dir, gen, p); + +out: + if (ret) + fs_path_free(sctx, p); + return ret; +} + +/* + * iterate_inode_ref callback: records one ref of the parent root in + * sctx->deleted_refs. On success the recorded ref keeps p. + */ +static int __record_deleted_ref(int num, u64 dir, int index, + struct fs_path *name, + void *ctx) +{ + int ret = 0; + struct send_ctx *sctx = ctx; + struct fs_path *p; + u64 gen; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, + NULL); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, dir, gen, p); + if (ret < 0) + goto out; + ret = fs_path_add_path(p, name); + if (ret < 0) + goto out; + + ret = record_ref(&sctx->deleted_refs, dir, gen, p); + +out: + if (ret) + fs_path_free(sctx, p); + return ret; +} + +static int record_new_ref(struct send_ctx *sctx) +{ + int ret; + + ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, + sctx->cmp_key, 0, __record_new_ref, sctx); + if (ret < 0) + goto out; + ret = 0; + +out: + return ret; +} + +static int record_deleted_ref(struct send_ctx *sctx) +{ + int ret; + + ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, + sctx->cmp_key, 0, __record_deleted_ref, sctx); + if (ret < 0) + goto out; + ret = 0; + +out: + return ret; +} + +struct find_ref_ctx { + u64
dir; + struct fs_path *name; + int found_idx; +}; +/* + * iterate_inode_ref callback: stops the iteration (returns 1) at the first + * ref whose dir and name equal the ones in the find_ref_ctx and stores the + * number of that ref in found_idx. + */ +static int __find_iref(int num, u64 dir, int index, + struct fs_path *name, + void *ctx_) +{ + struct find_ref_ctx *ctx = ctx_; + + if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && + strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { + ctx->found_idx = num; + return 1; + } + return 0; +} +/* + * Looks for the ref (dir, name) among the inode refs at path/key. + * Returns the number of the matching ref, -ENOENT if no ref matches, or the + * negative error returned by iterate_inode_ref. + */ +static int find_iref(struct send_ctx *sctx, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u64 dir, struct fs_path *name) +{ + int ret; + struct find_ref_ctx ctx; + + ctx.dir = dir; + ctx.name = name; + ctx.found_idx = -1; + + ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx); + if (ret < 0) + return ret; + + if (ctx.found_idx == -1) + return -ENOENT; + + return ctx.found_idx; +} +/* + * iterate_inode_ref callback for the refs of the send root: records the ref + * as new if the parent root has no ref with the same dir and name. + */ +static int __record_changed_new_ref(int num, u64 dir, int index, + struct fs_path *name, + void *ctx) +{ + int ret; + struct send_ctx *sctx = ctx; + + ret = find_iref(sctx, sctx->parent_root, sctx->right_path, + sctx->cmp_key, dir, name); + if (ret == -ENOENT) + ret = __record_new_ref(num, dir, index, name, sctx); + else if (ret > 0) + ret = 0; + + return ret; +} +/* + * iterate_inode_ref callback for the refs of the parent root: records the ref + * as deleted if the send root has no ref with the same dir and name. + */ +static int __record_changed_deleted_ref(int num, u64 dir, int index, + struct fs_path *name, + void *ctx) +{ + int ret; + struct send_ctx *sctx = ctx; + + ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, + dir, name); + if (ret == -ENOENT) + ret = __record_deleted_ref(num, dir, index, name, sctx); + else if (ret > 0) + ret = 0; + + return ret; +} +/* + * Records the refs that differ between the send root (left_path) and the + * parent root (right_path) for the item at sctx->cmp_key. + */ +static int record_changed_ref(struct send_ctx *sctx) +{ + int ret = 0; + + ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, + sctx->cmp_key, 0, __record_changed_new_ref, sctx); + if (ret < 0) + goto out; + ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, + sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); + if (ret < 0) + goto out; + ret = 0; + +out: + return ret; +} + +/* + * Record and process
all refs at once. Needed when an inode changes the + * generation number, which means that it was deleted and recreated. + * @cmd selects the side: BTRFS_COMPARE_TREE_NEW records the refs found in the + * send root as new refs, BTRFS_COMPARE_TREE_DELETED records the refs found in + * the parent root as deleted refs. Any other value is a BUG(). + */ +static int process_all_refs(struct send_ctx *sctx, + enum btrfs_compare_tree_result cmd) +{ + int ret; + struct btrfs_root *root; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *eb; + int slot; + iterate_inode_ref_t cb; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + if (cmd == BTRFS_COMPARE_TREE_NEW) { + root = sctx->send_root; + cb = __record_new_ref; + } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { + root = sctx->parent_root; + cb = __record_deleted_ref; + } else { + BUG(); + } + + key.objectid = sctx->cmp_key->objectid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + while (1) { + ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); + if (ret < 0) { + btrfs_release_path(path); + goto out; + } + if (ret) { + btrfs_release_path(path); + break; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + + if (found_key.objectid != key.objectid || + found_key.type != key.type) { + btrfs_release_path(path); + break; + } + + /* Iterate in the same root the path was searched in. */ + ret = iterate_inode_ref(sctx, root, path, + &found_key, 0, cb, sctx); + btrfs_release_path(path); + if (ret < 0) + goto out; + + key.offset = found_key.offset + 1; + } + + ret = process_recorded_refs(sctx); + +out: + btrfs_free_path(path); + return ret; +} + +/* + * Sends a BTRFS_SEND_C_SET_XATTR command for @path with the given xattr name + * and value. + */ +static int send_set_xattr(struct send_ctx *sctx, + struct fs_path *path, + const char *name, int name_len, + const char *data, int data_len) +{ + int ret = 0; + + ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); + TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +static int send_remove_xattr(struct send_ctx
*sctx, + struct fs_path *path, + const char *name, int name_len) +{ + int ret = 0; + + ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} +/* + * iterate_dir_item callback: sends a set-xattr command for the current inode + * with the given name and value. + */ +static int __process_new_xattr(int num, struct btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx) +{ + int ret; + struct send_ctx *sctx = ctx; + struct fs_path *p; + posix_acl_xattr_header dummy_acl; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + /* + * This hack is needed because empty ACLs are stored as zero-byte + * data in xattrs. The problem is that receiving these zero-byte + * ACLs will fail later. To fix this, we send a dummy ACL list that + * only contains the version number and no entries. + * + * NOTE(review): strncmp() is bounded by name_len only, so a name that + * is merely a prefix of one of the ACL xattr names also matches here. + * Confirm whether that is intended. + */ + if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || + !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { + if (data_len == 0) { + dummy_acl.a_version = + cpu_to_le32(POSIX_ACL_XATTR_VERSION); + data = (char *)&dummy_acl; + data_len = sizeof(dummy_acl); + } + } + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + ret = send_set_xattr(sctx, p, name, name_len, data, data_len); + +out: + fs_path_free(sctx, p); + return ret; +} +/* + * iterate_dir_item callback: sends a remove-xattr command for the current + * inode and the given xattr name. + */ +static int __process_deleted_xattr(int num, struct btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx) +{ + int ret; + struct send_ctx *sctx = ctx; + struct fs_path *p; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + ret = send_remove_xattr(sctx, p, name, name_len); + +out: + fs_path_free(sctx, p); + return ret; +} + +static int process_new_xattr(struct send_ctx *sctx) +{ + int ret = 0; + + ret
= iterate_dir_item(sctx, sctx->send_root, sctx->left_path, + sctx->cmp_key, __process_new_xattr, sctx); + + return ret; +} + +static int process_deleted_xattr(struct send_ctx *sctx) +{ + int ret; + + ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, + sctx->cmp_key, __process_deleted_xattr, sctx); + + return ret; +} + +struct find_xattr_ctx { + const char *name; + int name_len; + int found_idx; + char *found_data; + int found_data_len; +}; +/* + * iterate_dir_item callback: when the name equals the one in the + * find_xattr_ctx, stores the number of the item and a kmalloc'ed copy of its + * value in the context and returns 1. Returns -ENOMEM if the copy cannot be + * allocated and 0 if the name does not match. + */ +static int __find_xattr(int num, struct btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *vctx) +{ + struct find_xattr_ctx *ctx = vctx; + + if (name_len == ctx->name_len && + strncmp(name, ctx->name, name_len) == 0) { + ctx->found_idx = num; + ctx->found_data_len = data_len; + ctx->found_data = kmalloc(data_len, GFP_NOFS); + if (!ctx->found_data) + return -ENOMEM; + memcpy(ctx->found_data, data, data_len); + return 1; + } + return 0; +} +/* + * Looks for the xattr @name in the item at path/key. + * Returns the number of the matching xattr, -ENOENT if there is none, or the + * negative error returned by iterate_dir_item. If @data is non-NULL, a + * kmalloc'ed copy of the value is handed to the caller, who must kfree it; + * otherwise the copy is freed here. + */ +static int find_xattr(struct send_ctx *sctx, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + const char *name, int name_len, + char **data, int *data_len) +{ + int ret; + struct find_xattr_ctx ctx; + + ctx.name = name; + ctx.name_len = name_len; + ctx.found_idx = -1; + ctx.found_data = NULL; + ctx.found_data_len = 0; + + ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx); + if (ret < 0) + return ret; + + if (ctx.found_idx == -1) + return -ENOENT; + if (data) { + *data = ctx.found_data; + *data_len = ctx.found_data_len; + } else { + kfree(ctx.found_data); + } + return ctx.found_idx; +} + +/* + * iterate_dir_item callback for an xattr of the send root: sends the xattr if + * the parent root has no xattr of that name or holds a different value. + */ +static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx) +{ + int ret; + struct send_ctx *sctx = ctx; + char *found_data = NULL; + int found_data_len = 0; + struct fs_path *p = NULL; + + ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, + sctx->cmp_key, name, name_len,
&found_data, + &found_data_len); + if (ret == -ENOENT) { + ret = __process_new_xattr(num, di_key, name, name_len, data, + data_len, type, ctx); + } else if (ret >= 0) { + if (data_len != found_data_len || + memcmp(data, found_data, data_len)) { + ret = __process_new_xattr(num, di_key, name, name_len, + data, data_len, type, ctx); + } else { + ret = 0; + } + } + + kfree(found_data); + fs_path_free(sctx, p); + return ret; +} +/* + * iterate_dir_item callback for an xattr of the parent root: sends a remove + * command if the send root has no xattr with that name. + */ +static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx) +{ + int ret; + struct send_ctx *sctx = ctx; + + ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, + name, name_len, NULL, NULL); + if (ret == -ENOENT) + ret = __process_deleted_xattr(num, di_key, name, name_len, data, + data_len, type, ctx); + else if (ret >= 0) + ret = 0; + + return ret; +} +/* + * Handles a changed xattr item: first runs __process_changed_new_xattr over + * the item in the send root (left_path), then + * __process_changed_deleted_xattr over the item in the parent root + * (right_path). + */ +static int process_changed_xattr(struct send_ctx *sctx) +{ + int ret = 0; + + ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, + sctx->cmp_key, __process_changed_new_xattr, sctx); + if (ret < 0) + goto out; + ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, + sctx->cmp_key, __process_changed_deleted_xattr, sctx); + +out: + return ret; +} +/* + * Sends every xattr stored under the BTRFS_XATTR_ITEM_KEY items of the inode + * sctx->cmp_key->objectid in the send root. + */ +static int process_all_new_xattrs(struct send_ctx *sctx) +{ + int ret; + struct btrfs_root *root; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *eb; + int slot; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + root = sctx->send_root; + + key.objectid = sctx->cmp_key->objectid; + key.type = BTRFS_XATTR_ITEM_KEY; + key.offset = 0; + while (1) { + ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + + if (found_key.objectid != key.objectid || +
found_key.type != key.type) { + ret = 0; + goto out; + } + + ret = iterate_dir_item(sctx, root, path, &found_key, + __process_new_xattr, sctx); + if (ret < 0) + goto out; + + btrfs_release_path(path); + key.offset = found_key.offset + 1; + } + +out: + btrfs_free_path(path); + return ret; +} + +/* + * Read some bytes from the current inode/file and send a write command to + * user space. + * Returns the number of bytes read and sent, 0 if the read returned no data + * (no command is sent then), or a negative errno. + */ +static int send_write(struct send_ctx *sctx, u64 offset, u32 len) +{ + int ret = 0; + struct fs_path *p; + loff_t pos = offset; + int readed; + mm_segment_t old_fs; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + /* + * The VFS normally only accepts user space buffers for security + * reasons. We only read from the file and also only provide the + * read_buf buffer to the VFS. As this buffer does not come from a user + * space call, it's ok to temporarily allow kernel space buffers. + */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + +verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); + + ret = open_cur_inode_file(sctx); + if (ret < 0) + goto out; + + ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); + if (ret < 0) + goto out; + readed = ret; + if (!readed) + goto out; + + ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + set_fs(old_fs); + if (ret < 0) + return ret; + return readed; +} + +/* + * Send a clone command to user space.
+ */ +static int send_clone(struct send_ctx *sctx, + u64 offset, u32 len, + struct clone_root *clone_root) +{ + int ret = 0; + struct btrfs_root *clone_root2 = clone_root->root; + struct fs_path *p; + u64 gen; + +verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " + "clone_inode=%llu, clone_offset=%llu\n", offset, len, + clone_root->root->objectid, clone_root->ino, + clone_root->offset); + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + /* + * p is reused below for the path of the clone source: looked up with + * get_cur_path when the source lives in the send root, with + * get_inode_path in clone_root2 otherwise. + */ + if (clone_root2 == sctx->send_root) { + ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, + &gen, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = get_cur_path(sctx, clone_root->ino, gen, p); + } else { + ret = get_inode_path(sctx, clone_root2, clone_root->ino, p); + } + if (ret < 0) + goto out; + + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, + clone_root2->root_item.uuid); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, + clone_root2->root_item.ctransid); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, + clone_root->offset); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} +/* + * Sends the data of the file extent item at @path, starting at file offset + * key->offset: as a series of write commands of at most BTRFS_SEND_READ_SIZE + * bytes each, or as one clone command if @clone_root is given. The length is + * clamped to the size of the current inode. + */ +static int send_write_or_clone(struct send_ctx *sctx, + struct btrfs_path *path, + struct btrfs_key *key, + struct clone_root *clone_root) +{ + int ret = 0; + struct btrfs_file_extent_item *ei; + u64 offset = key->offset; + u64 pos = 0; + u64 len; + u32 l; + u8 type; + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], ei); + if (type == BTRFS_FILE_EXTENT_INLINE) + len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+ else + len = btrfs_file_extent_num_bytes(path->nodes[0], ei); + + if (offset + len > sctx->cur_inode_size) + len = sctx->cur_inode_size - offset; + if (len == 0) { + ret = 0; + goto out; + } + + if (!clone_root) { + while (pos < len) { + l = len - pos; + if (l > BTRFS_SEND_READ_SIZE) + l = BTRFS_SEND_READ_SIZE; + ret = send_write(sctx, pos + offset, l); + if (ret < 0) + goto out; + if (!ret) + break; + pos += ret; + } + ret = 0; + } else { + ret = send_clone(sctx, offset, len, clone_root); + } + +out: + return ret; +} + +static int is_extent_unchanged(struct send_ctx *sctx, + struct btrfs_path *left_path, + struct btrfs_key *ekey) +{ + int ret = 0; + struct btrfs_key key; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + int slot; + struct btrfs_key found_key; + struct btrfs_file_extent_item *ei; + u64 left_disknr; + u64 right_disknr; + u64 left_offset; + u64 right_offset; + u64 left_offset_fixed; + u64 left_len; + u64 right_len; + u8 left_type; + u8 right_type; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + eb = left_path->nodes[0]; + slot = left_path->slots[0]; + + ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + left_type = btrfs_file_extent_type(eb, ei); + left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); + left_len = btrfs_file_extent_num_bytes(eb, ei); + left_offset = btrfs_file_extent_offset(eb, ei); + + if (left_type != BTRFS_FILE_EXTENT_REG) { + ret = 0; + goto out; + } + + /* + * Following comments will refer to these graphics. L is the left + * extents which we are checking at the moment. 1-8 are the right + * extents that we iterate. + * + * |-----L-----| + * |-1-|-2a-|-3-|-4-|-5-|-6-| + * + * |-----L-----| + * |--1--|-2b-|...(same as above) + * + * Alternative situation. Happens on files where extents got split. + * |-----L-----| + * |-----------7-----------|-6-| + * + * Alternative situation. Happens on files which got larger. + * |-----L-----| + * |-8-| + * Nothing follows after 8. 
+ */ + + key.objectid = ekey->objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = ekey->offset; + ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + goto out; + } + + /* + * Handle special case where the right side has no extents at all. + */ + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + if (found_key.objectid != key.objectid || + found_key.type != key.type) { + ret = 0; + goto out; + } + + /* + * We're now on 2a, 2b or 7. + */ + key = found_key; + while (key.offset < ekey->offset + left_len) { + ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + right_type = btrfs_file_extent_type(eb, ei); + right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); + right_len = btrfs_file_extent_num_bytes(eb, ei); + right_offset = btrfs_file_extent_offset(eb, ei); + + if (right_type != BTRFS_FILE_EXTENT_REG) { + ret = 0; + goto out; + } + + /* + * Are we at extent 8? If yes, we know the extent is changed. + * This may only happen on the first iteration. + */ + if (found_key.offset + right_len < ekey->offset) { + ret = 0; + goto out; + } + + left_offset_fixed = left_offset; + if (key.offset < ekey->offset) { + /* Fix the right offset for 2a and 7. */ + right_offset += ekey->offset - key.offset; + } else { + /* Fix the left offset for all behind 2a and 2b */ + left_offset_fixed += key.offset - ekey->offset; + } + + /* + * Check if we have the same extent. + */ + if (left_disknr + left_offset_fixed != + right_disknr + right_offset) { + ret = 0; + goto out; + } + + /* + * Go to the next extent. 
+ */ + ret = btrfs_next_item(sctx->parent_root, path); + if (ret < 0) + goto out; + if (!ret) { + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + } + if (ret || found_key.objectid != key.objectid || + found_key.type != key.type) { + key.offset += right_len; + break; + } else { + if (found_key.offset != key.offset + right_len) { + /* Should really not happen */ + ret = -EIO; + goto out; + } + } + key = found_key; + } + + /* + * We're now behind the left extent (treat as unchanged) or at the end + * of the right side (treat as changed). + */ + if (key.offset >= ekey->offset + left_len) + ret = 1; + else + ret = 0; + + +out: + btrfs_free_path(path); + return ret; +} + +static int process_extent(struct send_ctx *sctx, + struct btrfs_path *path, + struct btrfs_key *key) +{ + int ret = 0; + struct clone_root *found_clone = NULL; + + if (S_ISLNK(sctx->cur_inode_mode)) + return 0; + + if (sctx->parent_root && !sctx->cur_inode_new) { + ret = is_extent_unchanged(sctx, path, key); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + goto out; + } + } + + ret = find_extent_clone(sctx, path, key->objectid, key->offset, + sctx->cur_inode_size, &found_clone); + if (ret != -ENOENT && ret < 0) + goto out; + + ret = send_write_or_clone(sctx, path, key, found_clone); + +out: + return ret; +} + +static int process_all_extents(struct send_ctx *sctx) +{ + int ret; + struct btrfs_root *root; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *eb; + int slot; + + root = sctx->send_root; + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cmp_key->objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + while (1) { + ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + + if 
(found_key.objectid != key.objectid || + found_key.type != key.type) { + ret = 0; + goto out; + } + + ret = process_extent(sctx, path, &found_key); + if (ret < 0) + goto out; + + btrfs_release_path(path); + key.offset = found_key.offset + 1; + } + +out: + btrfs_free_path(path); + return ret; +} + +static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) +{ + int ret = 0; + + if (sctx->cur_ino == 0) + goto out; + if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && + sctx->cmp_key->type <= BTRFS_INODE_REF_KEY) + goto out; + if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) + goto out; + + ret = process_recorded_refs(sctx); + +out: + return ret; +} + +static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) +{ + int ret = 0; + u64 left_mode; + u64 left_uid; + u64 left_gid; + u64 right_mode; + u64 right_uid; + u64 right_gid; + int need_chmod = 0; + int need_chown = 0; + + ret = process_recorded_refs_if_needed(sctx, at_end); + if (ret < 0) + goto out; + + if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) + goto out; + if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) + goto out; + + ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, + &left_mode, &left_uid, &left_gid); + if (ret < 0) + goto out; + + if (!S_ISLNK(sctx->cur_inode_mode)) { + if (!sctx->parent_root || sctx->cur_inode_new) { + need_chmod = 1; + need_chown = 1; + } else { + ret = get_inode_info(sctx->parent_root, sctx->cur_ino, + NULL, NULL, &right_mode, &right_uid, + &right_gid); + if (ret < 0) + goto out; + + if (left_uid != right_uid || left_gid != right_gid) + need_chown = 1; + if (left_mode != right_mode) + need_chmod = 1; + } + } + + if (S_ISREG(sctx->cur_inode_mode)) { + ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, + sctx->cur_inode_size); + if (ret < 0) + goto out; + } + + if (need_chown) { + ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, + left_uid, left_gid); + if (ret < 0) + goto out; + } + 
if (need_chmod) { + ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, + left_mode); + if (ret < 0) + goto out; + } + + /* + * Need to send that every time, no matter if it actually changed + * between the two trees as we have done changes to the inode before. + */ + ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); + if (ret < 0) + goto out; + +out: + return ret; +} + +static int changed_inode(struct send_ctx *sctx, + enum btrfs_compare_tree_result result) +{ + int ret = 0; + struct btrfs_key *key = sctx->cmp_key; + struct btrfs_inode_item *left_ii = NULL; + struct btrfs_inode_item *right_ii = NULL; + u64 left_gen = 0; + u64 right_gen = 0; + + ret = close_cur_inode_file(sctx); + if (ret < 0) + goto out; + + sctx->cur_ino = key->objectid; + sctx->cur_inode_new_gen = 0; + sctx->cur_inode_first_ref_orphan = 0; + sctx->send_progress = sctx->cur_ino; + + if (result == BTRFS_COMPARE_TREE_NEW || + result == BTRFS_COMPARE_TREE_CHANGED) { + left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], + sctx->left_path->slots[0], + struct btrfs_inode_item); + left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], + left_ii); + } else { + right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], + sctx->right_path->slots[0], + struct btrfs_inode_item); + right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], + right_ii); + } + if (result == BTRFS_COMPARE_TREE_CHANGED) { + right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], + sctx->right_path->slots[0], + struct btrfs_inode_item); + + right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], + right_ii); + if (left_gen != right_gen) + sctx->cur_inode_new_gen = 1; + } + + if (result == BTRFS_COMPARE_TREE_NEW) { + sctx->cur_inode_gen = left_gen; + sctx->cur_inode_new = 1; + sctx->cur_inode_deleted = 0; + sctx->cur_inode_size = btrfs_inode_size( + sctx->left_path->nodes[0], left_ii); + sctx->cur_inode_mode = btrfs_inode_mode( + sctx->left_path->nodes[0], left_ii); + if (sctx->cur_ino != 
BTRFS_FIRST_FREE_OBJECTID) + ret = send_create_inode(sctx, sctx->left_path, + sctx->cmp_key); + } else if (result == BTRFS_COMPARE_TREE_DELETED) { + sctx->cur_inode_gen = right_gen; + sctx->cur_inode_new = 0; + sctx->cur_inode_deleted = 1; + sctx->cur_inode_size = btrfs_inode_size( + sctx->right_path->nodes[0], right_ii); + sctx->cur_inode_mode = btrfs_inode_mode( + sctx->right_path->nodes[0], right_ii); + } else if (result == BTRFS_COMPARE_TREE_CHANGED) { + if (sctx->cur_inode_new_gen) { + sctx->cur_inode_gen = right_gen; + sctx->cur_inode_new = 0; + sctx->cur_inode_deleted = 1; + sctx->cur_inode_size = btrfs_inode_size( + sctx->right_path->nodes[0], right_ii); + sctx->cur_inode_mode = btrfs_inode_mode( + sctx->right_path->nodes[0], right_ii); + ret = process_all_refs(sctx, + BTRFS_COMPARE_TREE_DELETED); + if (ret < 0) + goto out; + + sctx->cur_inode_gen = left_gen; + sctx->cur_inode_new = 1; + sctx->cur_inode_deleted = 0; + sctx->cur_inode_size = btrfs_inode_size( + sctx->left_path->nodes[0], left_ii); + sctx->cur_inode_mode = btrfs_inode_mode( + sctx->left_path->nodes[0], left_ii); + ret = send_create_inode(sctx, sctx->left_path, + sctx->cmp_key); + if (ret < 0) + goto out; + + ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); + if (ret < 0) + goto out; + ret = process_all_extents(sctx); + if (ret < 0) + goto out; + ret = process_all_new_xattrs(sctx); + if (ret < 0) + goto out; + } else { + sctx->cur_inode_gen = left_gen; + sctx->cur_inode_new = 0; + sctx->cur_inode_new_gen = 0; + sctx->cur_inode_deleted = 0; + sctx->cur_inode_size = btrfs_inode_size( + sctx->left_path->nodes[0], left_ii); + sctx->cur_inode_mode = btrfs_inode_mode( + sctx->left_path->nodes[0], left_ii); + } + } + +out: + return ret; +} + +static int changed_ref(struct send_ctx *sctx, + enum btrfs_compare_tree_result result) +{ + int ret = 0; + + BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + + if (!sctx->cur_inode_new_gen && + sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { + if (result 
== BTRFS_COMPARE_TREE_NEW) + ret = record_new_ref(sctx); + else if (result == BTRFS_COMPARE_TREE_DELETED) + ret = record_deleted_ref(sctx); + else if (result == BTRFS_COMPARE_TREE_CHANGED) + ret = record_changed_ref(sctx); + } + + return ret; +} + +static int changed_xattr(struct send_ctx *sctx, + enum btrfs_compare_tree_result result) +{ + int ret = 0; + + BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + + if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { + if (result == BTRFS_COMPARE_TREE_NEW) + ret = process_new_xattr(sctx); + else if (result == BTRFS_COMPARE_TREE_DELETED) + ret = process_deleted_xattr(sctx); + else if (result == BTRFS_COMPARE_TREE_CHANGED) + ret = process_changed_xattr(sctx); + } + + return ret; +} + +static int changed_extent(struct send_ctx *sctx, + enum btrfs_compare_tree_result result) +{ + int ret = 0; + + BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + + if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { + if (result != BTRFS_COMPARE_TREE_DELETED) + ret = process_extent(sctx, sctx->left_path, + sctx->cmp_key); + } + + return ret; +} + + +static int changed_cb(struct btrfs_root *left_root, + struct btrfs_root *right_root, + struct btrfs_path *left_path, + struct btrfs_path *right_path, + struct btrfs_key *key, + enum btrfs_compare_tree_result result, + void *ctx) +{ + int ret = 0; + struct send_ctx *sctx = ctx; + + sctx->left_path = left_path; + sctx->right_path = right_path; + sctx->cmp_key = key; + + ret = finish_inode_if_needed(sctx, 0); + if (ret < 0) + goto out; + + if (key->type == BTRFS_INODE_ITEM_KEY) + ret = changed_inode(sctx, result); + else if (key->type == BTRFS_INODE_REF_KEY) + ret = changed_ref(sctx, result); + else if (key->type == BTRFS_XATTR_ITEM_KEY) + ret = changed_xattr(sctx, result); + else if (key->type == BTRFS_EXTENT_DATA_KEY) + ret = changed_extent(sctx, result); + +out: + return ret; +} + +static int full_send_tree(struct send_ctx *sctx) +{ + int ret; + struct btrfs_trans_handle *trans = 
NULL; + struct btrfs_root *send_root = sctx->send_root; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_path *path; + struct extent_buffer *eb; + int slot; + u64 start_ctransid; + u64 ctransid; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + spin_lock(&send_root->root_times_lock); + start_ctransid = btrfs_root_ctransid(&send_root->root_item); + spin_unlock(&send_root->root_times_lock); + + key.objectid = BTRFS_FIRST_FREE_OBJECTID; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + +join_trans: + /* + * We need to make sure the transaction does not get committed + * while we do anything on commit roots. Join a transaction to prevent + * this. + */ + trans = btrfs_join_transaction(send_root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + + /* + * Make sure the tree has not changed + */ + spin_lock(&send_root->root_times_lock); + ctransid = btrfs_root_ctransid(&send_root->root_item); + spin_unlock(&send_root->root_times_lock); + + if (ctransid != start_ctransid) { + WARN(1, KERN_WARNING "btrfs: the root that you're trying to " + "send was modified in between. This is " + "probably a bug.\n"); + ret = -EIO; + goto out; + } + + ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); + if (ret < 0) + goto out; + if (ret) + goto out_finish; + + while (1) { + /* + * When someone want to commit while we iterate, end the + * joined transaction and rejoin. 
+ */ + if (btrfs_should_end_transaction(trans, send_root)) { + ret = btrfs_end_transaction(trans, send_root); + trans = NULL; + if (ret < 0) + goto out; + btrfs_release_path(path); + goto join_trans; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + + ret = changed_cb(send_root, NULL, path, NULL, + &found_key, BTRFS_COMPARE_TREE_NEW, sctx); + if (ret < 0) + goto out; + + key.objectid = found_key.objectid; + key.type = found_key.type; + key.offset = found_key.offset + 1; + + ret = btrfs_next_item(send_root, path); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + break; + } + } + +out_finish: + ret = finish_inode_if_needed(sctx, 1); + +out: + btrfs_free_path(path); + if (trans) { + if (!ret) + ret = btrfs_end_transaction(trans, send_root); + else + btrfs_end_transaction(trans, send_root); + } + return ret; +} + +static int send_subvol(struct send_ctx *sctx) +{ + int ret; + + ret = send_header(sctx); + if (ret < 0) + goto out; + + ret = send_subvol_begin(sctx); + if (ret < 0) + goto out; + + if (sctx->parent_root) { + ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, + changed_cb, sctx); + if (ret < 0) + goto out; + ret = finish_inode_if_needed(sctx, 1); + if (ret < 0) + goto out; + } else { + ret = full_send_tree(sctx); + if (ret < 0) + goto out; + } + +out: + if (!ret) + ret = close_cur_inode_file(sctx); + else + close_cur_inode_file(sctx); + + free_recorded_refs(sctx); + return ret; +} + +long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) +{ + int ret = 0; + struct btrfs_root *send_root; + struct btrfs_root *clone_root; + struct btrfs_fs_info *fs_info; + struct btrfs_ioctl_send_args *arg = NULL; + struct btrfs_key key; + struct file *filp = NULL; + struct send_ctx *sctx = NULL; + u32 i; + u64 *clone_sources_tmp = NULL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root; + fs_info = send_root->fs_info; + + arg = memdup_user(arg_, 
sizeof(*arg)); + if (IS_ERR(arg)) { + ret = PTR_ERR(arg); + arg = NULL; + goto out; + } + + if (!access_ok(VERIFY_READ, arg->clone_sources, + sizeof(*arg->clone_sources * + arg->clone_sources_count))) { + ret = -EFAULT; + goto out; + } + + sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); + if (!sctx) { + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&sctx->new_refs); + INIT_LIST_HEAD(&sctx->deleted_refs); + INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); + INIT_LIST_HEAD(&sctx->name_cache_list); + + sctx->send_filp = fget(arg->send_fd); + if (IS_ERR(sctx->send_filp)) { + ret = PTR_ERR(sctx->send_filp); + goto out; + } + + sctx->mnt = mnt_file->f_path.mnt; + + sctx->send_root = send_root; + sctx->clone_roots_cnt = arg->clone_sources_count; + + sctx->send_max_size = BTRFS_SEND_BUF_SIZE; + sctx->send_buf = vmalloc(sctx->send_max_size); + if (!sctx->send_buf) { + ret = -ENOMEM; + goto out; + } + + sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); + if (!sctx->read_buf) { + ret = -ENOMEM; + goto out; + } + + sctx->clone_roots = vzalloc(sizeof(struct clone_root) * + (arg->clone_sources_count + 1)); + if (!sctx->clone_roots) { + ret = -ENOMEM; + goto out; + } + + if (arg->clone_sources_count) { + clone_sources_tmp = vmalloc(arg->clone_sources_count * + sizeof(*arg->clone_sources)); + if (!clone_sources_tmp) { + ret = -ENOMEM; + goto out; + } + + ret = copy_from_user(clone_sources_tmp, arg->clone_sources, + arg->clone_sources_count * + sizeof(*arg->clone_sources)); + if (ret) { + ret = -EFAULT; + goto out; + } + + for (i = 0; i < arg->clone_sources_count; i++) { + key.objectid = clone_sources_tmp[i]; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + clone_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (!clone_root) { + ret = -EINVAL; + goto out; + } + if (IS_ERR(clone_root)) { + ret = PTR_ERR(clone_root); + goto out; + } + sctx->clone_roots[i].root = clone_root; + } + vfree(clone_sources_tmp); + clone_sources_tmp = NULL; + } + + if (arg->parent_root) { 
+ key.objectid = arg->parent_root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (!sctx->parent_root) { + ret = -EINVAL; + goto out; + } + } + + /* + * Clones from send_root are allowed, but only if the clone source + * is behind the current send position. This is checked while searching + * for possible clone sources. + */ + sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; + + /* We do a bsearch later */ + sort(sctx->clone_roots, sctx->clone_roots_cnt, + sizeof(*sctx->clone_roots), __clone_root_cmp_sort, + NULL); + + ret = send_subvol(sctx); + if (ret < 0) + goto out; + + ret = begin_cmd(sctx, BTRFS_SEND_C_END); + if (ret < 0) + goto out; + ret = send_cmd(sctx); + if (ret < 0) + goto out; + +out: + if (filp) + fput(filp); + kfree(arg); + vfree(clone_sources_tmp); + + if (sctx) { + if (sctx->send_filp) + fput(sctx->send_filp); + + vfree(sctx->clone_roots); + vfree(sctx->send_buf); + vfree(sctx->read_buf); + + name_cache_free(sctx); + + kfree(sctx); + } + + return ret; +} diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h new file mode 100644 index 000000000000..9934e948e57f --- /dev/null +++ b/fs/btrfs/send.h @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * Copyright (C) 2012 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "ctree.h" + +#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" +#define BTRFS_SEND_STREAM_VERSION 1 + +#define BTRFS_SEND_BUF_SIZE (1024 * 64) +#define BTRFS_SEND_READ_SIZE (1024 * 48) + +enum btrfs_tlv_type { + BTRFS_TLV_U8, + BTRFS_TLV_U16, + BTRFS_TLV_U32, + BTRFS_TLV_U64, + BTRFS_TLV_BINARY, + BTRFS_TLV_STRING, + BTRFS_TLV_UUID, + BTRFS_TLV_TIMESPEC, +}; + +struct btrfs_stream_header { + char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)]; + __le32 version; +} __attribute__ ((__packed__)); + +struct btrfs_cmd_header { + /* len excluding the header */ + __le32 len; + __le16 cmd; + /* crc including the header with zero crc field */ + __le32 crc; +} __attribute__ ((__packed__)); + +struct btrfs_tlv_header { + __le16 tlv_type; + /* len excluding the header */ + __le16 tlv_len; +} __attribute__ ((__packed__)); + +/* commands */ +enum btrfs_send_cmd { + BTRFS_SEND_C_UNSPEC, + + BTRFS_SEND_C_SUBVOL, + BTRFS_SEND_C_SNAPSHOT, + + BTRFS_SEND_C_MKFILE, + BTRFS_SEND_C_MKDIR, + BTRFS_SEND_C_MKNOD, + BTRFS_SEND_C_MKFIFO, + BTRFS_SEND_C_MKSOCK, + BTRFS_SEND_C_SYMLINK, + + BTRFS_SEND_C_RENAME, + BTRFS_SEND_C_LINK, + BTRFS_SEND_C_UNLINK, + BTRFS_SEND_C_RMDIR, + + BTRFS_SEND_C_SET_XATTR, + BTRFS_SEND_C_REMOVE_XATTR, + + BTRFS_SEND_C_WRITE, + BTRFS_SEND_C_CLONE, + + BTRFS_SEND_C_TRUNCATE, + BTRFS_SEND_C_CHMOD, + BTRFS_SEND_C_CHOWN, + BTRFS_SEND_C_UTIMES, + + BTRFS_SEND_C_END, + __BTRFS_SEND_C_MAX, +}; +#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) + +/* attributes in send stream */ +enum { + BTRFS_SEND_A_UNSPEC, + + BTRFS_SEND_A_UUID, + BTRFS_SEND_A_CTRANSID, + + BTRFS_SEND_A_INO, + BTRFS_SEND_A_SIZE, + BTRFS_SEND_A_MODE, + BTRFS_SEND_A_UID, + BTRFS_SEND_A_GID, + BTRFS_SEND_A_RDEV, + BTRFS_SEND_A_CTIME, + BTRFS_SEND_A_MTIME, + BTRFS_SEND_A_ATIME, + 
BTRFS_SEND_A_OTIME, + + BTRFS_SEND_A_XATTR_NAME, + BTRFS_SEND_A_XATTR_DATA, + + BTRFS_SEND_A_PATH, + BTRFS_SEND_A_PATH_TO, + BTRFS_SEND_A_PATH_LINK, + + BTRFS_SEND_A_FILE_OFFSET, + BTRFS_SEND_A_DATA, + + BTRFS_SEND_A_CLONE_UUID, + BTRFS_SEND_A_CLONE_CTRANSID, + BTRFS_SEND_A_CLONE_PATH, + BTRFS_SEND_A_CLONE_OFFSET, + BTRFS_SEND_A_CLONE_LEN, + + __BTRFS_SEND_A_MAX, +}; +#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1) + +#ifdef __KERNEL__ +long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); +#endif -- cgit v1.2.3 From b24baf6917a376420d535548e1f88744028bcf24 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jul 2012 19:21:10 -0400 Subject: Btrfs: uninit variable fixes in send/receive Signed-off-by: Chris Mason --- fs/btrfs/send.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 5394cb75012a..bf232c88a0bf 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2676,7 +2676,7 @@ static int process_recorded_refs(struct send_ctx *sctx) struct ulist_iterator uit; struct ulist_node *un; struct fs_path *valid_path = NULL; - u64 ow_inode; + u64 ow_inode = 0; u64 ow_gen; int did_overwrite = 0; int is_orphan = 0; @@ -3553,7 +3553,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) int ret = 0; struct fs_path *p; loff_t pos = offset; - int readed; + int readed = 0; mm_segment_t old_fs; p = fs_path_alloc(sctx); -- cgit v1.2.3 From 8ded2bbc1845e19c771eb55209aab166ef011243 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 25 Jul 2012 10:40:34 -0400 Subject: posix_types.h: Cleanup stale __NFDBITS and related definitions Recently, glibc made a change to suppress sign-conversion warnings in FD_SET (glibc commit ceb9e56b3d1). This uncovered an issue with the kernel's definition of __NFDBITS if applications #include after including . A build failure would be seen when passing the -Werror=sign-compare and -D_FORTIFY_SOURCE=2 flags to gcc. 
It was suggested that the kernel should either match the glibc definition of __NFDBITS or remove that entirely. The current in-kernel uses of __NFDBITS can be replaced with BITS_PER_LONG, and there are no uses of the related __FDELT and __FDMASK defines. Given that, we'll continue the cleanup that was started with commit 8b3d1cda4f5f ("posix_types: Remove fd_set macros") and drop the remaining unused macros. Additionally, linux/time.h has similar macros defined that expand to nothing so we'll remove those at the same time. Reported-by: Jeff Law Suggested-by: Linus Torvalds CC: Signed-off-by: Josh Boyer [ .. and fix up whitespace as per akpm ] Signed-off-by: Linus Torvalds --- arch/mips/kernel/kspd.c | 2 +- fs/exec.c | 2 +- fs/select.c | 10 +++++----- include/linux/posix_types.h | 18 +++--------------- include/linux/time.h | 8 -------- kernel/exit.c | 2 +- security/selinux/hooks.c | 2 +- 7 files changed, 12 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index 84d0639e4580..b77f56bbb477 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -323,7 +323,7 @@ static void sp_cleanup(void) fdt = files_fdtable(files); for (;;) { unsigned long set; - i = j * __NFDBITS; + i = j * BITS_PER_LONG; if (i >= fdt->max_fds) break; set = fdt->open_fds[j++]; diff --git a/fs/exec.c b/fs/exec.c index da27b91ff1e8..e95aeeddd25c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1020,7 +1020,7 @@ static void flush_old_files(struct files_struct * files) unsigned long set, i; j++; - i = j * __NFDBITS; + i = j * BITS_PER_LONG; fdt = files_fdtable(files); if (i >= fdt->max_fds) break; diff --git a/fs/select.c b/fs/select.c index bae321569dfa..db14c781335e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -345,8 +345,8 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) struct fdtable *fdt; /* handle last in-complete long-word first */ - set = ~(~0UL << (n & (__NFDBITS-1))); - n /= __NFDBITS; + set = ~(~0UL << (n 
& (BITS_PER_LONG-1))); + n /= BITS_PER_LONG; fdt = files_fdtable(current->files); open_fds = fdt->open_fds + n; max = 0; @@ -373,7 +373,7 @@ get_max: max++; set >>= 1; } while (set); - max += n * __NFDBITS; + max += n * BITS_PER_LONG; } return max; @@ -435,11 +435,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; if (all_bits == 0) { - i += __NFDBITS; + i += BITS_PER_LONG; continue; } - for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { + for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { int fput_needed; if (i >= n) break; diff --git a/include/linux/posix_types.h b/include/linux/posix_types.h index f04c98cf44f3..988f76e636e3 100644 --- a/include/linux/posix_types.h +++ b/include/linux/posix_types.h @@ -15,26 +15,14 @@ */ /* - * Those macros may have been defined in . But we always - * use the ones here. + * This macro may have been defined in . But we always + * use the one here. */ -#undef __NFDBITS -#define __NFDBITS (8 * sizeof(unsigned long)) - #undef __FD_SETSIZE #define __FD_SETSIZE 1024 -#undef __FDSET_LONGS -#define __FDSET_LONGS (__FD_SETSIZE/__NFDBITS) - -#undef __FDELT -#define __FDELT(d) ((d) / __NFDBITS) - -#undef __FDMASK -#define __FDMASK(d) (1UL << ((d) % __NFDBITS)) - typedef struct { - unsigned long fds_bits [__FDSET_LONGS]; + unsigned long fds_bits[__FD_SETSIZE / (8 * sizeof(long))]; } __kernel_fd_set; /* Type of a signal handler. 
*/ diff --git a/include/linux/time.h b/include/linux/time.h index 179f4d6755fc..c81c5e40fcb5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -257,14 +257,6 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) #endif /* __KERNEL__ */ -#define NFDBITS __NFDBITS - -#define FD_SETSIZE __FD_SETSIZE -#define FD_SET(fd,fdsetp) __FD_SET(fd,fdsetp) -#define FD_CLR(fd,fdsetp) __FD_CLR(fd,fdsetp) -#define FD_ISSET(fd,fdsetp) __FD_ISSET(fd,fdsetp) -#define FD_ZERO(fdsetp) __FD_ZERO(fdsetp) - /* * Names of the interval timers, and structure * defining a timer setting: diff --git a/kernel/exit.c b/kernel/exit.c index d17f6c4ddfa9..f65345f9e5bb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -483,7 +483,7 @@ static void close_files(struct files_struct * files) rcu_read_unlock(); for (;;) { unsigned long set; - i = j * __NFDBITS; + i = j * BITS_PER_LONG; if (i >= fdt->max_fds) break; set = fdt->open_fds[j++]; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 689fe2d22165..94c45a1531a4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2129,7 +2129,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, int fd; j++; - i = j * __NFDBITS; + i = j * BITS_PER_LONG; fdt = files_fdtable(files); if (i >= fdt->max_fds) break; -- cgit v1.2.3 From a1857ebe752d77d96c89d964500a9528df6d320e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 27 Jul 2012 10:11:13 +1000 Subject: Btrfs: using vmalloc and friends needs vmalloc.h On powerpc, we don't get the implicit vmalloc.h include, and as a result the build fails noisily: fs/btrfs/send.c: In function 'fs_path_free': fs/btrfs/send.c:185:4: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration] fs/btrfs/send.c: In function 'fs_path_ensure_buf': fs/btrfs/send.c:215:4: error: implicit declaration of function 'vmalloc' [-Werror=implicit-function-declaration] fs/btrfs/send.c:215:12: warning: assignment makes 
pointer from integer without a cast [enabled by default] fs/btrfs/send.c:225:12: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c:233:13: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c: In function 'iterate_dir_item': fs/btrfs/send.c:900:10: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c:909:11: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c: In function 'btrfs_ioctl_send': fs/btrfs/send.c:4463:17: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c:4469:17: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c:4475:2: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration] fs/btrfs/send.c:4475:20: warning: assignment makes pointer from integer without a cast [enabled by default] fs/btrfs/send.c:4483:21: warning: assignment makes pointer from integer without a cast [enabled by default] Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- fs/btrfs/send.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c8ca49b1bb4d..fb5ffe95f869 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "send.h" #include "backref.h" -- cgit v1.2.3 From 3b6e2723f32de42028617f2c99b244ccd72cd959 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Fri, 27 Jul 2012 00:42:52 -0400 Subject: locks: prevent side-effects of locks_release_private before file_lock is initialized When calling fcntl(fd, F_SETLEASE, lck) [with lck=F_WRLCK or F_RDLCK], the custom signal or owner (if any were previously set using F_SETSIG or F_SETOWN fcntls) would be reset when F_SETLEASE was called for the second time on the same file descriptor. 
This bug is a regression of 2.6.37 and is described here: https://bugzilla.kernel.org/show_bug.cgi?id=43336 This patch reverts a commit from Oct 2004 (with subject "nfs4 lease: move the f_delown processing") which originally introduced the lm_release_private callback. Signed-off-by: Filipe Brandenburger Signed-off-by: J. Bruce Fields --- fs/locks.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 814c51d0de47..86668dd211ae 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -427,18 +427,8 @@ static void lease_break_callback(struct file_lock *fl) kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); } -static void lease_release_private_callback(struct file_lock *fl) -{ - if (!fl->fl_file) - return; - - f_delown(fl->fl_file); - fl->fl_file->f_owner.signum = 0; -} - static const struct lock_manager_operations lease_manager_ops = { .lm_break = lease_break_callback, - .lm_release_private = lease_release_private_callback, .lm_change = lease_modify, }; @@ -1155,8 +1145,13 @@ int lease_modify(struct file_lock **before, int arg) return error; lease_clear_pending(fl, arg); locks_wake_up_blocks(fl); - if (arg == F_UNLCK) + if (arg == F_UNLCK) { + struct file *filp = fl->fl_file; + + f_delown(filp); + filp->f_owner.signum = 0; locks_delete_lock(before); + } return 0; } -- cgit v1.2.3 From ff691e969433a54e26fb6502a6613e02c680e8ee Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 13 Jul 2012 14:04:46 +0400 Subject: CIFS: Simplify cifs_mkdir call Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 4 +- fs/cifs/cifssmb.c | 8 +- fs/cifs/inode.c | 295 ++++++++++++++++++++++++++++------------------------ 3 files changed, 167 insertions(+), 140 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index cf7fb185103c..cc39cc331bb3 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -289,10 +289,10 @@ extern int 
CIFSSMBUnixSetFileInfo(const unsigned int xid, u16 fid, u32 pid_of_opener); extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, - struct cifs_tcon *tcon, char *file_name, + struct cifs_tcon *tcon, const char *file_name, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, - int remap_special_chars); + int remap); extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *newName, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index cabc7a01f5df..01808eb3af47 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -5943,7 +5943,7 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, int CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, - char *fileName, + const char *file_name, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, int remap) { @@ -5964,14 +5964,14 @@ setPermsRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); + name_len = strnlen(file_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + strncpy(pSMB->FileName, file_name, name_len); } params = 6 + name_len; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35cb6a374a45..e9ba1a150fe3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1219,16 +1219,165 @@ unlink_out: return rc; } +static int +cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, + const char *full_path, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, const unsigned int xid) +{ + int rc = 0; + struct inode *newinode = NULL; + + if (tcon->unix_ext) + rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, + xid); + 
else + rc = cifs_get_inode_info(&newinode, full_path, NULL, + inode->i_sb, xid, NULL); + if (rc) + return rc; + + d_instantiate(dentry, newinode); + /* + * setting nlink not necessary except in cases where we failed to get it + * from the server or was set bogus + */ + if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) + set_nlink(dentry->d_inode, 2); + + mode &= ~current_umask(); + /* must turn on setgid bit if parent dir has it */ + if (inode->i_mode & S_ISGID) + mode |= S_ISGID; + + if (tcon->unix_ext) { + struct cifs_unix_set_info_args args = { + .mode = mode, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + args.uid = (__u64)current_fsuid(); + if (inode->i_mode & S_ISGID) + args.gid = (__u64)inode->i_gid; + else + args.gid = (__u64)current_fsgid(); + } else { + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; + } + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + } else { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + (mode & S_IWUGO) == 0) { + FILE_BASIC_INFO info; + struct cifsInodeInfo *cifsInode; + u32 dosattrs; + int tmprc; + + memset(&info, 0, sizeof(info)); + cifsInode = CIFS_I(newinode); + dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; + info.Attributes = cpu_to_le32(dosattrs); + tmprc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (tmprc == 0) + cifsInode->cifsAttrs = dosattrs; + } + if (dentry->d_inode) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + dentry->d_inode->i_mode = (mode | S_IFDIR); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + dentry->d_inode->i_uid = current_fsuid(); + if (inode->i_mode & S_ISGID) + dentry->d_inode->i_gid = inode->i_gid; + else + dentry->d_inode->i_gid = + current_fsgid(); + } + } + } + return rc; +} + 
+static int +cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, + const char *full_path, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, const unsigned int xid) +{ + int rc = 0; + u32 oplock = 0; + FILE_UNIX_BASIC_INFO *info = NULL; + struct inode *newinode = NULL; + struct cifs_fattr fattr; + + info = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); + if (info == NULL) { + rc = -ENOMEM; + goto posix_mkdir_out; + } + + mode &= ~current_umask(); + rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, + NULL /* netfid */, info, &oplock, full_path, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == -EOPNOTSUPP) + goto posix_mkdir_out; + else if (rc) { + cFYI(1, "posix mkdir returned 0x%x", rc); + d_drop(dentry); + goto posix_mkdir_out; + } + + if (info->Type == cpu_to_le32(-1)) + /* no return info, go query for it */ + goto posix_mkdir_get_info; + /* + * BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if + * need to set uid/gid. 
+ */ + + cifs_unix_basic_to_fattr(&fattr, info, cifs_sb); + cifs_fill_uniqueid(inode->i_sb, &fattr); + newinode = cifs_iget(inode->i_sb, &fattr); + if (!newinode) + goto posix_mkdir_get_info; + + d_instantiate(dentry, newinode); + +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1, "instantiated dentry %p %s to inode %p", dentry, + dentry->d_name.name, newinode); + + if (newinode->i_nlink != 2) + cFYI(1, "unexpected number of links %d", newinode->i_nlink); +#endif + +posix_mkdir_out: + kfree(info); + return rc; +posix_mkdir_get_info: + rc = cifs_mkdir_qinfo(inode, dentry, mode, full_path, cifs_sb, tcon, + xid); + goto posix_mkdir_out; +} + int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) { - int rc = 0, tmprc; + int rc = 0; unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; - char *full_path = NULL; - struct inode *newinode = NULL; - struct cifs_fattr fattr; + char *full_path; cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode); @@ -1248,145 +1397,23 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { - u32 oplock = 0; - FILE_UNIX_BASIC_INFO *pInfo = - kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); - if (pInfo == NULL) { - rc = -ENOMEM; + rc = cifs_posix_mkdir(inode, direntry, mode, full_path, cifs_sb, + tcon, xid); + if (rc != -EOPNOTSUPP) goto mkdir_out; - } - - mode &= ~current_umask(); - rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, - mode, NULL /* netfid */, pInfo, &oplock, - full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == -EOPNOTSUPP) { - kfree(pInfo); - goto mkdir_retry_old; - } else if (rc) { - cFYI(1, "posix mkdir returned 0x%x", rc); - d_drop(direntry); - } else { - if (pInfo->Type == cpu_to_le32(-1)) { - /* no return info, go query for it */ - kfree(pInfo); - goto mkdir_get_info; - } 
-/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need - to set uid/gid */ - - cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); - cifs_fill_uniqueid(inode->i_sb, &fattr); - newinode = cifs_iget(inode->i_sb, &fattr); - if (!newinode) { - kfree(pInfo); - goto mkdir_get_info; - } - - d_instantiate(direntry, newinode); - -#ifdef CONFIG_CIFS_DEBUG2 - cFYI(1, "instantiated dentry %p %s to inode %p", - direntry, direntry->d_name.name, newinode); - - if (newinode->i_nlink != 2) - cFYI(1, "unexpected number of links %d", - newinode->i_nlink); -#endif - } - kfree(pInfo); - goto mkdir_out; } -mkdir_retry_old: + /* BB add setting the equivalent of mode via CreateX w/ACLs */ rc = CIFSSMBMkDir(xid, tcon, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { cFYI(1, "cifs_mkdir returned 0x%x", rc); d_drop(direntry); - } else { -mkdir_get_info: - if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb, xid); - else - rc = cifs_get_inode_info(&newinode, full_path, NULL, - inode->i_sb, xid, NULL); - - d_instantiate(direntry, newinode); - /* setting nlink not necessary except in cases where we - * failed to get it from the server or was set bogus */ - if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) - set_nlink(direntry->d_inode, 2); - - mode &= ~current_umask(); - /* must turn on setgid bit if parent dir has it */ - if (inode->i_mode & S_ISGID) - mode |= S_ISGID; - - if (tcon->unix_ext) { - struct cifs_unix_set_info_args args = { - .mode = mode, - .ctime = NO_CHANGE_64, - .atime = NO_CHANGE_64, - .mtime = NO_CHANGE_64, - .device = 0, - }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - args.uid = (__u64)current_fsuid(); - if (inode->i_mode & S_ISGID) - args.gid = (__u64)inode->i_gid; - else - args.gid = (__u64)current_fsgid(); - } else { - args.uid = NO_CHANGE_64; - args.gid = NO_CHANGE_64; - } - CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, 
- cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - } else { - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && - (mode & S_IWUGO) == 0) { - FILE_BASIC_INFO pInfo; - struct cifsInodeInfo *cifsInode; - u32 dosattrs; - - memset(&pInfo, 0, sizeof(pInfo)); - cifsInode = CIFS_I(newinode); - dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; - pInfo.Attributes = cpu_to_le32(dosattrs); - tmprc = CIFSSMBSetPathInfo(xid, tcon, - full_path, &pInfo, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (tmprc == 0) - cifsInode->cifsAttrs = dosattrs; - } - if (direntry->d_inode) { - if (cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_DYNPERM) - direntry->d_inode->i_mode = - (mode | S_IFDIR); - - if (cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_SET_UID) { - direntry->d_inode->i_uid = - current_fsuid(); - if (inode->i_mode & S_ISGID) - direntry->d_inode->i_gid = - inode->i_gid; - else - direntry->d_inode->i_gid = - current_fsgid(); - } - } - } + goto mkdir_out; } + + rc = cifs_mkdir_qinfo(inode, direntry, mode, full_path, cifs_sb, tcon, + xid); mkdir_out: /* * Force revalidate to get parent dir info when needed since cached -- cgit v1.2.3 From f436720e94ac53413e20c48b02d16e2ef180e166 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sat, 17 Mar 2012 11:41:12 +0300 Subject: CIFS: Separate protocol specific part from mkdir Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 7 +++++++ fs/cifs/cifsproto.h | 4 +--- fs/cifs/cifssmb.c | 8 +++++--- fs/cifs/inode.c | 32 +++++++++++++------------------- fs/cifs/smb1ops.c | 23 +++++++++++++++++++++++ 5 files changed, 49 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 497da5ce704c..939f91aac162 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -246,6 +246,13 @@ struct smb_version_operations { bool (*can_echo)(struct TCP_Server_Info *); /* send echo request */ int (*echo)(struct TCP_Server_Info *); + /* create 
directory */ + int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *, + struct cifs_sb_info *); + /* set info on created directory */ + void (*mkdir_setinfo)(struct inode *, const char *, + struct cifs_sb_info *, struct cifs_tcon *, + const unsigned int); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index cc39cc331bb3..5e128fb2b618 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -295,9 +295,7 @@ extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, int remap); extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *newName, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, const struct nls_table *nls_codepage, int remap_special_chars); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 01808eb3af47..eb74cceef480 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -992,14 +992,15 @@ RmDirRetry: } int -CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *name, const struct nls_table *nls_codepage, int remap) +CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) { int rc = 0; CREATE_DIRECTORY_REQ *pSMB = NULL; CREATE_DIRECTORY_RSP *pSMBr = NULL; int bytes_returned; int name_len; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSSMBMkDir"); MkDirRetry: @@ -1010,7 +1011,8 @@ MkDirRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, - PATH_MAX, nls_codepage, remap); + PATH_MAX, cifs_sb->local_nls, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e9ba1a150fe3..d7e74b1268cb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c 
@@ -1272,24 +1272,11 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { + struct TCP_Server_Info *server = tcon->ses->server; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && - (mode & S_IWUGO) == 0) { - FILE_BASIC_INFO info; - struct cifsInodeInfo *cifsInode; - u32 dosattrs; - int tmprc; - - memset(&info, 0, sizeof(info)); - cifsInode = CIFS_I(newinode); - dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; - info.Attributes = cpu_to_le32(dosattrs); - tmprc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (tmprc == 0) - cifsInode->cifsAttrs = dosattrs; - } + (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) + server->ops->mkdir_setinfo(newinode, full_path, cifs_sb, + tcon, xid); if (dentry->d_inode) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) dentry->d_inode->i_mode = (mode | S_IFDIR); @@ -1377,6 +1364,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; char *full_path; cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode); @@ -1403,9 +1391,15 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) goto mkdir_out; } + server = tcon->ses->server; + + if (!server->ops->mkdir) { + rc = -ENOSYS; + goto mkdir_out; + } + /* BB add setting the equivalent of mode via CreateX w/ACLs */ - rc = CIFSSMBMkDir(xid, tcon, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb); if (rc) { cFYI(1, "cifs_mkdir returned 0x%x", rc); d_drop(direntry); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c40356d24c5c..861e2df0c37d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -586,6 +586,27 @@ cifs_print_stats(struct 
seq_file *m, struct cifs_tcon *tcon) #endif } +static void +cifs_mkdir_setinfo(struct inode *inode, const char *full_path, + struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, + const unsigned int xid) +{ + FILE_BASIC_INFO info; + struct cifsInodeInfo *cifsInode; + u32 dosattrs; + int rc; + + memset(&info, 0, sizeof(info)); + cifsInode = CIFS_I(inode); + dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; + info.Attributes = cpu_to_le32(dosattrs); + rc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) + cifsInode->cifsAttrs = dosattrs; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -620,6 +641,8 @@ struct smb_version_operations smb1_operations = { .get_srv_inum = cifs_get_srv_inum, .build_path_to_root = cifs_build_path_to_root, .echo = CIFSSMBEcho, + .mkdir = CIFSSMBMkDir, + .mkdir_setinfo = cifs_mkdir_setinfo, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From a0e731839dd461eee0fe2dc026e0965e961e2730 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 19 Jul 2011 12:56:37 +0400 Subject: CIFS: Add SMB2 support for mkdir operation Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 30 ++++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 2 ++ fs/cifs/smb2proto.h | 6 ++++++ 3 files changed, 38 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1ba5c405315c..e129527a707a 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -122,3 +122,33 @@ out: kfree(smb2_data); return rc; } + +int +smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_open_op_close(xid, tcon, cifs_sb, name, + FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, + CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR); +} + +void +smb2_mkdir_setinfo(struct inode *inode, const char *name, + 
struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, + const unsigned int xid) +{ + FILE_BASIC_INFO data; + struct cifsInodeInfo *cifs_i; + u32 dosattrs; + int tmprc; + + memset(&data, 0, sizeof(data)); + cifs_i = CIFS_I(inode); + dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; + data.Attributes = cpu_to_le32(dosattrs); + tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name, + FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, + CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO); + if (tmprc == 0) + cifs_i->cifsAttrs = dosattrs; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 410cf925ea26..cc74871d2598 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -318,6 +318,8 @@ struct smb_version_operations smb21_operations = { .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, .build_path_to_root = smb2_build_path_to_root, + .mkdir = smb2_mkdir, + .mkdir_setinfo = smb2_mkdir_setinfo, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 902bbe2b5ad3..f9925082737c 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -52,6 +52,12 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, bool *adjust_tz); +extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, + const char *name, struct cifs_sb_info *cifs_sb); +extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, + struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, const unsigned int xid); + /* * SMB2 Worker functions - most of protocol specific implementation details * are contained within these calls. 
-- cgit v1.2.3 From f958ca5d88e6071767b10549d544b3475dfb6996 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 10 Jul 2012 16:14:18 +0400 Subject: CIFS: Move rmdir code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifsproto.h | 3 +-- fs/cifs/cifssmb.c | 15 ++++++++------- fs/cifs/inode.c | 15 +++++++++++---- fs/cifs/smb1ops.c | 1 + 5 files changed, 24 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 939f91aac162..977dc0e85ccb 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -253,6 +253,9 @@ struct smb_version_operations { void (*mkdir_setinfo)(struct inode *, const char *, struct cifs_sb_info *, struct cifs_tcon *, const unsigned int); + /* remove directory */ + int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, + struct cifs_sb_info *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 5e128fb2b618..f1bbf8305d3a 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -297,8 +297,7 @@ extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *name, const struct nls_table *nls_codepage, - int remap_special_chars); + const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, __u16 type, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index eb74cceef480..074923ce593d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -948,15 +948,15 @@ DelFileRetry: } int -CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *dirName, const struct nls_table *nls_codepage, - int remap) +CIFSSMBRmDir(const unsigned int xid, 
struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) { DELETE_DIRECTORY_REQ *pSMB = NULL; DELETE_DIRECTORY_RSP *pSMBr = NULL; int rc = 0; int bytes_returned; int name_len; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSSMBRmDir"); RmDirRetry: @@ -966,14 +966,15 @@ RmDirRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, dirName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, + PATH_MAX, cifs_sb->local_nls, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(dirName, PATH_MAX); + name_len = strnlen(name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->DirName, dirName, name_len); + strncpy(pSMB->DirName, name, name_len); } pSMB->BufferFormat = 0x04; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d7e74b1268cb..7354877fa3bd 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1426,7 +1426,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; char *full_path = NULL; struct cifsInodeInfo *cifsInode; @@ -1446,10 +1447,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) rc = PTR_ERR(tlink); goto rmdir_exit; } - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + if (!server->ops->rmdir) { + rc = -ENOSYS; + cifs_put_tlink(tlink); + goto rmdir_exit; + } - rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb); cifs_put_tlink(tlink); if (!rc) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 861e2df0c37d..3129ac74b819 100644 --- a/fs/cifs/smb1ops.c +++ 
b/fs/cifs/smb1ops.c @@ -643,6 +643,7 @@ struct smb_version_operations smb1_operations = { .echo = CIFSSMBEcho, .mkdir = CIFSSMBMkDir, .mkdir_setinfo = cifs_mkdir_setinfo, + .rmdir = CIFSSMBRmDir, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 1a500f010fb2d121c58f77ddfde2eca1bde3bfcd Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 10 Jul 2012 16:14:38 +0400 Subject: CIFS: Add SMB2 support for rmdir Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 9 +++++++++ fs/cifs/smb2ops.c | 1 + fs/cifs/smb2proto.h | 2 ++ 3 files changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index e129527a707a..2aa5cb08c526 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -152,3 +152,12 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; } + +int +smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, + 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, + NULL, SMB2_OP_DELETE); +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index cc74871d2598..826209bf3684 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -320,6 +320,7 @@ struct smb_version_operations smb21_operations = { .build_path_to_root = smb2_build_path_to_root, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, + .rmdir = smb2_rmdir, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index f9925082737c..bfaa7b148afd 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -57,6 +57,8 @@ extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, const unsigned int xid); +extern int smb2_rmdir(const unsigned int xid, struct 
cifs_tcon *tcon, + const char *name, struct cifs_sb_info *cifs_sb); /* * SMB2 Worker functions - most of protocol specific implementation details -- cgit v1.2.3 From 96d6d59ceaeaacba4088862f3c57fcd011f52832 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Fri, 27 Jul 2012 16:18:00 -0400 Subject: locks: move lease-specific code out of locks_delete_lock No point putting something only used by one caller into common code. Signed-off-by: J. Bruce Fields --- fs/locks.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 86668dd211ae..541075a41527 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -570,12 +570,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p) fl->fl_next = NULL; list_del_init(&fl->fl_link); - fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); - if (fl->fl_fasync != NULL) { - printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); - fl->fl_fasync = NULL; - } - if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; @@ -1150,6 +1144,11 @@ int lease_modify(struct file_lock **before, int arg) f_delown(filp); filp->f_owner.signum = 0; + fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); + if (fl->fl_fasync != NULL) { + printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); + fl->fl_fasync = NULL; + } locks_delete_lock(before); } return 0; -- cgit v1.2.3 From 99dbb8fe0992ecefd061e5efa7604b92eab58ccc Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Fri, 27 Jul 2012 16:30:12 -0400 Subject: nfsd4: fix missing fault_inject.h include Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d10ad8bc47aa..fddb18b2e877 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,6 +44,7 @@ #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" +#include "fault_inject.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -- cgit v1.2.3 From b26411f85d3763ec5fc553854d9c3c0966072090 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:55:54 +0400 Subject: LockD: mark host per network namespace on garbage collect This is required for per-network NLM shutdown and cleanup. This patch passes init_net for a while. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 3 ++- fs/lockd/svcsubs.c | 19 +++++++++++++------ include/linux/lockd/lockd.h | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index eb75ca7c2d6e..2c5f41b098e9 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -628,13 +628,14 @@ nlm_gc_hosts(void) struct hlist_head *chain; struct hlist_node *pos, *next; struct nlm_host *host; + struct net *net = &init_net; dprintk("lockd: host garbage collection\n"); for_each_host(host, pos, chain, nlm_server_hosts) host->h_inuse = 0; /* Mark all hosts that hold locks, blocks or shares */ - nlmsvc_mark_resources(); + nlmsvc_mark_resources(net); for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { if (atomic_read(&host->h_count) || host->h_inuse diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 2240d384d787..0deb5f6c9dd4 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -309,7 +309,8 @@ nlm_release_file(struct nlm_file *file) * Helpers function for resource traversal * * nlmsvc_mark_host: - * used by the garbage collector; simply sets h_inuse. + * used by the garbage collector; simply sets h_inuse only for those + * hosts, which passed network check. 
* Always returns 0. * * nlmsvc_same_host: @@ -320,12 +321,15 @@ nlm_release_file(struct nlm_file *file) * returns 1 iff the host is a client. * Used by nlmsvc_invalidate_all */ + static int -nlmsvc_mark_host(void *data, struct nlm_host *dummy) +nlmsvc_mark_host(void *data, struct nlm_host *hint) { struct nlm_host *host = data; - host->h_inuse = 1; + if ((hint->net == NULL) || + (host->net == hint->net)) + host->h_inuse = 1; return 0; } @@ -358,10 +362,13 @@ nlmsvc_is_client(void *data, struct nlm_host *dummy) * Mark all hosts that still hold resources */ void -nlmsvc_mark_resources(void) +nlmsvc_mark_resources(struct net *net) { - dprintk("lockd: nlmsvc_mark_resources\n"); - nlm_traverse_files(NULL, nlmsvc_mark_host, NULL); + struct nlm_host hint; + + dprintk("lockd: nlmsvc_mark_resources for net %p\n", net); + hint.net = net; + nlm_traverse_files(&hint, nlmsvc_mark_host, NULL); } /* diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f04ce6ac6d04..50e31a2c1a97 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -279,7 +279,7 @@ void nlmsvc_release_call(struct nlm_rqst *); __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nfs_fh *); void nlm_release_file(struct nlm_file *); -void nlmsvc_mark_resources(void); +void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); void nlmsvc_invalidate_all(void); -- cgit v1.2.3 From 27adaddc8de7f523a172246d5104cf1cd5e2191b Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:03 +0400 Subject: LockD: make garbage collector network namespace aware. Signed-off-by: J. 
Bruce Fields --- fs/lockd/host.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 2c5f41b098e9..991274a55664 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -45,7 +45,7 @@ static unsigned long next_gc; static unsigned long nrhosts; static DEFINE_MUTEX(nlm_host_mutex); -static void nlm_gc_hosts(void); +static void nlm_gc_hosts(struct net *net); struct nlm_lookup_host_info { const int server; /* search for server|client */ @@ -345,7 +345,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, mutex_lock(&nlm_host_mutex); if (time_after_eq(jiffies, next_gc)) - nlm_gc_hosts(); + nlm_gc_hosts(net); chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { @@ -588,7 +588,7 @@ nlm_shutdown_hosts_net(struct net *net) } /* Then, perform a garbage collection pass */ - nlm_gc_hosts(); + nlm_gc_hosts(net); mutex_unlock(&nlm_host_mutex); } @@ -623,27 +623,31 @@ nlm_shutdown_hosts(void) * mark & sweep for resources held by remote clients. 
*/ static void -nlm_gc_hosts(void) +nlm_gc_hosts(struct net *net) { struct hlist_head *chain; struct hlist_node *pos, *next; struct nlm_host *host; - struct net *net = &init_net; - dprintk("lockd: host garbage collection\n"); - for_each_host(host, pos, chain, nlm_server_hosts) + dprintk("lockd: host garbage collection for net %p\n", net); + for_each_host(host, pos, chain, nlm_server_hosts) { + if (net && host->net != net) + continue; host->h_inuse = 0; + } /* Mark all hosts that hold locks, blocks or shares */ nlmsvc_mark_resources(net); for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { + if (net && host->net != net) + continue; if (atomic_read(&host->h_count) || host->h_inuse || time_before(jiffies, host->h_expires)) { dprintk("nlm_gc_hosts skipping %s " - "(cnt %d use %d exp %ld)\n", + "(cnt %d use %d exp %ld net %p)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires); + host->h_inuse, host->h_expires, host->net); continue; } nlm_destroy_host_locked(host); -- cgit v1.2.3 From 3cf7fb07e077e599d8343113cf4ef81adb2ca627 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:11 +0400 Subject: LockD: manage garbage collection timeout per networks namespace This patch moves next_gc to per-net data. Note: the passed network can be NULL (when the Lockd kthread is exiting or the Lockd module is being removed). Signed-off-by: Stanislav Kinsbursky Signed-off-by: J.
Bruce Fields --- fs/lockd/host.c | 12 +++++++++--- fs/lockd/netns.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 991274a55664..3636734fe2ba 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -21,6 +21,8 @@ #include +#include "netns.h" + #define NLMDBG_FACILITY NLMDBG_HOSTCACHE #define NLM_HOST_NRHASH 32 #define NLM_HOST_REBIND (60 * HZ) @@ -41,7 +43,6 @@ static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH]; hlist_for_each_entry_safe((host), (pos), (next), \ (chain), h_hash) -static unsigned long next_gc; static unsigned long nrhosts; static DEFINE_MUTEX(nlm_host_mutex); @@ -337,6 +338,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, .hostname_len = hostname_len, .net = net, }; + struct lockd_net *ln = net_generic(net, lockd_net_id); dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, (int)hostname_len, hostname, rqstp->rq_vers, @@ -344,7 +346,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, mutex_lock(&nlm_host_mutex); - if (time_after_eq(jiffies, next_gc)) + if (time_after_eq(jiffies, ln->next_gc)) nlm_gc_hosts(net); chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; @@ -653,5 +655,9 @@ nlm_gc_hosts(struct net *net) nlm_destroy_host_locked(host); } - next_gc = jiffies + NLM_HOST_COLLECT; + if (net) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + ln->next_gc = jiffies + NLM_HOST_COLLECT; + } } diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index ce227e0fbc5c..97c6c771133f 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -5,6 +5,7 @@ struct lockd_net { unsigned int nlmsvc_users; + unsigned long next_gc; }; extern int lockd_net_id; -- cgit v1.2.3 From caa4e76b6f284bab535a98fd37b9c46856158bcb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:19 +0400 Subject: LockD: manage used host count per networks namespace This patch moves nrhosts into per-net data.
It also adds a kernel warning to nlm_shutdown_hosts_net() about remaining hosts in the specified network namespace context. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 18 ++++++++++++++++++ fs/lockd/netns.h | 1 + 2 files changed, 19 insertions(+) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 3636734fe2ba..6c56090ca531 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -173,6 +173,7 @@ out: static void nlm_destroy_host_locked(struct nlm_host *host) { struct rpc_clnt *clnt; + struct lockd_net *ln = net_generic(host->net, lockd_net_id); dprintk("lockd: destroy host %s\n", host->h_name); @@ -189,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host) rpc_shutdown_client(clnt); kfree(host); + ln->nrhosts--; nrhosts--; } @@ -229,6 +231,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, struct hlist_node *pos; struct nlm_host *host; struct nsm_handle *nsm = NULL; + struct lockd_net *ln = net_generic(net, lockd_net_id); dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, (hostname ?
hostname : ""), version, @@ -263,6 +266,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, goto out; hlist_add_head(&host->h_hash, chain); + ln->nrhosts++; nrhosts++; dprintk("lockd: %s created host %s (%s)\n", __func__, @@ -384,6 +388,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, memcpy(nlm_srcaddr(host), src_sap, src_len); host->h_srcaddrlen = src_len; hlist_add_head(&host->h_hash, chain); + ln->nrhosts++; nrhosts++; dprintk("lockd: %s created host %s (%s)\n", @@ -592,6 +597,19 @@ nlm_shutdown_hosts_net(struct net *net) /* Then, perform a garbage collection pass */ nlm_gc_hosts(net); mutex_unlock(&nlm_host_mutex); + + /* complain if any hosts are left */ + if (net) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net); + dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net); + for_each_host(host, pos, chain, nlm_server_hosts) { + dprintk(" %s (cnt %d use %d exp %ld net %p)\n", + host->h_name, atomic_read(&host->h_count), + host->h_inuse, host->h_expires, host->net); + } + } } /* diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 97c6c771133f..44c8f0b9230a 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -6,6 +6,7 @@ struct lockd_net { unsigned int nlmsvc_users; unsigned long next_gc; + unsigned long nrhosts; }; extern int lockd_net_id; -- cgit v1.2.3 From d5850ff9eaaa9bed0f0b56702db105e02ce4b709 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:27 +0400 Subject: Lockd: host complaining function introduced Just a small cleanup. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/lockd/host.c | 57 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 6c56090ca531..8cbf53d2c1bb 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -572,6 +572,35 @@ void nlm_host_rebooted(const struct nlm_reboot *info) nsm_release(nsm); } +static void nlm_complain_hosts(struct net *net) +{ + struct hlist_head *chain; + struct hlist_node *pos; + struct nlm_host *host; + + if (net) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + if (ln->nrhosts == 0) + return; + printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net); + dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net); + } else { + if (nrhosts == 0) + return; + printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); + dprintk("lockd: %lu hosts left:\n", nrhosts); + } + + for_each_host(host, pos, chain, nlm_server_hosts) { + if (net && host->net != net) + continue; + dprintk(" %s (cnt %d use %d exp %ld net %p)\n", + host->h_name, atomic_read(&host->h_count), + host->h_inuse, host->h_expires, host->net); + } +} + void nlm_shutdown_hosts_net(struct net *net) { @@ -598,18 +627,7 @@ nlm_shutdown_hosts_net(struct net *net) nlm_gc_hosts(net); mutex_unlock(&nlm_host_mutex); - /* complain if any hosts are left */ - if (net) { - struct lockd_net *ln = net_generic(net, lockd_net_id); - - printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net); - dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net); - for_each_host(host, pos, chain, nlm_server_hosts) { - dprintk(" %s (cnt %d use %d exp %ld net %p)\n", - host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); - } - } + nlm_complain_hosts(net); } /* @@ -619,22 +637,7 @@ nlm_shutdown_hosts_net(struct net *net) void nlm_shutdown_hosts(void) { - struct hlist_head *chain; - struct hlist_node *pos; - struct 
nlm_host *host; - nlm_shutdown_hosts_net(NULL); - - /* complain if any hosts are left */ - if (nrhosts != 0) { - printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); - dprintk("lockd: %lu hosts left:\n", nrhosts); - for_each_host(host, pos, chain, nlm_server_hosts) { - dprintk(" %s (cnt %d use %d exp %ld net %p)\n", - host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); - } - } } /* -- cgit v1.2.3 From e2edaa98cb2527c0f1c2d825ddb45a8b2d026669 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:35 +0400 Subject: Lockd: add more debug to host shutdown functions Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 8cbf53d2c1bb..0084ab853a2b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -608,11 +608,10 @@ nlm_shutdown_hosts_net(struct net *net) struct hlist_node *pos; struct nlm_host *host; - dprintk("lockd: shutting down host module\n"); mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ - dprintk("lockd: nuking all hosts...\n"); + dprintk("lockd: nuking all hosts in net %p...\n", net); for_each_host(host, pos, chain, nlm_server_hosts) { if (net && host->net != net) continue; @@ -637,6 +636,7 @@ nlm_shutdown_hosts_net(struct net *net) void nlm_shutdown_hosts(void) { + dprintk("lockd: shutting down host module\n"); nlm_shutdown_hosts_net(NULL); } -- cgit v1.2.3 From 66547b0251b0b62dcb637631f566410a0e1e47a8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:43 +0400 Subject: LockD: manage grace period per network namespace Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/lockd/netns.h | 2 ++ fs/lockd/svc.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 44c8f0b9230a..94653aecfffb 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -7,6 +7,8 @@ struct lockd_net { unsigned int nlmsvc_users; unsigned long next_gc; unsigned long nrhosts; + + struct delayed_work grace_period_end; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 80938fda67e0..70c417758eb9 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -95,21 +95,22 @@ static void grace_ender(struct work_struct *not_used) locks_end_grace(&lockd_manager); } -static DECLARE_DELAYED_WORK(grace_period_end, grace_ender); - static void set_grace_period(void) { unsigned long grace_period = get_lockd_grace_period(); + struct lockd_net *ln = net_generic(&init_net, lockd_net_id); locks_start_grace(&lockd_manager); - cancel_delayed_work_sync(&grace_period_end); - schedule_delayed_work(&grace_period_end, grace_period); + cancel_delayed_work_sync(&ln->grace_period_end); + schedule_delayed_work(&ln->grace_period_end, grace_period); } static void restart_grace(void) { if (nlmsvc_ops) { - cancel_delayed_work_sync(&grace_period_end); + struct lockd_net *ln = net_generic(&init_net, lockd_net_id); + + cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&lockd_manager); nlmsvc_invalidate_all(); set_grace_period(); @@ -124,6 +125,7 @@ lockd(void *vrqstp) { int err = 0, preverr = 0; struct svc_rqst *rqstp = vrqstp; + struct lockd_net *ln = net_generic(&init_net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -184,7 +186,7 @@ lockd(void *vrqstp) svc_process(rqstp); } flush_signals(current); - cancel_delayed_work_sync(&grace_period_end); + cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&lockd_manager); if (nlmsvc_ops) nlmsvc_invalidate_all(); @@ -589,6 +591,9 @@ 
module_param(nlm_max_connections, uint, 0644); static int lockd_init_net(struct net *net) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); return 0; } -- cgit v1.2.3 From 08d44a35a9e71a132c8e8abb0451b7b5e5b3dfee Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:50 +0400 Subject: LockD: make lockd manager allocated per network namespace Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/netns.h | 2 ++ fs/lockd/svc.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 94653aecfffb..e78650cb937c 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -1,6 +1,7 @@ #ifndef __LOCKD_NETNS_H__ #define __LOCKD_NETNS_H__ +#include #include struct lockd_net { @@ -9,6 +10,7 @@ struct lockd_net { unsigned long nrhosts; struct delayed_work grace_period_end; + struct lock_manager lockd_manager; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 70c417758eb9..a9c436bc450c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -87,12 +87,14 @@ static unsigned long get_lockd_grace_period(void) return nlm_timeout * 5 * HZ; } -static struct lock_manager lockd_manager = { -}; - -static void grace_ender(struct work_struct *not_used) +static void grace_ender(struct work_struct *grace) { - locks_end_grace(&lockd_manager); + struct delayed_work *dwork = container_of(grace, struct delayed_work, + work); + struct lockd_net *ln = container_of(dwork, struct lockd_net, + grace_period_end); + + locks_end_grace(&ln->lockd_manager); } static void set_grace_period(void) @@ -100,7 +102,7 @@ static void set_grace_period(void) unsigned long grace_period = get_lockd_grace_period(); struct lockd_net *ln = net_generic(&init_net, lockd_net_id); - locks_start_grace(&lockd_manager); + locks_start_grace(&ln->lockd_manager); 
cancel_delayed_work_sync(&ln->grace_period_end); schedule_delayed_work(&ln->grace_period_end, grace_period); } @@ -111,7 +113,7 @@ static void restart_grace(void) struct lockd_net *ln = net_generic(&init_net, lockd_net_id); cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&lockd_manager); + locks_end_grace(&ln->lockd_manager); nlmsvc_invalidate_all(); set_grace_period(); } @@ -187,7 +189,7 @@ lockd(void *vrqstp) } flush_signals(current); cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&lockd_manager); + locks_end_grace(&ln->lockd_manager); if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); -- cgit v1.2.3 From 5e1533c7880bb0df98f71fa683979ec296aa947d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:56:58 +0400 Subject: NFSd: make nfsd4_manager allocated per network namespace context. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 2 ++ fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 39365636b244..e99767d987c8 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -34,6 +34,8 @@ struct nfsd_net { struct cache_detail *idtoname_cache; struct cache_detail *nametoid_cache; + + struct lock_manager nfsd4_manager; }; extern int nfsd_net_id; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fddb18b2e877..4a44b50c2f58 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -46,6 +46,8 @@ #include "current_stateid.h" #include "fault_inject.h" +#include "netns.h" + #define NFSDDBG_FACILITY NFSDDBG_PROC /* Globals */ @@ -3116,22 +3118,21 @@ out: return status; } -static struct lock_manager nfsd4_manager = { -}; - static bool grace_ended; static void -nfsd4_end_grace(void) +nfsd4_end_grace(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + /* do nothing if grace period already ended */ if 
(grace_ended) return; dprintk("NFSD: end of grace period\n"); grace_ended = true; - nfsd4_record_grace_done(&init_net, boot_time); - locks_end_grace(&nfsd4_manager); + nfsd4_record_grace_done(net, boot_time); + locks_end_grace(&nn->nfsd4_manager); /* * Now that every NFSv4 client has had the chance to recover and * to see the (possibly new, possibly shorter) lease time, we @@ -3154,7 +3155,7 @@ nfs4_laundromat(void) nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - nfsd4_end_grace(); + nfsd4_end_grace(&init_net); INIT_LIST_HEAD(&reaplist); spin_lock(&client_lock); list_for_each_safe(pos, next, &client_lru) { @@ -4688,6 +4689,8 @@ set_max_delegations(void) int nfs4_state_start(void) { + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; /* @@ -4697,10 +4700,10 @@ nfs4_state_start(void) * to that instead and then do most of the rest of this on a per-net * basis. */ - get_net(&init_net); - nfsd4_client_tracking_init(&init_net); + get_net(net); + nfsd4_client_tracking_init(net); boot_time = get_seconds(); - locks_start_grace(&nfsd4_manager); + locks_start_grace(&nn->nfsd4_manager); grace_ended = false; printk(KERN_INFO "NFSD: starting %ld-second grace period\n", nfsd4_grace); @@ -4723,8 +4726,8 @@ nfs4_state_start(void) out_free_laundry: destroy_workqueue(laundry_wq); out_recovery: - nfsd4_client_tracking_exit(&init_net); - put_net(&init_net); + nfsd4_client_tracking_exit(net); + put_net(net); return ret; } @@ -4765,9 +4768,12 @@ __nfs4_state_shutdown(void) void nfs4_state_shutdown(void) { + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + cancel_delayed_work_sync(&laundromat_work); destroy_workqueue(laundry_wq); - locks_end_grace(&nfsd4_manager); + locks_end_grace(&nn->nfsd4_manager); nfs4_lock_state(); __nfs4_state_shutdown(); nfs4_unlock_state(); -- cgit v1.2.3 From 9695c7057f4887ed54dc1e6c2ef22f72a2be1175 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 
25 Jul 2012 16:57:06 +0400 Subject: SUNRPC: service request network namespace helper introduced This is a cleanup patch that makes the code look simpler. It replaces the widely used rqstp->rq_xprt->xpt_net with the newly introduced SVC_NET(rqstp). Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 2 +- fs/nfs/callback_xdr.c | 4 ++-- fs/nfsd/export.c | 4 ++-- fs/nfsd/nfs4idmap.c | 4 ++-- include/linux/sunrpc/svc.h | 2 ++ 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0084ab853a2b..f9b22e58f78f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -331,7 +331,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); size_t src_len = rqstp->rq_daddrlen; - struct net *net = rqstp->rq_xprt->xpt_net; + struct net *net = SVC_NET(rqstp); struct nlm_lookup_host_info ni = { .server = 1, .sap = svc_addr(rqstp), diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e64b01d2a338..742ff4ffced7 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -863,7 +863,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r .drc_status = 0, .clp = NULL, .slotid = NFS4_NO_SLOT, - .net = rqstp->rq_xprt->xpt_net, + .net = SVC_NET(rqstp), }; unsigned int nops = 0; @@ -879,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 1114463bb856..a3946cf13fc8 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -929,7 +929,7 @@ struct svc_export * rqst_exp_get_by_name(struct svc_rqst
*rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); - struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct cache_detail *cd = nn->svc_export_cache; if (rqstp->rq_client == NULL) @@ -960,7 +960,7 @@ struct svc_export * rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); - struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct cache_detail *cd = nn->svc_export_cache; if (rqstp->rq_client == NULL) diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index dae36f1dee95..fdc91a6fc9c4 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -546,7 +546,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen .type = type, }; int ret; - struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (namelen + 1 > sizeof(key.name)) return nfserr_badowner; @@ -571,7 +571,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) .type = type, }; int ret; - struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 40e0a273faea..d83db800fe02 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -278,6 +278,8 @@ struct svc_rqst { struct task_struct *rq_task; /* service thread */ }; +#define SVC_NET(svc_rqst) (svc_rqst->rq_xprt->xpt_net) + /* * Rigorous type checking on sockaddr type conversions */ -- cgit v1.2.3 From 
db9c4553412d72c6a05e0168d1d487f66e0660b3 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:57:13 +0400 Subject: LockD: manage grace list per network namespace Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/grace.c | 14 +++++++++++--- fs/lockd/netns.h | 1 + fs/lockd/svc.c | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c index 183cc1f0af1c..8dbaff782098 100644 --- a/fs/lockd/grace.c +++ b/fs/lockd/grace.c @@ -4,8 +4,10 @@ #include #include +#include + +#include "netns.h" -static LIST_HEAD(grace_list); static DEFINE_SPINLOCK(grace_lock); /** @@ -21,8 +23,11 @@ static DEFINE_SPINLOCK(grace_lock); */ void locks_start_grace(struct lock_manager *lm) { + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); + spin_lock(&grace_lock); - list_add(&lm->list, &grace_list); + list_add(&lm->list, &ln->grace_list); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); @@ -54,6 +59,9 @@ EXPORT_SYMBOL_GPL(locks_end_grace); */ int locks_in_grace(void) { - return !list_empty(&grace_list); + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); + + return !list_empty(&ln->grace_list); } EXPORT_SYMBOL_GPL(locks_in_grace); diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index e78650cb937c..4eee248ba96e 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -11,6 +11,7 @@ struct lockd_net { struct delayed_work grace_period_end; struct lock_manager lockd_manager; + struct list_head grace_list; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a9c436bc450c..834dfe2ed2e9 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -596,6 +596,7 @@ static int lockd_init_net(struct net *net) struct lockd_net *ln = net_generic(net, lockd_net_id); INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); + INIT_LIST_HEAD(&ln->grace_list); return 0; } -- cgit v1.2.3 From 
5ccb0066f2d561549cc4d73d7f56b4ce3ca7a8a1 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:57:22 +0400 Subject: LockD: pass actual network namespace to grace period management functions Passed network namespace replaced hard-coded init_net Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/grace.c | 6 ++---- fs/lockd/svc.c | 16 +++++++++------- fs/lockd/svc4proc.c | 13 +++++++------ fs/lockd/svclock.c | 16 ++++++++-------- fs/lockd/svcproc.c | 15 +++++++++------ fs/nfsd/nfs4proc.c | 18 ++++++++++-------- fs/nfsd/nfs4state.c | 29 +++++++++++++++-------------- fs/nfsd/state.h | 3 ++- include/linux/fs.h | 5 +++-- include/linux/lockd/lockd.h | 4 ++-- 10 files changed, 67 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c index 8dbaff782098..6d1ee7204c88 100644 --- a/fs/lockd/grace.c +++ b/fs/lockd/grace.c @@ -21,9 +21,8 @@ static DEFINE_SPINLOCK(grace_lock); * * This function is called to start a grace period. */ -void locks_start_grace(struct lock_manager *lm) +void locks_start_grace(struct net *net, struct lock_manager *lm) { - struct net *net = &init_net; struct lockd_net *ln = net_generic(net, lockd_net_id); spin_lock(&grace_lock); @@ -57,9 +56,8 @@ EXPORT_SYMBOL_GPL(locks_end_grace); * to answer ordinary lock requests, and when they should accept only * lock reclaims. 
*/ -int locks_in_grace(void) +int locks_in_grace(struct net *net) { - struct net *net = &init_net; struct lockd_net *ln = net_generic(net, lockd_net_id); return !list_empty(&ln->grace_list); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 834dfe2ed2e9..68271c206bdc 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -97,12 +97,12 @@ static void grace_ender(struct work_struct *grace) locks_end_grace(&ln->lockd_manager); } -static void set_grace_period(void) +static void set_grace_period(struct net *net) { unsigned long grace_period = get_lockd_grace_period(); - struct lockd_net *ln = net_generic(&init_net, lockd_net_id); + struct lockd_net *ln = net_generic(net, lockd_net_id); - locks_start_grace(&ln->lockd_manager); + locks_start_grace(net, &ln->lockd_manager); cancel_delayed_work_sync(&ln->grace_period_end); schedule_delayed_work(&ln->grace_period_end, grace_period); } @@ -110,12 +110,13 @@ static void set_grace_period(void) static void restart_grace(void) { if (nlmsvc_ops) { - struct lockd_net *ln = net_generic(&init_net, lockd_net_id); + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); nlmsvc_invalidate_all(); - set_grace_period(); + set_grace_period(net); } } @@ -127,7 +128,8 @@ lockd(void *vrqstp) { int err = 0, preverr = 0; struct svc_rqst *rqstp = vrqstp; - struct lockd_net *ln = net_generic(&init_net, lockd_net_id); + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -141,7 +143,7 @@ lockd(void *vrqstp) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - set_grace_period(); + set_grace_period(net); /* * The main request loop. 
We don't terminate until the last diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 9a41fdc19511..4a43d253c045 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -11,6 +11,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_CLIENT @@ -151,7 +152,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (locks_in_grace()) { + if (locks_in_grace(SVC_NET(rqstp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -161,7 +162,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Try to cancel request. */ - resp->status = nlmsvc_cancel_blocked(file, &argp->lock); + resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); nlmsvc_release_host(host); @@ -184,7 +185,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (locks_in_grace()) { + if (locks_in_grace(SVC_NET(rqstp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -194,7 +195,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; /* Now try to remove the lock */ - resp->status = nlmsvc_unlock(file, &argp->lock); + resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); nlmsvc_release_host(host); @@ -321,7 +322,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (locks_in_grace() && !argp->reclaim) { + if (locks_in_grace(SVC_NET(rqstp)) && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -354,7 +355,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (locks_in_grace()) { + if (locks_in_grace(SVC_NET(rqstp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e46353f41a42..afe4488c33d8 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -447,11 +447,11 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - if (locks_in_grace() && !reclaim) { + if (locks_in_grace(SVC_NET(rqstp)) && !reclaim) { ret = nlm_lck_denied_grace_period; goto out; } - if (reclaim && !locks_in_grace()) { + if (reclaim && !locks_in_grace(SVC_NET(rqstp))) { ret = nlm_lck_denied_grace_period; goto out; } @@ -559,7 +559,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - if (locks_in_grace()) { + if (locks_in_grace(SVC_NET(rqstp))) { ret = nlm_lck_denied_grace_period; goto out; } @@ -603,7 +603,7 @@ out: * must be removed. 
*/ __be32 -nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) +nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) { int error; @@ -615,7 +615,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_end); /* First, cancel any lock that might be there */ - nlmsvc_cancel_blocked(file, lock); + nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); @@ -631,7 +631,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) * The calling procedure must check whether the file can be closed. */ __be32 -nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) +nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *lock) { struct nlm_block *block; int status = 0; @@ -643,7 +643,7 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - if (locks_in_grace()) + if (locks_in_grace(net)) return nlm_lck_denied_grace_period; mutex_lock(&file->f_mutex); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index d27aab11f324..de8f2caa2235 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -11,6 +11,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_CLIENT @@ -175,13 +176,14 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; + struct net *net = SVC_NET(rqstp); dprintk("lockd: CANCEL called\n"); resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (locks_in_grace()) { + if (locks_in_grace(net)) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -191,7 +193,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Try to cancel request. 
*/ - resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock)); + resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock)); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); nlmsvc_release_host(host); @@ -208,13 +210,14 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; + struct net *net = SVC_NET(rqstp); dprintk("lockd: UNLOCK called\n"); resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (locks_in_grace()) { + if (locks_in_grace(net)) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -224,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now try to remove the lock */ - resp->status = cast_status(nlmsvc_unlock(file, &argp->lock)); + resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock)); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); nlmsvc_release_host(host); @@ -361,7 +364,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (locks_in_grace() && !argp->reclaim) { + if (locks_in_grace(SVC_NET(rqstp)) && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -394,7 +397,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (locks_in_grace()) { + if (locks_in_grace(SVC_NET(rqstp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 987e719fbae8..c9c1c0a25417 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -354,10 +354,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. 
Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -686,7 +686,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid, + if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), + cstate, &read->rd_stateid, RD_STATE, &read->rd_filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; @@ -741,7 +742,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (locks_in_grace()) + if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -760,8 +761,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags - & NFSEXP_NOSUBTREECHECK)) + if (locks_in_grace(SVC_NET(rqstp)) && + !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, @@ -845,7 +846,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(cstate, + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &setattr->sa_stateid, WR_STATE, NULL); nfs4_unlock_state(); if (status) { @@ -890,7 +891,8 @@ 
nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), + cstate, stateid, WR_STATE, &filp); if (filp) get_file(filp); nfs4_unlock_state(); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4a44b50c2f58..34f65f10fa43 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2885,7 +2885,8 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * Attempt to hand out a delegation. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct net *net, struct svc_fh *fh, + struct nfsd4_open *open, struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); @@ -2906,7 +2907,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_ case NFS4_OPEN_CLAIM_NULL: /* Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (locks_in_grace()) + if (locks_in_grace(net)) goto out; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out; @@ -3040,7 +3041,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. 
*/ - nfs4_open_delegation(current_fh, open, stp); + nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); nodeleg: status = nfs_ok; @@ -3279,11 +3280,11 @@ out: } static inline __be32 -check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) +check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags) { if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (locks_in_grace()) { + else if (locks_in_grace(net)) { /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; @@ -3300,9 +3301,9 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) * that are not able to provide mandatory locking. */ static inline int -grace_disallows_io(struct inode *inode) +grace_disallows_io(struct net *net, struct inode *inode) { - return locks_in_grace() && mandatory_lock(inode); + return locks_in_grace(net) && mandatory_lock(inode); } /* Returns true iff a is later than b: */ @@ -3393,7 +3394,7 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, s * Checks for stateid operations */ __be32 -nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, +nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { struct nfs4_stid *s; @@ -3406,11 +3407,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (filpp) *filpp = NULL; - if (grace_disallows_io(ino)) + if (grace_disallows_io(net, ino)) return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(current_fh, stateid, flags); + return check_special_stateids(net, current_fh, stateid, flags); status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s); if (status) @@ -4107,10 +4108,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; 
status = nfserr_grace; - if (locks_in_grace() && !lock->lk_reclaim) + if (locks_in_grace(SVC_NET(rqstp)) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!locks_in_grace() && lock->lk_reclaim) + if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim) goto out; locks_init_lock(&file_lock); @@ -4210,7 +4211,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_lockowner *lo; __be32 status; - if (locks_in_grace()) + if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) @@ -4703,7 +4704,7 @@ nfs4_state_start(void) get_net(net); nfsd4_client_tracking_init(net); boot_time = get_seconds(); - locks_start_grace(&nn->nfsd4_manager); + locks_start_grace(net, &nn->nfsd4_manager); grace_ended = false; printk(KERN_INFO "NFSD: starting %ld-second grace period\n", nfsd4_grace); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 495df4e3aa67..981ef10141b3 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -451,7 +451,8 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) struct nfsd4_compound_state; -extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, +extern __be32 nfs4_preprocess_stateid_op(struct net *net, + struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 17fd887c798f..a1e77270f5a5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1163,9 +1163,10 @@ struct lock_manager { struct list_head list; }; -void locks_start_grace(struct lock_manager *); +struct net; +void locks_start_grace(struct net *, struct lock_manager *); void locks_end_grace(struct lock_manager *); -int locks_in_grace(void); +int locks_in_grace(struct net *); /* that will die - we need it for nfs_lock_info */ #include diff --git a/include/linux/lockd/lockd.h 
b/include/linux/lockd/lockd.h index 50e31a2c1a97..f5a051a79273 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -262,11 +262,11 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref); __be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, int, struct nlm_cookie *, int); -__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); +__be32 nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *); __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); -__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); +__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); unsigned long nlmsvc_retry_blocked(void); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); -- cgit v1.2.3 From 5630f7fa97e8dfa2b3c6e7370c1702180336e493 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:57:29 +0400 Subject: Lockd: move grace period management from lockd() to per-net functions Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 68271c206bdc..31a63f87b806 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -128,8 +128,6 @@ lockd(void *vrqstp) { int err = 0, preverr = 0; struct svc_rqst *rqstp = vrqstp; - struct net *net = &init_net; - struct lockd_net *ln = net_generic(net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -143,8 +141,6 @@ lockd(void *vrqstp) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - set_grace_period(net); - /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. 
@@ -190,8 +186,6 @@ lockd(void *vrqstp) svc_process(rqstp); } flush_signals(current); - cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&ln->lockd_manager); if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); @@ -272,6 +266,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) error = make_socks(serv, net); if (error < 0) goto err_socks; + set_grace_period(net); dprintk("lockd_up_net: per-net data created; net=%p\n", net); return 0; @@ -289,6 +284,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); } -- cgit v1.2.3 From a51c84ed502c25fed996afb7696fd7db2fa32fe2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:57:37 +0400 Subject: NFSd: make grace end flag per network namespace Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 1 + fs/nfsd/nfs4state.c | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index e99767d987c8..b6deebd08ef2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -36,6 +36,7 @@ struct nfsd_net { struct cache_detail *nametoid_cache; struct lock_manager nfsd4_manager; + bool grace_ended; }; extern int nfsd_net_id; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 34f65f10fa43..aebb58d3ac4a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3119,19 +3119,17 @@ out: return status; } -static bool grace_ended; - static void nfsd4_end_grace(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); /* do nothing if grace period already ended */ - if (grace_ended) + if (nn->grace_ended) return; dprintk("NFSD: end of grace period\n"); - grace_ended = true; + nn->grace_ended = true; nfsd4_record_grace_done(net, boot_time); locks_end_grace(&nn->nfsd4_manager); /* @@ -4705,7 +4703,7 @@ nfs4_state_start(void) nfsd4_client_tracking_init(net); boot_time = get_seconds(); locks_start_grace(net, &nn->nfsd4_manager); - grace_ended = false; + nn->grace_ended = false; printk(KERN_INFO "NFSD: starting %ld-second grace period\n", nfsd4_grace); ret = set_callback_cred(); -- cgit v1.2.3 From 2c142baa7b237584bae7dc28630851701497e1ef Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:57:45 +0400 Subject: NFSd: make boot_time variable per network namespace NFSd's boot_time represents grace period start point in time. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 1 + fs/nfsd/nfs4state.c | 39 +++++++++++++++++++++++---------------- fs/nfsd/state.h | 1 + 3 files changed, 25 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index b6deebd08ef2..65c2431ea32f 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -37,6 +37,7 @@ struct nfsd_net { struct lock_manager nfsd4_manager; bool grace_ended; + time_t boot_time; }; extern int nfsd_net_id; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index aebb58d3ac4a..cc894eda385a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -53,7 +53,6 @@ /* Globals */ time_t nfsd4_lease = 90; /* default lease time */ time_t nfsd4_grace = 90; -static time_t boot_time; #define all_ones {{~0,~0},~0} static const stateid_t one_stateid = { @@ -1056,12 +1055,12 @@ renew_client(struct nfs4_client *clp) /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int -STALE_CLIENTID(clientid_t *clid) +STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) { - if (clid->cl_boot == boot_time) + if (clid->cl_boot == nn->boot_time) return 0; dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", - clid->cl_boot, clid->cl_id, boot_time); + clid->cl_boot, clid->cl_id, nn->boot_time); return 1; } @@ -1242,8 +1241,9 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) static void gen_clid(struct nfs4_client *clp) { static u32 current_clientid = 1; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); - clp->cl_clientid.cl_boot = boot_time; + clp->cl_clientid.cl_boot = nn->boot_time; clp->cl_clientid.cl_id = current_clientid++; } @@ -2226,8 +2226,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); - if (STALE_CLIENTID(clid)) + if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; nfs4_lock_state(); @@ 
-2586,8 +2587,9 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, unsigned int strhashval; struct nfs4_openowner *oo = NULL; __be32 status; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); - if (STALE_CLIENTID(&open->op_clientid)) + if (STALE_CLIENTID(&open->op_clientid, nn)) return nfserr_stale_clientid; /* * In case we need it later, after we've already created the @@ -3095,12 +3097,13 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_client *clp; __be32 status; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid)) + if (STALE_CLIENTID(clid, nn)) goto out; clp = find_confirmed_client(clid); status = nfserr_expired; @@ -3130,7 +3133,7 @@ nfsd4_end_grace(struct net *net) dprintk("NFSD: end of grace period\n"); nn->grace_ended = true; - nfsd4_record_grace_done(net, boot_time); + nfsd4_record_grace_done(net, nn->boot_time); locks_end_grace(&nn->nfsd4_manager); /* * Now that every NFSv4 client has had the chance to recover and @@ -3236,9 +3239,9 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *s } static int -STALE_STATEID(stateid_t *stateid) +STALE_STATEID(stateid_t *stateid, struct nfsd_net *nn) { - if (stateid->si_opaque.so_clid.cl_boot == boot_time) + if (stateid->si_opaque.so_clid.cl_boot == nn->boot_time) return 0; dprintk("NFSD: stale stateid " STATEID_FMT "!\n", STATEID_VAL(stateid)); @@ -3373,10 +3376,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s) { struct nfs4_client *cl; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - if (STALE_STATEID(stateid)) + if (STALE_STATEID(stateid, 
nn)) return nfserr_stale_stateid; cl = find_confirmed_client(&stateid->si_opaque.so_clid); if (!cl) @@ -4048,6 +4052,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, bool new_state = false; int lkflg; int err; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, @@ -4074,7 +4079,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, sizeof(clientid_t)); status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid, nn)) goto out; /* validate and update open stateid and open seqid */ @@ -4208,6 +4213,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock file_lock; struct nfs4_lockowner *lo; __be32 status; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; @@ -4218,7 +4224,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) + if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid, nn)) goto out; if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) @@ -4367,6 +4373,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct list_head matches; unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); @@ -4374,7 +4381,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, /* XXX check for lease expiration */ status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid)) + if (STALE_CLIENTID(clid, nn)) return status; nfs4_lock_state(); @@ -4701,7 +4708,7 @@ nfs4_state_start(void) */ get_net(net); nfsd4_client_tracking_init(net); - 
boot_time = get_seconds(); + nn->boot_time = get_seconds(); locks_start_grace(net, &nn->nfsd4_manager); nn->grace_ended = false; printk(KERN_INFO "NFSD: starting %ld-second grace period\n", diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 981ef10141b3..e6173147f982 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -450,6 +450,7 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) #define WR_STATE 0x00000020 struct nfsd4_compound_state; +struct nfsd_net; extern __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, -- cgit v1.2.3 From 921a1650de9eed40dd64d681aba4a4d98856f289 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 01:15:31 +0400 Subject: new helper: done_path_create() releases what needs to be released after {kern,user}_path_create() Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/syscalls.c | 4 +--- drivers/base/devtmpfs.c | 9 ++------- fs/namei.c | 24 ++++++++++++------------ fs/ocfs2/refcounttree.c | 4 +--- include/linux/namei.h | 1 + net/unix/af_unix.c | 9 ++++----- 6 files changed, 21 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 8591bb62d7fc..5b7d8ffbf890 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -70,9 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, ret = PTR_ERR(dentry); if (!IS_ERR(dentry)) { ret = spufs_create(&path, dentry, flags, mode, neighbor); - mutex_unlock(&path.dentry->d_inode->i_mutex); - dput(dentry); - path_put(&path); + done_path_create(&path, dentry); } return ret; diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index d91a3a0b2325..deb4a456cf83 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -156,9 +156,7 @@ static int dev_mkdir(const char *name, umode_t mode) if (!err) /* mark as 
kernel-created inode */ dentry->d_inode->i_private = &thread; - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); return err; } @@ -218,10 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev) /* mark as kernel-created inode */ dentry->d_inode->i_private = &thread; } - dput(dentry); - - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); return err; } diff --git a/fs/namei.c b/fs/namei.c index 2ccc35c4dc24..5bc6f3d1dc8a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2893,6 +2893,14 @@ out: } EXPORT_SYMBOL(kern_path_create); +void done_path_create(struct path *path, struct dentry *dentry) +{ + dput(dentry); + mutex_unlock(&path->dentry->d_inode->i_mutex); + path_put(path); +} +EXPORT_SYMBOL(done_path_create); + struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) { char *tmp = getname(pathname); @@ -2989,9 +2997,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, out_drop_write: mnt_drop_write(path.mnt); out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); return error; } @@ -3048,9 +3054,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) out_drop_write: mnt_drop_write(path.mnt); out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); return error; } @@ -3334,9 +3338,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, out_drop_write: mnt_drop_write(path.mnt); out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); out_putname: putname(from); return error; @@ -3446,9 +3448,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, out_drop_write: 
mnt_drop_write(new_path.mnt); out_dput: - dput(new_dentry); - mutex_unlock(&new_path.dentry->d_inode->i_mutex); - path_put(&new_path); + done_path_create(&new_path, new_dentry); out: path_put(&old_path); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9f32d7cbb7a3..23cf78f68503 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4477,9 +4477,7 @@ int ocfs2_reflink_ioctl(struct inode *inode, new_dentry, preserve); mnt_drop_write(new_path.mnt); out_dput: - dput(new_dentry); - mutex_unlock(&new_path.dentry->d_inode->i_mutex); - path_put(&new_path); + done_path_create(&new_path, new_dentry); out: path_put(&old_path); diff --git a/include/linux/namei.h b/include/linux/namei.h index d2ef8b34b967..4bf19d8174ed 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -67,6 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, int); extern struct dentry *user_path_create(int, const char __user *, struct path *, int); +extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f165..e8239540683a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -887,8 +887,9 @@ out_mknod_drop_write: mnt_drop_write(path.mnt); if (err) goto out_mknod_dput; - mutex_unlock(&path.dentry->d_inode->i_mutex); - dput(path.dentry); + mntget(path.mnt); + dget(dentry); + done_path_create(&path, dentry); path.dentry = dentry; addr->hash = UNIX_HASH_SIZE; @@ -923,9 +924,7 @@ out: return err; out_mknod_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); out_mknod_parent: if (err == -EEXIST) err = -EADDRINUSE; -- cgit v1.2.3 From 
8e4bfca1d1f0de62301dd223675717e7a5f63a27 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 01:17:26 +0400 Subject: mknod: take sanity checks on mode into the very beginning Note that applying umask can't affect their results. While that affects errno in cases like mknod("/no_such_directory/a", 030000) yielding -EINVAL (due to impossible mode_t) instead of -ENOENT (due to inexistent directory), IMO that makes a lot more sense, POSIX allows to return either and any software that relies on getting -ENOENT instead of -EINVAL in that case deserves everything it gets. Signed-off-by: Al Viro --- fs/namei.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 5bc6f3d1dc8a..cf362dc9d1fd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2964,8 +2964,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, struct path path; int error; - if (S_ISDIR(mode)) - return -EPERM; + error = may_mknod(mode); + if (error) + return error; dentry = user_path_create(dfd, filename, &path, 0); if (IS_ERR(dentry)) @@ -2973,9 +2974,6 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = may_mknod(mode); - if (error) - goto out_dput; error = mnt_want_write(path.mnt); if (error) goto out_dput; -- cgit v1.2.3 From a8104a9fcdeb82e22d7acd55fca20746581067d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 02:25:00 +0400 Subject: pull mnt_want_write()/mnt_drop_write() into kern_path_create()/done_path_create() resp. One side effect - attempt to create a cross-device link on a read-only fs fails with EROFS instead of EXDEV now. Makes more sense, POSIX allows, etc. 
Signed-off-by: Al Viro --- fs/namei.c | 57 ++++++++++++++++--------------------------------- fs/ocfs2/refcounttree.c | 7 ------ net/unix/af_unix.c | 4 ---- 3 files changed, 18 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index cf362dc9d1fd..a3fb78fd70d2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2865,10 +2865,11 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) - goto fail; + goto unlock; + error = -EEXIST; if (dentry->d_inode) - goto eexist; + goto fail; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - @@ -2876,16 +2877,18 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path * been asking for (non-existent) directory. -ENOENT for you. */ if (unlikely(!is_dir && nd.last.name[nd.last.len])) { - dput(dentry); - dentry = ERR_PTR(-ENOENT); + error = -ENOENT; goto fail; } + error = mnt_want_write(nd.path.mnt); + if (error) + goto fail; *path = nd.path; return dentry; -eexist: - dput(dentry); - dentry = ERR_PTR(-EEXIST); fail: + dput(dentry); + dentry = ERR_PTR(error); +unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); out: path_put(&nd.path); @@ -2897,6 +2900,7 @@ void done_path_create(struct path *path, struct dentry *dentry) { dput(dentry); mutex_unlock(&path->dentry->d_inode->i_mutex); + mnt_drop_write(path->mnt); path_put(path); } EXPORT_SYMBOL(done_path_create); @@ -2974,12 +2978,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_mknod(&path, dentry, mode, dev); if (error) - goto out_drop_write; + goto out; switch (mode & S_IFMT) { case 0: case S_IFREG: error = 
vfs_create(path.dentry->d_inode,dentry,mode,true); @@ -2992,11 +2993,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); break; } -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: +out: done_path_create(&path, dentry); - return error; } @@ -3042,16 +3040,9 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_mkdir(&path, dentry, mode); - if (error) - goto out_drop_write; - error = vfs_mkdir(path.dentry->d_inode, dentry, mode); -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: + if (!error) + error = vfs_mkdir(path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); return error; } @@ -3326,16 +3317,9 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, if (IS_ERR(dentry)) goto out_putname; - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_symlink(&path, dentry, from); - if (error) - goto out_drop_write; - error = vfs_symlink(path.dentry->d_inode, dentry, from); -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: + if (!error) + error = vfs_symlink(path.dentry->d_inode, dentry, from); done_path_create(&path, dentry); out_putname: putname(from); @@ -3436,15 +3420,10 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; - error = mnt_want_write(new_path.mnt); - if (error) - goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) - goto out_drop_write; + goto out_dput; error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); -out_drop_write: - mnt_drop_write(new_path.mnt); out_dput: done_path_create(&new_path, new_dentry); out: diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 
23cf78f68503..30a055049e16 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4466,16 +4466,9 @@ int ocfs2_reflink_ioctl(struct inode *inode, goto out_dput; } - error = mnt_want_write(new_path.mnt); - if (error) { - mlog_errno(error); - goto out_dput; - } - error = ocfs2_vfs_reflink(old_path.dentry, new_path.dentry->d_inode, new_dentry, preserve); - mnt_drop_write(new_path.mnt); out_dput: done_path_create(&new_path, new_dentry); out: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e8239540683a..88ab72820b9f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -876,15 +876,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = mnt_want_write(path.mnt); - if (err) - goto out_mknod_dput; err = security_path_mknod(&path, dentry, mode, 0); if (err) goto out_mknod_drop_write; err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); out_mknod_drop_write: - mnt_drop_write(path.mnt); if (err) goto out_mknod_dput; mntget(path.mnt); -- cgit v1.2.3 From bc65a1215eda3e067801e0a8f3eeffb62800f355 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 12:03:41 +0400 Subject: sanitize ecryptfs_lookup() * ->lookup() never gets hit with . or .. * dentry it gets is unhashed, so unless we had gone and hashed it ourselves, there's no need to d_drop() the sucker. 
* wrong name printed in one of the printks (NULL, in fact) Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ffa2be57804d..eeb734aea5ba 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -381,12 +381,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct dentry *lower_dir_dentry, *lower_dentry; int rc = 0; - if ((ecryptfs_dentry->d_name.len == 1 - && !strcmp(ecryptfs_dentry->d_name.name, ".")) - || (ecryptfs_dentry->d_name.len == 2 - && !strcmp(ecryptfs_dentry->d_name.name, ".."))) { - goto out_d_drop; - } lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); mutex_lock(&lower_dir_dentry->d_inode->i_mutex); lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, @@ -397,8 +391,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, rc = PTR_ERR(lower_dentry); ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, - encrypted_and_encoded_name); - goto out_d_drop; + ecryptfs_dentry->d_name.name); + goto out; } if (lower_dentry->d_inode) goto interpose; @@ -415,7 +409,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, if (rc) { printk(KERN_ERR "%s: Error attempting to encrypt and encode " "filename; rc = [%d]\n", __func__, rc); - goto out_d_drop; + goto out; } mutex_lock(&lower_dir_dentry->d_inode->i_mutex); lower_dentry = lookup_one_len(encrypted_and_encoded_name, @@ -427,14 +421,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, encrypted_and_encoded_name); - goto out_d_drop; + goto out; } interpose: rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, ecryptfs_dir_inode); - goto out; -out_d_drop: - 
d_drop(ecryptfs_dentry); out: kfree(encrypted_and_encoded_name); return ERR_PTR(rc); -- cgit v1.2.3 From 0b1d90119a479ca3b70d871da4b2ce6c4ef9eff0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 12:09:19 +0400 Subject: ecryptfs_lookup_interpose(): allocate dentry_info first less work on failure that way Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index eeb734aea5ba..c3ca12c33ca2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -318,21 +318,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, struct vfsmount *lower_mnt; int rc = 0; - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); - fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); - BUG_ON(!lower_dentry->d_count); - dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); - ecryptfs_set_dentry_private(dentry, dentry_info); if (!dentry_info) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to allocate ecryptfs_dentry_info struct\n", __func__); dput(lower_dentry); - mntput(lower_mnt); - d_drop(dentry); return -ENOMEM; } + + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); + fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); + BUG_ON(!lower_dentry->d_count); + + ecryptfs_set_dentry_private(dentry, dentry_info); ecryptfs_set_dentry_lower(dentry, lower_dentry); ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); -- cgit v1.2.3 From 5c33b183a36500a5b0a3c53c11c431f0fec6efc8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 23:05:59 +0400 Subject: uninline file_free_rcu() What inline? Its only use is passing its address to call_rcu(), for fuck sake! 
Signed-off-by: Al Viro --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index b3fc4d67a26b..b54bf7fd0b15 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly; static struct percpu_counter nr_files __cacheline_aligned_in_smp; -static inline void file_free_rcu(struct rcu_head *head) +static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_u.fu_rcuhead); -- cgit v1.2.3 From b5bcdda32736b94a7d178d156d80a69f536ad468 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 23:28:46 +0400 Subject: take grabbing f->f_path to do_dentry_open() Signed-off-by: Al Viro --- fs/open.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 1e914b397e12..8d2c8970029c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -654,6 +654,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) f->f_mode = FMODE_PATH; + path_get(&f->f_path); inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = __get_file_write_access(inode, f->f_path.mnt); @@ -739,9 +740,7 @@ int finish_open(struct file *file, struct dentry *dentry, int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(file->f_path.mnt); - file->f_path.dentry = dget(dentry); - + file->f_path.dentry = dentry; error = do_dentry_open(file, open, current_cred()); if (!error) *opened |= FILE_OPENED; @@ -784,7 +783,6 @@ struct file *dentry_open(const struct path *path, int flags, f->f_flags = flags; f->f_path = *path; - path_get(&f->f_path); error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); -- cgit v1.2.3 From e4fad8e5d220e3dfb1050eee752ee5058f29a232 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Jul 2012 15:33:25 +0400 Subject: consolidate pipe file creation 
Signed-off-by: Al Viro --- fs/exec.c | 19 ++++-------- fs/pipe.c | 75 ++++++++++++++++------------------------------- include/linux/fs.h | 3 -- include/linux/pipe_fs_i.h | 2 ++ 4 files changed, 34 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index da27b91ff1e8..b800fb87f6ce 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file) */ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { - struct file *rp, *wp; + struct file *files[2]; struct fdtable *fdt; struct coredump_params *cp = (struct coredump_params *)info->data; struct files_struct *cf = current->files; + int err = create_pipe_files(files, 0); + if (err) + return err; - wp = create_write_pipe(0); - if (IS_ERR(wp)) - return PTR_ERR(wp); - - rp = create_read_pipe(wp, 0); - if (IS_ERR(rp)) { - free_write_pipe(wp); - return PTR_ERR(rp); - } - - cp->file = wp; + cp->file = files[1]; sys_close(0); - fd_install(0, rp); + fd_install(0, files[0]); spin_lock(&cf->file_lock); fdt = files_fdtable(cf); __set_open_fd(0, fdt); diff --git a/fs/pipe.c b/fs/pipe.c index 49c1065256fd..7523d9d2a998 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1016,18 +1016,16 @@ fail_inode: return NULL; } -struct file *create_write_pipe(int flags) +int create_pipe_files(struct file **res, int flags) { int err; - struct inode *inode; + struct inode *inode = get_pipe_inode(); struct file *f; struct path path; - struct qstr name = { .name = "" }; + static struct qstr name = { .name = "" }; - err = -ENFILE; - inode = get_pipe_inode(); if (!inode) - goto err; + return -ENFILE; err = -ENOMEM; path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); @@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags) f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); if (!f) goto err_dentry; - f->f_mapping = inode->i_mapping; f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); - f->f_version = 0; - return f; + res[0] = 
alloc_file(&path, FMODE_READ, &read_pipefifo_fops); + if (!res[0]) + goto err_file; + + path_get(&path); + res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); + res[1] = f; + return 0; - err_dentry: +err_file: + put_filp(f); +err_dentry: free_pipe_info(inode); path_put(&path); - return ERR_PTR(err); + return err; - err_inode: +err_inode: free_pipe_info(inode); iput(inode); - err: - return ERR_PTR(err); -} - -void free_write_pipe(struct file *f) -{ - free_pipe_info(f->f_dentry->d_inode); - path_put(&f->f_path); - put_filp(f); -} - -struct file *create_read_pipe(struct file *wrf, int flags) -{ - /* Grab pipe from the writer */ - struct file *f = alloc_file(&wrf->f_path, FMODE_READ, - &read_pipefifo_fops); - if (!f) - return ERR_PTR(-ENFILE); - - path_get(&wrf->f_path); - f->f_flags = O_RDONLY | (flags & O_NONBLOCK); - - return f; + return err; } int do_pipe_flags(int *fd, int flags) { - struct file *fw, *fr; + struct file *files[2]; int error; int fdw, fdr; if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) return -EINVAL; - fw = create_write_pipe(flags); - if (IS_ERR(fw)) - return PTR_ERR(fw); - fr = create_read_pipe(fw, flags); - error = PTR_ERR(fr); - if (IS_ERR(fr)) - goto err_write_pipe; + error = create_pipe_files(files, flags); + if (error) + return error; error = get_unused_fd_flags(flags); if (error < 0) @@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags) fdw = error; audit_fd_pair(fdr, fdw); - fd_install(fdr, fr); - fd_install(fdw, fw); + fd_install(fdr, files[0]); + fd_install(fdw, files[1]); fd[0] = fdr; fd[1] = fdw; @@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags) err_fdr: put_unused_fd(fdr); err_read_pipe: - path_put(&fr->f_path); - put_filp(fr); - err_write_pipe: - free_write_pipe(fw); + fput(files[0]); + fput(files[1]); return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 8fabb037a48d..478237844648 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2326,9 +2326,6 @@ static inline void 
i_readcount_inc(struct inode *inode) } #endif extern int do_pipe_flags(int *, int); -extern struct file *create_read_pipe(struct file *f, int flags); -extern struct file *create_write_pipe(int flags); -extern void free_write_pipe(struct file *); extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index e1ac1ce16fb0..e16dcb31f0c7 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -162,4 +162,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file); +int create_pipe_files(struct file **, int); + #endif -- cgit v1.2.3 From 3134f37e931d75931bdf6d4eacd82a3fd26eca7c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 25 Jul 2012 10:19:47 -0400 Subject: vfs: don't let do_last pass negative dentry to audit_inode I can reliably reproduce the following panic by simply setting an audit rule on a recent 3.5.0+ kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 IP: [] audit_copy_inode+0x10/0x90 PGD 7acd9067 PUD 7b8fb067 PMD 0 Oops: 0000 [#86] SMP Modules linked in: nfs nfs_acl auth_rpcgss fscache lockd sunrpc tpm_bios btrfs zlib_deflate libcrc32c kvm_amd kvm joydev virtio_net pcspkr i2c_piix4 floppy virtio_balloon microcode virtio_blk cirrus drm_kms_helper ttm drm i2c_core [last unloaded: scsi_wait_scan] CPU 0 Pid: 1286, comm: abrt-dump-oops Tainted: G D 3.5.0+ #1 Bochs Bochs RIP: 0010:[] [] audit_copy_inode+0x10/0x90 RSP: 0018:ffff88007aebfc38 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88003692d860 RCX: 00000000000038c4 RDX: 0000000000000000 RSI: ffff88006baf5d80 RDI: ffff88003692d860 RBP: ffff88007aebfc68 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffff880036d30f00 R14: 
ffff88006baf5d80 R15: ffff88003692d800 FS: 00007f7562634740(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 000000003643d000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process abrt-dump-oops (pid: 1286, threadinfo ffff88007aebe000, task ffff880079614530) Stack: ffff88007aebfdf8 ffff88007aebff28 ffff88007aebfc98 ffffffff81211358 ffff88003692d860 0000000000000000 ffff88007aebfcc8 ffffffff810d4968 ffff88007aebfcc8 ffff8800000038c4 0000000000000000 0000000000000000 Call Trace: [] ? ext4_lookup+0xe8/0x160 [] __audit_inode+0x118/0x2d0 [] do_last+0x999/0xe80 [] ? inode_permission+0x18/0x50 [] ? kmem_cache_alloc_trace+0x11a/0x130 [] path_openat+0xba/0x420 [] do_filp_open+0x41/0xa0 [] ? alloc_fd+0x4d/0x120 [] do_sys_open+0xed/0x1c0 [] ? __audit_syscall_entry+0xcc/0x300 [] sys_open+0x21/0x30 [] system_call_fastpath+0x16/0x1b RSP CR2: 0000000000000040 The problem is that do_last is passing a negative dentry to audit_inode. The comments on lookup_open note that it can pass back a negative dentry if O_CREAT is not set. This patch fixes the oops, but I'm not clear on whether there's a better approach. Cc: Miklos Szeredi Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index a3fb78fd70d2..afa087649ddb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2608,9 +2608,10 @@ retry_lookup: } /* - * It already exists. + * create/update audit record if it already exists. 
*/ - audit_inode(pathname, path->dentry); + if (path->dentry->d_inode) + audit_inode(pathname, path->dentry); /* * If atomic_open() acquired write access it is dropped now due to -- cgit v1.2.3 From 800179c9b8a1e796e441674776d11cd4c05d61d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jul 2012 17:29:07 -0700 Subject: fs: add link restrictions This adds symlink and hardlink restrictions to the Linux VFS. Symlinks: A long-standing class of security issues is the symlink-based time-of-check-time-of-use race, most commonly seen in world-writable directories like /tmp. The common method of exploitation of this flaw is to cross privilege boundaries when following a given symlink (i.e. a root process follows a symlink belonging to another user). For a likely incomplete list of hundreds of examples across the years, please see: http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp The solution is to permit symlinks to only be followed when outside a sticky world-writable directory, or when the uid of the symlink and follower match, or when the directory owner matches the symlink's owner. Some pointers to the history of earlier discussion that I could find: 1996 Aug, Zygo Blaxell http://marc.info/?l=bugtraq&m=87602167419830&w=2 1996 Oct, Andrew Tridgell http://lkml.indiana.edu/hypermail/linux/kernel/9610.2/0086.html 1997 Dec, Albert D Cahalan http://lkml.org/lkml/1997/12/16/4 2005 Feb, Lorenzo Hernández García-Hierro http://lkml.indiana.edu/hypermail/linux/kernel/0502.0/1896.html 2010 May, Kees Cook https://lkml.org/lkml/2010/5/30/144 Past objections and rebuttals could be summarized as: - Violates POSIX. - POSIX didn't consider this situation and it's not useful to follow a broken specification at the cost of security. - Might break unknown applications that use this feature. - Applications that break because of the change are easy to spot and fix. Applications that are vulnerable to symlink ToCToU by not having the change aren't. 
Additionally, no applications have yet been found that rely on this behavior. - Applications should just use mkstemp() or O_CREAT|O_EXCL. - True, but applications are not perfect, and new software is written all the time that makes these mistakes; blocking this flaw at the kernel is a single solution to the entire class of vulnerability. - This should live in the core VFS. - This should live in an LSM. (https://lkml.org/lkml/2010/5/31/135) - This should live in an LSM. - This should live in the core VFS. (https://lkml.org/lkml/2010/8/2/188) Hardlinks: On systems that have user-writable directories on the same partition as system files, a long-standing class of security issues is the hardlink-based time-of-check-time-of-use race, most commonly seen in world-writable directories like /tmp. The common method of exploitation of this flaw is to cross privilege boundaries when following a given hardlink (i.e. a root process follows a hardlink created by another user). Additionally, an issue exists where users can "pin" a potentially vulnerable setuid/setgid file so that an administrator will not actually upgrade a system fully. The solution is to permit hardlinks to only be created when the user is already the existing file's owner, or if they already have read/write access to the existing file. Many Linux users are surprised when they learn they can link to files they have no access to, so this change appears to follow the doctrine of "least surprise". Additionally, this change does not violate POSIX, which states "the implementation may require that the calling process has permission to access the existing file"[1]. This change is known to break some implementations of the "at" daemon, though the version used by Fedora and Ubuntu has been fixed[2] for a while. Otherwise, the change has been undisruptive while in use in Ubuntu for the last 1.5 years.
[1] http://pubs.opengroup.org/onlinepubs/9699919799/functions/linkat.html [2] http://anonscm.debian.org/gitweb/?p=collab-maint/at.git;a=commitdiff;h=f4114656c3a6c6f6070e315ffdf940a49eda3279 This patch is based on the patches in Openwall and grsecurity, along with suggestions from Al Viro. I have added a sysctl to enable the protected behavior, and documentation. Signed-off-by: Kees Cook Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- Documentation/sysctl/fs.txt | 42 +++++++++++++++ fs/namei.c | 122 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 + kernel/sysctl.c | 18 +++++++ 4 files changed, 184 insertions(+) (limited to 'fs') diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 13d6166d7a27..d4a372e75750 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs: - nr_open - overflowuid - overflowgid +- protected_hardlinks +- protected_symlinks - suid_dumpable - super-max - super-nr @@ -157,6 +159,46 @@ The default is 65534. ============================================================== +protected_hardlinks: + +A long-standing class of security issues is the hardlink-based +time-of-check-time-of-use race, most commonly seen in world-writable +directories like /tmp. The common method of exploitation of this flaw +is to cross privilege boundaries when following a given hardlink (i.e. a +root process follows a hardlink created by another user). Additionally, +on systems without separated partitions, this stops unauthorized users +from "pinning" vulnerable setuid/setgid files against being upgraded by +the administrator, or linking to special files. + +When set to "0", hardlink creation behavior is unrestricted. + +When set to "1" hardlinks cannot be created by users if they do not +already own the source file, or do not have read/write access to it. 
+ +This protection is based on the restrictions in Openwall and grsecurity. + +============================================================== + +protected_symlinks: + +A long-standing class of security issues is the symlink-based +time-of-check-time-of-use race, most commonly seen in world-writable +directories like /tmp. The common method of exploitation of this flaw +is to cross privilege boundaries when following a given symlink (i.e. a +root process follows a symlink belonging to another user). For a likely +incomplete list of hundreds of examples across the years, please see: +http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp + +When set to "0", symlink following behavior is unrestricted. + +When set to "1" symlinks are permitted to be followed only when outside +a sticky world-writable directory, or when the uid of the symlink and +follower match, or when the directory owner matches the symlink's owner. + +This protection is based on the restrictions in Openwall and grsecurity. + +============================================================== + suid_dumpable: This value can be used to query and set the core dump mode for setuid diff --git a/fs/namei.c b/fs/namei.c index afa087649ddb..3861d85f8488 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -650,6 +650,119 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki path_put(link); } +int sysctl_protected_symlinks __read_mostly = 1; +int sysctl_protected_hardlinks __read_mostly = 1; + +/** + * may_follow_link - Check symlink following for unsafe situations + * @link: The path of the symlink + * + * In the case of the sysctl_protected_symlinks sysctl being enabled, + * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is + * in a sticky world-writable directory. This is to protect privileged + * processes from failing races against path names that may change out + * from under them by way of other users creating malicious symlinks. 
+ * It will permit symlinks to be followed only when outside a sticky + * world-writable directory, or when the uid of the symlink and follower + * match, or when the directory owner matches the symlink's owner. + * + * Returns 0 if following the symlink is allowed, -ve on error. + */ +static inline int may_follow_link(struct path *link, struct nameidata *nd) +{ + const struct inode *inode; + const struct inode *parent; + + if (!sysctl_protected_symlinks) + return 0; + + /* Allowed if owner and follower match. */ + inode = link->dentry->d_inode; + if (current_cred()->fsuid == inode->i_uid) + return 0; + + /* Allowed if parent directory not sticky and world-writable. */ + parent = nd->path.dentry->d_inode; + if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) + return 0; + + /* Allowed if parent directory and link owner match. */ + if (parent->i_uid == inode->i_uid) + return 0; + + path_put_conditional(link, nd); + path_put(&nd->path); + return -EACCES; +} + +/** + * safe_hardlink_source - Check for safe hardlink conditions + * @inode: the source inode to hardlink from + * + * Return false if at least one of the following conditions: + * - inode is not a regular file + * - inode is setuid + * - inode is setgid and group-exec + * - access failure for read and write + * + * Otherwise returns true. + */ +static bool safe_hardlink_source(struct inode *inode) +{ + umode_t mode = inode->i_mode; + + /* Special files should not get pinned to the filesystem. */ + if (!S_ISREG(mode)) + return false; + + /* Setuid files should not get pinned to the filesystem. */ + if (mode & S_ISUID) + return false; + + /* Executable setgid files should not get pinned to the filesystem. */ + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) + return false; + + /* Hardlinking to unreadable or unwritable sources is dangerous. 
*/ + if (inode_permission(inode, MAY_READ | MAY_WRITE)) + return false; + + return true; +} + +/** + * may_linkat - Check permissions for creating a hardlink + * @link: the source to hardlink from + * + * Block hardlink when all of: + * - sysctl_protected_hardlinks enabled + * - fsuid does not match inode + * - hardlink source is unsafe (see safe_hardlink_source() above) + * - not CAP_FOWNER + * + * Returns 0 if successful, -ve on error. + */ +static int may_linkat(struct path *link) +{ + const struct cred *cred; + struct inode *inode; + + if (!sysctl_protected_hardlinks) + return 0; + + cred = current_cred(); + inode = link->dentry->d_inode; + + /* Source inode owner (or CAP_FOWNER) can hardlink all they like, + * otherwise, it must be a safe source. + */ + if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || + capable(CAP_FOWNER)) + return 0; + + return -EPERM; +} + static __always_inline int follow_link(struct path *link, struct nameidata *nd, void **p) { @@ -1818,6 +1931,9 @@ static int path_lookupat(int dfd, const char *name, while (err > 0) { void *cookie; struct path link = path; + err = may_follow_link(&link, nd); + if (unlikely(err)) + break; nd->flags |= LOOKUP_PARENT; err = follow_link(&link, nd, &cookie); if (err) @@ -2778,6 +2894,9 @@ static struct file *path_openat(int dfd, const char *pathname, error = -ELOOP; break; } + error = may_follow_link(&link, nd); + if (unlikely(error)) + break; nd->flags |= LOOKUP_PARENT; nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); @@ -3421,6 +3540,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; + error = may_linkat(&old_path); + if (unlikely(error)) + goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; diff --git a/include/linux/fs.h b/include/linux/fs.h index 478237844648..80c819cbe272 100644 --- 
a/include/linux/fs.h +++ b/include/linux/fs.h @@ -437,6 +437,8 @@ extern unsigned long get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; +extern int sysctl_protected_symlinks; +extern int sysctl_protected_hardlinks; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb4..5d9a1d2b27b4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1493,6 +1493,24 @@ static struct ctl_table fs_table[] = { }, #endif #endif + { + .procname = "protected_symlinks", + .data = &sysctl_protected_symlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "protected_hardlinks", + .data = &sysctl_protected_hardlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { .procname = "suid_dumpable", .data = &suid_dumpable, -- cgit v1.2.3 From a51d9eaa41866ab6b4b6ecad7b621f8b66ece0dc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jul 2012 17:29:08 -0700 Subject: fs: add link restriction audit reporting Adds audit messages for unexpected link restriction violations so that system owners will have some sort of potentially actionable information about misbehaving processes. 
Signed-off-by: Kees Cook Signed-off-by: Al Viro --- fs/namei.c | 2 ++ include/linux/audit.h | 4 ++++ kernel/audit.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 3861d85f8488..618d3531cf9f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -692,6 +692,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) path_put_conditional(link, nd); path_put(&nd->path); + audit_log_link_denied("follow_link", link); return -EACCES; } @@ -760,6 +761,7 @@ static int may_linkat(struct path *link) capable(CAP_FOWNER)) return 0; + audit_log_link_denied("linkat", link); return -EPERM; } diff --git a/include/linux/audit.h b/include/linux/audit.h index 22f292a917a3..36abf2aa7e68 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -130,6 +130,7 @@ #define AUDIT_LAST_KERN_ANOM_MSG 1799 #define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */ #define AUDIT_ANOM_ABEND 1701 /* Process ended abnormally */ +#define AUDIT_ANOM_LINK 1702 /* Suspicious use of file links */ #define AUDIT_INTEGRITY_DATA 1800 /* Data integrity verification */ #define AUDIT_INTEGRITY_METADATA 1801 /* Metadata integrity verification */ #define AUDIT_INTEGRITY_STATUS 1802 /* Integrity enable status */ @@ -687,6 +688,8 @@ extern void audit_log_d_path(struct audit_buffer *ab, const struct path *path); extern void audit_log_key(struct audit_buffer *ab, char *key); +extern void audit_log_link_denied(const char *operation, + struct path *link); extern void audit_log_lost(const char *message); #ifdef CONFIG_SECURITY extern void audit_log_secctx(struct audit_buffer *ab, u32 secid); @@ -716,6 +719,7 @@ extern int audit_enabled; #define audit_log_untrustedstring(a,s) do { ; } while (0) #define audit_log_d_path(b, p, d) do { ; } while (0) #define audit_log_key(b, k) do { ; } while (0) +#define audit_log_link_denied(o, l) do { ; } while (0) #define audit_log_secctx(b,s) do { ; } while (0) #define 
audit_enabled 0 #endif diff --git a/kernel/audit.c b/kernel/audit.c index 1c7f2c61416b..fda8bd9e1d3a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1449,6 +1449,27 @@ void audit_log_key(struct audit_buffer *ab, char *key) audit_log_format(ab, "(null)"); } +/** + * audit_log_link_denied - report a link restriction denial + * @operation: specific link opreation + * @link: the path that triggered the restriction + */ +void audit_log_link_denied(const char *operation, struct path *link) +{ + struct audit_buffer *ab; + + ab = audit_log_start(current->audit_context, GFP_KERNEL, + AUDIT_ANOM_LINK); + audit_log_format(ab, "op=%s action=denied", operation); + audit_log_format(ab, " pid=%d comm=", current->pid); + audit_log_untrustedstring(ab, current->comm); + audit_log_d_path(ab, " path=", link); + audit_log_format(ab, " dev="); + audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id); + audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino); + audit_log_end(ab); +} + /** * audit_log_end - end one audit record * @ab: the audit_buffer -- cgit v1.2.3 From 446945ab9a82515af4b099107eda27050e077c58 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 Jul 2012 00:39:50 +0400 Subject: lockd: shift grabbing a reference to nlm_host into nlm_alloc_call() It's used both for client and server hosts; we can't do nlmclnt_release_host() on failure exits, since the host might need nlmsvc_release_host(), with BUG_ON() for calling the wrong one. Makes life simpler for callers, actually... 
Signed-off-by: Al Viro --- fs/lockd/clntproc.c | 9 ++------- fs/lockd/svc4proc.c | 1 + fs/lockd/svclock.c | 1 - fs/lockd/svcproc.c | 1 + 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 8392cb85bd54..27c74f32671b 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -156,7 +156,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) struct nlm_rqst *call; int status; - nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return -ENOMEM; @@ -185,9 +184,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc); /* * Allocate an NLM RPC call struct - * - * Note: the caller must hold a reference to host. In case of failure, - * this reference will be released. */ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) { @@ -199,7 +195,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) atomic_set(&call->a_count, 1); locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); - call->a_host = host; + call->a_host = nlm_get_host(host); return call; } if (signalled()) @@ -207,7 +203,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) printk("nlm_alloc_call: failed, waiting for memory\n"); schedule_timeout_interruptible(5*HZ); } - nlmclnt_release_host(host); return NULL; } @@ -750,7 +745,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" " Attempting to cancel lock.\n"); - req = nlm_alloc_call(nlm_get_host(host)); + req = nlm_alloc_call(host); if (!req) return -ENOMEM; req->a_flags = RPC_TASK_ASYNC; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 9a41fdc19511..185fda894789 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -256,6 +256,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args return rpc_system_err; call = nlm_alloc_call(host); + nlmsvc_release_host(host); if (call == 
NULL) return rpc_system_err; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e46353f41a42..b54acaf65987 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, struct nlm_block *block; struct nlm_rqst *call = NULL; - nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return NULL; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index d27aab11f324..90cfe9a0bf55 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -294,6 +294,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args return rpc_system_err; call = nlm_alloc_call(host); + nlmsvc_release_host(host); if (call == NULL) return rpc_system_err; -- cgit v1.2.3 From bf8848918d751c1fb86f6514a75bf8d406f1c3c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Jul 2012 23:17:39 +0400 Subject: lockd: handle lockowner allocation failure in nlmclnt_proc() Signed-off-by: Al Viro --- fs/lockd/clntproc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 27c74f32671b..05d29124c6ab 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -161,6 +161,11 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) return -ENOMEM; nlmclnt_locks_init_private(fl, host); + if (!fl->fl_u.nfs_fl.owner) { + /* lockowner allocation has failed */ + nlmclnt_release_call(call); + return -ENOMEM; + } /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); -- cgit v1.2.3 From f2ecc5e453134a13c3b2b0f2cac52ab2d5c540d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 10:54:46 -0400 Subject: xfs: split xfs_dialloc Move the actual allocation once we have selected an allocation group into a separate helper, and make xfs_dialloc a wrapper around it. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 349 ++++++++++++++++++++++++++-------------------------- 1 file changed, 174 insertions(+), 175 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 30b816d1f7e0..a124b9f88aae 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -607,188 +607,35 @@ xfs_ialloc_get_rec( } /* - * Visible inode allocation functions. - */ - -/* - * Allocate an inode on disk. - * Mode is used to tell whether the new inode will need space, and whether - * it is a directory. + * Allocate an inode. * - * The arguments IO_agbp and alloc_done are defined to work within - * the constraint of one allocation per transaction. - * xfs_dialloc() is designed to be called twice if it has to do an - * allocation to make more free inodes. On the first call, - * IO_agbp should be set to NULL. If an inode is available, - * i.e., xfs_dialloc() did not need to do an allocation, an inode - * number is returned. In this case, IO_agbp would be set to the - * current ag_buf and alloc_done set to false. - * If an allocation needed to be done, xfs_dialloc would return - * the current ag_buf in IO_agbp and set alloc_done to true. - * The caller should then commit the current transaction, allocate a new - * transaction, and call xfs_dialloc() again, passing in the previous - * value of IO_agbp. IO_agbp should be held across the transactions. - * Since the agbp is locked across the two calls, the second call is - * guaranteed to have a free inode available. - * - * Once we successfully pick an inode its number is returned and the - * on-disk data structures are updated. The inode itself is not read - * in, since doing so would break ordering constraints with xfs_reclaim. + * The caller selected an AG for us, and made sure that free inodes are + * available. 
*/ -int -xfs_dialloc( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t parent, /* parent inode (directory) */ - umode_t mode, /* mode bits for new inode */ - int okalloc, /* ok to allocate more space */ - xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ - boolean_t *alloc_done, /* true if we needed to replenish - inode freelist */ - xfs_ino_t *inop) /* inode number allocated */ +STATIC int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) { - xfs_agnumber_t agcount; /* number of allocation groups */ - xfs_buf_t *agbp; /* allocation group header's buffer */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_agi_t *agi; /* allocation group header structure */ - xfs_btree_cur_t *cur; /* inode allocation btree cursor */ - int error; /* error return value */ - int i; /* result code */ - int ialloced; /* inode allocation status */ - int noroom = 0; /* no space for inode blk allocation */ - xfs_ino_t ino; /* fs-relative inode to be returned */ - /* REFERENCED */ - int j; /* result code */ - xfs_mount_t *mp; /* file system mount structure */ - int offset; /* index of inode in chunk */ - xfs_agino_t pagino; /* parent's AG relative inode # */ - xfs_agnumber_t pagno; /* parent's AG number */ - xfs_inobt_rec_incore_t rec; /* inode allocation record */ - xfs_agnumber_t tagno; /* testing allocation group number */ - xfs_btree_cur_t *tcur; /* temp cursor */ - xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ - struct xfs_perag *pag; - - - if (*IO_agbp == NULL) { - /* - * We do not have an agbp, so select an initial allocation - * group for inode allocation. - */ - agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); - /* - * Couldn't find an allocation group satisfying the - * criteria, give up. 
- */ - if (!agbp) { - *inop = NULLFSINO; - return 0; - } - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - } else { - /* - * Continue where we left off before. In this case, we - * know that the allocation group has free inodes. - */ - agbp = *IO_agbp; - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - ASSERT(be32_to_cpu(agi->agi_freecount) > 0); - } - mp = tp->t_mountp; - agcount = mp->m_sb.sb_agcount; - agno = be32_to_cpu(agi->agi_seqno); - tagno = agno; - pagno = XFS_INO_TO_AGNO(mp, parent); - pagino = XFS_INO_TO_AGINO(mp, parent); - - /* - * If we have already hit the ceiling of inode blocks then clear - * okalloc so we scan all available agi structures for a free - * inode. - */ - - if (mp->m_maxicount && - mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { - noroom = 1; - okalloc = 0; - } + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur, *tcur; + struct xfs_inobt_rec_incore rec, trec; + xfs_ino_t ino; + int error; + int offset; + int i, j; - /* - * Loop until we find an allocation group that either has free inodes - * or in which we can allocate some inodes. Iterate through the - * allocation groups upward, wrapping at the end. - */ - *alloc_done = B_FALSE; - while (!agi->agi_freecount) { - /* - * Don't do anything if we're not supposed to allocate - * any blocks, just go on to the next ag. - */ - if (okalloc) { - /* - * Try to allocate some new inodes in the allocation - * group. 
- */ - if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { - xfs_trans_brelse(tp, agbp); - if (error == ENOSPC) { - *inop = NULLFSINO; - return 0; - } else - return error; - } - if (ialloced) { - /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. - */ - ASSERT(be32_to_cpu(agi->agi_freecount) > 0); - *alloc_done = B_TRUE; - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; - } - } - /* - * If it failed, give up on this ag. - */ - xfs_trans_brelse(tp, agbp); - /* - * Go on to the next ag: get its ag header. - */ -nextag: - if (++tagno == agcount) - tagno = 0; - if (tagno == agno) { - *inop = NULLFSINO; - return noroom ? ENOSPC : 0; - } - pag = xfs_perag_get(mp, tagno); - if (pag->pagi_inodeok == 0) { - xfs_perag_put(pag); - goto nextag; - } - error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); - xfs_perag_put(pag); - if (error) - goto nextag; - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - } - /* - * Here with an allocation group that has a free inode. - * Reset agno since we may have chosen a new ag in the - * loop above. - */ - agno = tagno; - *IO_agbp = NULL; pag = xfs_perag_get(mp, agno); restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1020,6 +867,158 @@ error0: return error; } +/* + * Allocate an inode on disk. + * + * Mode is used to tell whether the new inode will need space, and whether it + * is a directory. + * + * This function is designed to be called twice if it has to do an allocation + * to make more free inodes. On the first call, *IO_agbp should be set to NULL. + * If an inode is available without having to performn an allocation, an inode + * number is returned. 
In this case, *IO_agbp would be NULL. If an allocation + * needes to be done, xfs_dialloc would return the current AGI buffer in + * *IO_agbp. The caller should then commit the current transaction, allocate a + * new transaction, and call xfs_dialloc() again, passing in the previous value + * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI + * buffer is locked across the two calls, the second call is guaranteed to have + * a free inode available. + * + * Once we successfully pick an inode its number is returned and the on-disk + * data structures are updated. The inode itself is not read in, since doing so + * would break ordering constraints with xfs_reclaim. + */ +int +xfs_dialloc( + struct xfs_trans *tp, + xfs_ino_t parent, + umode_t mode, + int okalloc, + struct xfs_buf **IO_agbp, + boolean_t *alloc_done, + xfs_ino_t *inop) +{ + struct xfs_buf *agbp; + xfs_agnumber_t agno; + struct xfs_agi *agi; + int error; + int ialloced; + int noroom = 0; + struct xfs_mount *mp; + xfs_agnumber_t tagno; + struct xfs_perag *pag; + + if (*IO_agbp == NULL) { + /* + * We do not have an agbp, so select an initial allocation + * group for inode allocation. + */ + agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + /* + * Couldn't find an allocation group satisfying the + * criteria, give up. + */ + if (!agbp) { + *inop = NULLFSINO; + return 0; + } + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + } else { + /* + * Continue where we left off before. In this case, we + * know that the allocation group has free inodes. + */ + agbp = *IO_agbp; + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + ASSERT(be32_to_cpu(agi->agi_freecount) > 0); + } + mp = tp->t_mountp; + agno = be32_to_cpu(agi->agi_seqno); + tagno = agno; + + /* + * If we have already hit the ceiling of inode blocks then clear + * okalloc so we scan all available agi structures for a free + * inode. 
+ */ + + if (mp->m_maxicount && + mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { + noroom = 1; + okalloc = 0; + } + + /* + * Loop until we find an allocation group that either has free inodes + * or in which we can allocate some inodes. Iterate through the + * allocation groups upward, wrapping at the end. + */ + *alloc_done = B_FALSE; + while (!agi->agi_freecount) { + /* + * Don't do anything if we're not supposed to allocate + * any blocks, just go on to the next ag. + */ + if (okalloc) { + /* + * Try to allocate some new inodes in the allocation + * group. + */ + if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { + xfs_trans_brelse(tp, agbp); + if (error == ENOSPC) { + *inop = NULLFSINO; + return 0; + } else + return error; + } + if (ialloced) { + /* + * We successfully allocated some inodes, return + * the current context to the caller so that it + * can commit the current transaction and call + * us again where we left off. + */ + ASSERT(be32_to_cpu(agi->agi_freecount) > 0); + *alloc_done = B_TRUE; + *IO_agbp = agbp; + *inop = NULLFSINO; + return 0; + } + } + /* + * If it failed, give up on this ag. + */ + xfs_trans_brelse(tp, agbp); + /* + * Go on to the next ag: get its ag header. + */ +nextag: + if (++tagno == mp->m_sb.sb_agcount) + tagno = 0; + if (tagno == agno) { + *inop = NULLFSINO; + return noroom ? ENOSPC : 0; + } + pag = xfs_perag_get(mp, tagno); + if (pag->pagi_inodeok == 0) { + xfs_perag_put(pag); + goto nextag; + } + error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); + xfs_perag_put(pag); + if (error) + goto nextag; + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + } + + *IO_agbp = NULL; + return xfs_dialloc_ag(tp, agbp, parent, inop); +} + /* * Free disk inode. Carefully avoids touching the incore inode, all * manipulations incore are the caller's responsibility. 
-- cgit v1.2.3 From 08358906ed78f6ab4d3ff8e4fd1b87b9a4aea645 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 10:54:47 -0400 Subject: xfs: remove the alloc_done argument to xfs_dialloc We can simply check the IO_agbp pointer for being non-NULL instead of passing another argument through two layers of function calls. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 3 --- fs/xfs/xfs_ialloc.h | 2 -- fs/xfs/xfs_inode.c | 5 ++--- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_utils.c | 17 +++++++---------- 5 files changed, 10 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index a124b9f88aae..2b70952c9d8c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -895,7 +895,6 @@ xfs_dialloc( umode_t mode, int okalloc, struct xfs_buf **IO_agbp, - boolean_t *alloc_done, xfs_ino_t *inop) { struct xfs_buf *agbp; @@ -955,7 +954,6 @@ xfs_dialloc( * or in which we can allocate some inodes. Iterate through the * allocation groups upward, wrapping at the end. */ - *alloc_done = B_FALSE; while (!agi->agi_freecount) { /* * Don't do anything if we're not supposed to allocate @@ -982,7 +980,6 @@ xfs_dialloc( * us again where we left off. */ ASSERT(be32_to_cpu(agi->agi_freecount) > 0); - *alloc_done = B_TRUE; *IO_agbp = agbp; *inop = NULLFSINO; return 0; diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 65ac57c8063c..1fd6ea4e9c91 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -75,8 +75,6 @@ xfs_dialloc( umode_t mode, /* mode bits for new inode */ int okalloc, /* ok to allocate more space */ struct xfs_buf **agbp, /* buf for a.g.
inode header */ - boolean_t *alloc_done, /* an allocation was done to replenish - the free inodes */ xfs_ino_t *inop); /* inode number allocated */ /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d48e406de078..5c10825f2f80 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -887,7 +887,6 @@ xfs_ialloc( prid_t prid, int okalloc, xfs_buf_t **ialloc_context, - boolean_t *call_again, xfs_inode_t **ipp) { xfs_ino_t ino; @@ -902,10 +901,10 @@ xfs_ialloc( * the on-disk inode to be allocated. */ error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, - ialloc_context, call_again, &ino); + ialloc_context, &ino); if (error) return error; - if (*call_again || ino == NULLFSINO) { + if (*ialloc_context || ino == NULLFSINO) { *ipp = NULL; return 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c2e2da3abae2..04d2fe421b97 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -517,7 +517,7 @@ void xfs_inode_free(struct xfs_inode *ip); */ int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, - struct xfs_buf **, boolean_t *, xfs_inode_t **); + struct xfs_buf **, xfs_inode_t **); uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 4e5b9ad5cb97..0025c78ac03c 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -65,7 +65,6 @@ xfs_dir_ialloc( xfs_trans_t *ntp; xfs_inode_t *ip; xfs_buf_t *ialloc_context = NULL; - boolean_t call_again = B_FALSE; int code; uint log_res; uint log_count; @@ -91,7 +90,7 @@ xfs_dir_ialloc( * the inode(s) that we've just allocated. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, - &ialloc_context, &call_again, &ip); + &ialloc_context, &ip); /* * Return an error if we were unable to allocate a new inode. 
@@ -102,19 +101,18 @@ xfs_dir_ialloc( *ipp = NULL; return code; } - if (!call_again && (ip == NULL)) { + if (!ialloc_context && !ip) { *ipp = NULL; return XFS_ERROR(ENOSPC); } /* - * If call_again is set, then we were unable to get an + * If the AGI buffer is non-NULL, then we were unable to get an * inode in one operation. We need to commit the current * transaction and call xfs_ialloc() again. It is guaranteed * to succeed the second time. */ - if (call_again) { - + if (ialloc_context) { /* * Normally, xfs_trans_commit releases all the locks. * We call bhold to hang on to the ialloc_context across @@ -195,7 +193,7 @@ xfs_dir_ialloc( * this call should always succeed. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - okalloc, &ialloc_context, &call_again, &ip); + okalloc, &ialloc_context, &ip); /* * If we get an error at this point, return to the caller @@ -206,12 +204,11 @@ xfs_dir_ialloc( *ipp = NULL; return code; } - ASSERT ((!call_again) && (ip != NULL)); + ASSERT(!ialloc_context && ip); } else { - if (committed != NULL) { + if (committed != NULL) *committed = 0; - } } *ipp = ip; -- cgit v1.2.3 From 4bb61069d2019dea2a7e4e0f4432101f03a9b820 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 10:54:48 -0400 Subject: xfs: add a short cut to xfs_dialloc for the non-NULL agbp case In this case we already have selected an AG and know it has free space because the buffer lock never got released. Jump directly into xfs_dialloc_ag and short cut the AG selection loop.
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 2b70952c9d8c..7aa8a02b7937 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -634,6 +634,10 @@ xfs_dialloc_ag( pag = xfs_perag_get(mp, agno); + ASSERT(pag->pagi_init); + ASSERT(pag->pagi_inodeok); + ASSERT(pag->pagi_freecount > 0); + restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); /* @@ -907,32 +911,32 @@ xfs_dialloc( xfs_agnumber_t tagno; struct xfs_perag *pag; - if (*IO_agbp == NULL) { - /* - * We do not have an agbp, so select an initial allocation - * group for inode allocation. - */ - agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + if (*IO_agbp) { /* - * Couldn't find an allocation group satisfying the - * criteria, give up. - */ - if (!agbp) { - *inop = NULLFSINO; - return 0; - } - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - } else { - /* - * Continue where we left off before. In this case, we + * If the caller passes in a pointer to the AGI buffer, + * continue where we left off before. In this case, we * know that the allocation group has free inodes. */ agbp = *IO_agbp; - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - ASSERT(be32_to_cpu(agi->agi_freecount) > 0); + goto out_alloc; } + + /* + * We do not have an agbp, so select an initial allocation + * group for inode allocation. + */ + agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + + /* + * Couldn't find an allocation group satisfying the + * criteria, give up. 
+ */ + if (!agbp) { + *inop = NULLFSINO; + return 0; + } + agi = XFS_BUF_TO_AGI(agbp); + mp = tp->t_mountp; agno = be32_to_cpu(agi->agi_seqno); tagno = agno; @@ -1012,6 +1016,7 @@ nextag: ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); } +out_alloc: *IO_agbp = NULL; return xfs_dialloc_ag(tp, agbp, parent, inop); } -- cgit v1.2.3 From 55d6af64cb8bf8c7e9a84b254d2c3479be8c067c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 10:54:49 -0400 Subject: xfs: refactor xfs_ialloc_ag_select Loop over the in-core perag structures and prefer using pagi_freecount over going out to the AGI buffer where possible. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 95 +++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 7aa8a02b7937..ecb9f22a7f35 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -442,14 +442,13 @@ xfs_ialloc_next_ag( * Select an allocation group to look for a free inode in, based on the parent * inode and then mode. Return the allocation group buffer. 
*/ -STATIC xfs_buf_t * /* allocation group buffer */ +STATIC xfs_agnumber_t xfs_ialloc_ag_select( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent directory inode number */ umode_t mode, /* bits set to indicate file type */ int okalloc) /* ok to allocate more space */ { - xfs_buf_t *agbp; /* allocation group header buffer */ xfs_agnumber_t agcount; /* number of ag's in the filesystem */ xfs_agnumber_t agno; /* current ag number */ int flags; /* alloc buffer locking flags */ @@ -459,6 +458,7 @@ xfs_ialloc_ag_select( int needspace; /* file mode implies space allocated */ xfs_perag_t *pag; /* per allocation group data */ xfs_agnumber_t pagno; /* parent (starting) ag number */ + int error; /* * Files of these types need at least one block if length > 0 @@ -474,7 +474,9 @@ xfs_ialloc_ag_select( if (pagno >= agcount) pagno = 0; } + ASSERT(pagno < agcount); + /* * Loop through allocation groups, looking for one with a little * free space in it. Note we don't look for free inodes, exactly. @@ -486,51 +488,45 @@ xfs_ialloc_ag_select( flags = XFS_ALLOC_FLAG_TRYLOCK; for (;;) { pag = xfs_perag_get(mp, agno); + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto nextag; + } + if (!pag->pagi_init) { - if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { - agbp = NULL; + error = xfs_ialloc_pagi_init(mp, tp, agno); + if (error) goto nextag; - } - } else - agbp = NULL; + } - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto unlock_nextag; + if (pag->pagi_freecount) { + xfs_perag_put(pag); + return agno; } - /* - * Is there enough free space for the file plus a block - * of inodes (if we need to allocate some)? - */ - ineed = pag->pagi_freecount ? 
0 : XFS_IALLOC_BLOCKS(mp); - if (ineed && !pag->pagf_init) { - if (agbp == NULL && - xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { - agbp = NULL; + if (!okalloc) + goto nextag; + + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, agno, flags); + if (error) goto nextag; - } - (void)xfs_alloc_pagf_init(mp, tp, agno, flags); } - if (!ineed || pag->pagf_init) { - if (ineed && !(longest = pag->pagf_longest)) - longest = pag->pagf_flcount > 0; - if (!ineed || - (pag->pagf_freeblks >= needspace + ineed && - longest >= ineed && - okalloc)) { - if (agbp == NULL && - xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { - agbp = NULL; - goto nextag; - } - xfs_perag_put(pag); - return agbp; - } + + /* + * Is there enough free space for the file plus a block of + * inodes? (if we need to allocate some)? + */ + ineed = XFS_IALLOC_BLOCKS(mp); + longest = pag->pagf_longest; + if (!longest) + longest = pag->pagf_flcount > 0; + + if (pag->pagf_freeblks >= needspace + ineed && + longest >= ineed) { + xfs_perag_put(pag); + return agno; } -unlock_nextag: - if (agbp) - xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); /* @@ -538,13 +534,13 @@ nextag: * down. */ if (XFS_FORCED_SHUTDOWN(mp)) - return NULL; + return NULLAGNUMBER; agno++; if (agno >= agcount) agno = 0; if (agno == pagno) { if (flags == 0) - return NULL; + return NULLAGNUMBER; flags = 0; } } @@ -901,13 +897,13 @@ xfs_dialloc( struct xfs_buf **IO_agbp, xfs_ino_t *inop) { + struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *agbp; xfs_agnumber_t agno; struct xfs_agi *agi; int error; int ialloced; int noroom = 0; - struct xfs_mount *mp; xfs_agnumber_t tagno; struct xfs_perag *pag; @@ -925,20 +921,17 @@ xfs_dialloc( * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); - - /* - * Couldn't find an allocation group satisfying the - * criteria, give up. 
- */ - if (!agbp) { + agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + if (agno == NULLAGNUMBER) { *inop = NULLFSINO; return 0; } + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) + return XFS_ERROR(error); agi = XFS_BUF_TO_AGI(agbp); - mp = tp->t_mountp; - agno = be32_to_cpu(agi->agi_seqno); tagno = agno; /* -- cgit v1.2.3 From be60fe54b216a62403b816d3930a66ad7c51cbc6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 10:54:50 -0400 Subject: xfs: do not read the AGI buffer in xfs_dialloc until nessecary Refactor the AG selection loop in xfs_dialloc to operate on the in-memory perag data as much as possible. We only read the AGI buffer once we have selected an AG to allocate inodes now instead of for every AG considered. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 127 ++++++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index ecb9f22a7f35..21e37b55f7e5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -900,11 +900,10 @@ xfs_dialloc( struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *agbp; xfs_agnumber_t agno; - struct xfs_agi *agi; int error; int ialloced; int noroom = 0; - xfs_agnumber_t tagno; + xfs_agnumber_t start_agno; struct xfs_perag *pag; if (*IO_agbp) { @@ -921,25 +920,17 @@ xfs_dialloc( * We do not have an agbp, so select an initial allocation * group for inode allocation. 
*/ - agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); - if (agno == NULLAGNUMBER) { + start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + if (start_agno == NULLAGNUMBER) { *inop = NULLFSINO; return 0; } - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) - return XFS_ERROR(error); - agi = XFS_BUF_TO_AGI(agbp); - - tagno = agno; - /* * If we have already hit the ceiling of inode blocks then clear * okalloc so we scan all available agi structures for a free * inode. */ - if (mp->m_maxicount && mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { noroom = 1; @@ -951,67 +942,87 @@ xfs_dialloc( * or in which we can allocate some inodes. Iterate through the * allocation groups upward, wrapping at the end. */ - while (!agi->agi_freecount) { - /* - * Don't do anything if we're not supposed to allocate - * any blocks, just go on to the next ag. - */ - if (okalloc) { - /* - * Try to allocate some new inodes in the allocation - * group. - */ - if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { - xfs_trans_brelse(tp, agbp); - if (error == ENOSPC) { - *inop = NULLFSINO; - return 0; - } else - return error; - } - if (ialloced) { - /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. - */ - ASSERT(be32_to_cpu(agi->agi_freecount) > 0); - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; - } + agno = start_agno; + for (;;) { + pag = xfs_perag_get(mp, agno); + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto nextag; + } + + if (!pag->pagi_init) { + error = xfs_ialloc_pagi_init(mp, tp, agno); + if (error) + goto out_error; } + /* - * If it failed, give up on this ag. + * Do a first racy fast path check if this AG is usable. 
*/ - xfs_trans_brelse(tp, agbp); + if (!pag->pagi_freecount && !okalloc) + goto nextag; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) + goto out_error; + /* - * Go on to the next ag: get its ag header. + * Once the AGI has been read in we have to recheck + * pagi_freecount with the AGI buffer lock held. */ -nextag: - if (++tagno == mp->m_sb.sb_agcount) - tagno = 0; - if (tagno == agno) { + if (pag->pagi_freecount) { + xfs_perag_put(pag); + goto out_alloc; + } + + if (!okalloc) { + xfs_trans_brelse(tp, agbp); + goto nextag; + } + + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); + if (error) { + xfs_trans_brelse(tp, agbp); + + if (error != ENOSPC) + goto out_error; + + xfs_perag_put(pag); *inop = NULLFSINO; - return noroom ? ENOSPC : 0; + return 0; } - pag = xfs_perag_get(mp, tagno); - if (pag->pagi_inodeok == 0) { + + if (ialloced) { + /* + * We successfully allocated some inodes, return + * the current context to the caller so that it + * can commit the current transaction and call + * us again where we left off. + */ + ASSERT(pag->pagi_freecount > 0); xfs_perag_put(pag); - goto nextag; + + *IO_agbp = agbp; + *inop = NULLFSINO; + return 0; } - error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); + +nextag: xfs_perag_put(pag); - if (error) - goto nextag; - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); + if (++agno == mp->m_sb.sb_agcount) + agno = 0; + if (agno == start_agno) { + *inop = NULLFSINO; + return noroom ? ENOSPC : 0; + } } out_alloc: *IO_agbp = NULL; return xfs_dialloc_ag(tp, agbp, parent, inop); +out_error: + xfs_perag_put(pag); + return XFS_ERROR(error); } /* -- cgit v1.2.3 From b373e98daa70d7ddb10f53f81e711c4d17651795 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 11:13:29 -0400 Subject: xfs: clean up xfs_inactive The code to reserve log space and join the inode to the transaction is common for all cases, so don't duplicate it. 
Also remove the trivial xfs_inactive_symlink_local helper which can simply be open-coded now. Signed-off-by: Christoph Hellwig Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 171 +++++++++++++------------------------------------- 1 file changed, 43 insertions(+), 128 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c22f4e0ecac1..f9a515776a9c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -282,23 +282,15 @@ xfs_inactive_symlink_rmt( * free them all in one bunmapi call. */ ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); - if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); - *tpp = NULL; - return error; - } + /* * Lock the inode, fix the size, and join it to the transaction. * Hold it so in the normal path, we still have it locked for * the second transaction. In the error paths we need it * held so the cancel won't rele it, see below. */ - xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); size = (int)ip->i_d.di_size; ip->i_d.di_size = 0; - xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Find the block(s) so we can inval and unmap them. @@ -385,67 +377,15 @@ xfs_inactive_symlink_rmt( ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error0; } - /* - * Return with the inode locked but not joined to the transaction. - */ + + xfs_trans_ijoin(tp, ip, 0); *tpp = tp; return 0; error1: xfs_bmap_cancel(&free_list); error0: - /* - * Have to come here with the inode locked and either - * (held and in the transaction) or (not in the transaction). - * If the inode isn't held then cancel would iput it, but - * that's wrong since this is inactive and the vnode ref - * count is 0 already.
- * Cancel won't do anything to the inode if held, but it still - * needs to be locked until the cancel is done, if it was - * joined to the transaction. - */ - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - *tpp = NULL; return error; - -} - -STATIC int -xfs_inactive_symlink_local( - xfs_inode_t *ip, - xfs_trans_t **tpp) -{ - int error; - - ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip)); - /* - * We're freeing a symlink which fit into - * the inode. Just free the memory used - * to hold the old symlink. - */ - error = xfs_trans_reserve(*tpp, 0, - XFS_ITRUNCATE_LOG_RES(ip->i_mount), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - - if (error) { - xfs_trans_cancel(*tpp, 0); - *tpp = NULL; - return error; - } - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - - /* - * Zero length symlinks _can_ exist. - */ - if (ip->i_df.if_bytes > 0) { - xfs_idata_realloc(ip, - -(ip->i_df.if_bytes), - XFS_DATA_FORK); - ASSERT(ip->i_df.if_bytes == 0); - } - return 0; } STATIC int @@ -604,7 +544,7 @@ xfs_inactive( xfs_trans_t *tp; xfs_mount_t *mp; int error; - int truncate; + int truncate = 0; /* * If the inode is already free, then there can be nothing @@ -616,17 +556,6 @@ xfs_inactive( return VN_INACTIVE_CACHE; } - /* - * Only do a truncate if it's a regular file with - * some actual space in it. It's OK to look at the - * inode's fields without the lock because we're the - * only one with a reference to the inode. 
- */ - truncate = ((ip->i_d.di_nlink == 0) && - ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 || - (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && - S_ISREG(ip->i_d.di_mode)); - mp = ip->i_mount; error = 0; @@ -650,72 +579,54 @@ xfs_inactive( goto out; } - ASSERT(ip->i_d.di_nlink == 0); + if (S_ISREG(ip->i_d.di_mode) && + (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 || + ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0)) + truncate = 1; error = xfs_qm_dqattach(ip, 0); if (error) return VN_INACTIVE_CACHE; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - if (truncate) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - error = xfs_trans_reserve(tp, 0, - XFS_ITRUNCATE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) { - /* Don't call itruncate_cleanup */ - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return VN_INACTIVE_CACHE; - } + error = xfs_trans_reserve(tp, 0, + (truncate || S_ISLNK(ip->i_d.di_mode)) ? + XFS_ITRUNCATE_LOG_RES(mp) : + XFS_IFREE_LOG_RES(mp), + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + xfs_trans_cancel(tp, 0); + return VN_INACTIVE_CACHE; + } - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); + xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + if (S_ISLNK(ip->i_d.di_mode)) { + /* + * Zero length symlinks _can_ exist. 
+ */ + if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) { + error = xfs_inactive_symlink_rmt(ip, &tp); + if (error) + goto out_cancel; + } else if (ip->i_df.if_bytes > 0) { + xfs_idata_realloc(ip, -(ip->i_df.if_bytes), + XFS_DATA_FORK); + ASSERT(ip->i_df.if_bytes == 0); + } + } else if (truncate) { ip->i_d.di_size = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); - if (error) { - xfs_trans_cancel(tp, - XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - return VN_INACTIVE_CACHE; - } + if (error) + goto out_cancel; ASSERT(ip->i_d.di_nextents == 0); - } else if (S_ISLNK(ip->i_d.di_mode)) { - - /* - * If we get an error while cleaning up a - * symlink we bail out. - */ - error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ? - xfs_inactive_symlink_rmt(ip, &tp) : - xfs_inactive_symlink_local(ip, &tp); - - if (error) { - ASSERT(tp == NULL); - return VN_INACTIVE_CACHE; - } - - xfs_trans_ijoin(tp, ip, 0); - } else { - error = xfs_trans_reserve(tp, 0, - XFS_IFREE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_INACTIVE_LOG_COUNT); - if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); - return VN_INACTIVE_CACHE; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); } /* @@ -781,7 +692,11 @@ xfs_inactive( xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - out: +out: + return VN_INACTIVE_CACHE; +out_cancel: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); return VN_INACTIVE_CACHE; } -- cgit v1.2.3 From fe67be036ff2f713b1c5f24dd4cdffae75bcb97a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 11:13:30 -0400 Subject: xfs: remove xfs_inactive_attrs Remove this helper as the code flow is a lot more obvious when it gets merged into its only caller. 
Signed-off-by: Christoph Hellwig Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 97 +++++++++++++++++++-------------------------------- 1 file changed, 36 insertions(+), 61 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f9a515776a9c..9a2ae8c0ecc4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -388,54 +388,6 @@ xfs_inactive_symlink_rmt( return error; } -STATIC int -xfs_inactive_attrs( - xfs_inode_t *ip, - xfs_trans_t **tpp) -{ - xfs_trans_t *tp; - int error; - xfs_mount_t *mp; - - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - tp = *tpp; - mp = ip->i_mount; - ASSERT(ip->i_d.di_forkoff != 0); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - goto error_unlock; - - error = xfs_attr_inactive(ip); - if (error) - goto error_unlock; - - tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, 0, - XFS_IFREE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_INACTIVE_LOG_COUNT); - if (error) - goto error_cancel; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - - ASSERT(ip->i_d.di_anextents == 0); - - *tpp = tp; - return 0; - -error_cancel: - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); -error_unlock: - *tpp = NULL; - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; -} - int xfs_release( xfs_inode_t *ip) @@ -630,24 +582,40 @@ xfs_inactive( } /* - * If there are attributes associated with the file - * then blow them away now. The code calls a routine - * that recursively deconstructs the attribute fork. - * We need to just commit the current transaction + * If there are attributes associated with the file then blow them away + * now. The code calls a routine that recursively deconstructs the + * attribute fork. We need to just commit the current transaction * because we can't use it for xfs_attr_inactive(). 
*/ if (ip->i_d.di_anextents > 0) { - error = xfs_inactive_attrs(ip, &tp); - /* - * If we got an error, the transaction is already - * cancelled, and the inode is unlocked. Just get out. - */ - if (error) - return VN_INACTIVE_CACHE; - } else if (ip->i_afp) { - xfs_idestroy_fork(ip, XFS_ATTR_FORK); + ASSERT(ip->i_d.di_forkoff != 0); + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + goto error_unlock; + + error = xfs_attr_inactive(ip); + if (error) + goto error_unlock; + + tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + error = xfs_trans_reserve(tp, 0, + XFS_IFREE_LOG_RES(mp), + 0, XFS_TRANS_PERM_LOG_RES, + XFS_INACTIVE_LOG_COUNT); + if (error) + goto error_cancel; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); } + if (ip->i_afp) + xfs_idestroy_fork(ip, XFS_ATTR_FORK); + + ASSERT(ip->i_d.di_anextents == 0); + /* * Free the inode. */ @@ -698,6 +666,13 @@ out_cancel: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); return VN_INACTIVE_CACHE; + +error_cancel: + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + xfs_trans_cancel(tp, 0); +error_unlock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return VN_INACTIVE_CACHE; } /* -- cgit v1.2.3 From 0b56185b0d64ef89dad1c85bb7403fa762cbe50d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 11:13:31 -0400 Subject: xfs: do not take the iolock in xfs_inactive An inode that enters xfs_inactive has been removed from all global lists but the inode hash, and can't be recycled in xfs_iget before it has been marked reclaimable. Thus taking the iolock in here is not necessary at all, and given the amount of lockdep false positives it has triggered already I'd rather remove the locking. The only change outside of xfs_inactive is relaxing an assert in xfs_itruncate_extents.
Signed-off-by: Christoph Hellwig Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_inode.c | 4 +++- fs/xfs/xfs_vnodeops.c | 29 ++++++++++++----------------- 2 files changed, 15 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5c10825f2f80..2778258fcfa2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1123,7 +1123,9 @@ xfs_itruncate_extents( int error = 0; int done = 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(!atomic_read(&VFS_I(ip)->i_count) || + xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(new_size <= XFS_ISIZE(ip)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9a2ae8c0ecc4..79270430dafc 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -554,7 +554,7 @@ xfs_inactive( return VN_INACTIVE_CACHE; } - xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); if (S_ISLNK(ip->i_d.di_mode)) { @@ -591,21 +591,24 @@ xfs_inactive( ASSERT(ip->i_d.di_forkoff != 0); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) - goto error_unlock; + goto out_unlock; + + xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_attr_inactive(ip); if (error) - goto error_unlock; + goto out; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); error = xfs_trans_reserve(tp, 0, XFS_IFREE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_INACTIVE_LOG_COUNT); - if (error) - goto error_cancel; + if (error) { + xfs_trans_cancel(tp, 0); + goto out; + } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); @@ -658,21 +661,13 @@ xfs_inactive( * Release the dquots held by inode, if any. 
*/ xfs_qm_dqdetach(ip); - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); out: return VN_INACTIVE_CACHE; out_cancel: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - return VN_INACTIVE_CACHE; - -error_cancel: - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); -error_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return VN_INACTIVE_CACHE; + goto out_unlock; } /* -- cgit v1.2.3 From 5a15322da1a51ad8f3af1962de355885b6c606f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 11:13:32 -0400 Subject: xfs: avoid the iolock in xfs_free_eofblocks for evicted inodes Same rationale as the last patch - these inodes are not reachable, so don't bother with locking. Signed-off-by: Christoph Hellwig Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 79270430dafc..2a5c637344b4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -145,11 +145,6 @@ xfs_readlink( return error; } -/* - * Flags for xfs_free_eofblocks - */ -#define XFS_FREE_EOF_TRYLOCK (1<<0) - /* * This is called by xfs_inactive to free any blocks beyond eof * when the link count isn't zero and by xfs_dm_punch_hole() when @@ -159,7 +154,7 @@ STATIC int xfs_free_eofblocks( xfs_mount_t *mp, xfs_inode_t *ip, - int flags) + bool need_iolock) { xfs_trans_t *tp; int error; @@ -201,13 +196,11 @@ xfs_free_eofblocks( */ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - if (flags & XFS_FREE_EOF_TRYLOCK) { + if (need_iolock) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { xfs_trans_cancel(tp, 0); return 0; } - } else { - xfs_ilock(ip, XFS_IOLOCK_EXCL); } error = xfs_trans_reserve(tp, 0, @@ -217,7 +210,8 @@ xfs_free_eofblocks( if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + if (need_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -244,7 +238,10 @@ xfs_free_eofblocks( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (need_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); } return error; } @@ -466,8 +463,7 @@ xfs_release( if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) return 0; - error = xfs_free_eofblocks(mp, ip, - XFS_FREE_EOF_TRYLOCK); + error = xfs_free_eofblocks(mp, ip, true); if (error) return error; @@ -524,7 +520,7 @@ xfs_inactive( (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || ip->i_delayed_blks != 0))) { - error = xfs_free_eofblocks(mp, ip, 0); + error = xfs_free_eofblocks(mp, ip, false); if (error) return VN_INACTIVE_CACHE; } -- cgit v1.2.3 From 4f59af758f9092bc7b266ca919ce6067170e5172 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Jul 2012 11:13:33 -0400 Subject: xfs: remove iolock lock classes Content-Disposition: inline; filename=xfs-remove-iolock-classes Now that we never take the iolock during inode reclaim we don't need to play games with lock classes. Signed-off-by: Christoph Hellwig Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_iget.c | 15 --------------- fs/xfs/xfs_inode.h | 2 -- fs/xfs/xfs_super.c | 18 ++---------------- 3 files changed, 2 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 1bb4365e8c25..784a803383ec 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -40,17 +40,6 @@ #include "xfs_trace.h" -/* - * Define xfs inode iolock lockdep classes. We need to ensure that all active - * inodes are considered the same for lockdep purposes, including inodes that - * are recycled through the XFS_IRECLAIMABLE state. 
This is the the only way to - * guarantee the locks are considered the same when there are multiple lock - * initialisation siteѕ. Also, define a reclaimable inode class so it is - * obvious in lockdep reports which class the report is against. - */ -static struct lock_class_key xfs_iolock_active; -struct lock_class_key xfs_iolock_reclaimable; - /* * Allocate and initialise an xfs_inode. */ @@ -80,8 +69,6 @@ xfs_inode_alloc( ASSERT(ip->i_ino == 0); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_active, "xfs_iolock_active"); /* initialise the xfs inode */ ip->i_ino = ino; @@ -250,8 +237,6 @@ xfs_iget_cache_hit( ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_active, "xfs_iolock_active"); spin_unlock(&ip->i_flags_lock); spin_unlock(&pag->pag_ici_lock); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 04d2fe421b97..94b32f906e79 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -487,8 +487,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) -extern struct lock_class_key xfs_iolock_reclaimable; - /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index cb2deb13b063..bdaf4cb9f4a2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -874,6 +874,8 @@ xfs_fs_evict_inode( { xfs_inode_t *ip = XFS_I(inode); + ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); + trace_xfs_evict_inode(ip); truncate_inode_pages(&inode->i_data, 0); @@ -882,22 +884,6 @@ xfs_fs_evict_inode( XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); - /* - * The iolock 
is used by the file system to coordinate reads, - * writes, and block truncates. Up to this point the lock - * protected concurrent accesses by users of the inode. But - * from here forward we're doing some final processing of the - * inode because we're done with it, and although we reuse the - * iolock for protection it is really a distinct lock class - * (in the lockdep sense) from before. To keep lockdep happy - * (and basically indicate what we are doing), we explicitly - * re-init the iolock here. - */ - ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); - xfs_inactive(ip); } -- cgit v1.2.3 From 8375f922aaa6e7a880022529202fb486315568c3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 28 Jun 2012 06:52:56 -0400 Subject: xfs: re-enable xfsaild idle mode and fix associated races xfsaild idle mode logic currently leads to a couple hangs: 1.) If xfsaild is rescheduled in during an incremental scan (i.e., tout != 0) and the target has been updated since the previous run, we can hit the new target and go into idle mode with a still populated ail. 2.) A wake up is only issued when the target is pushed forward. The wake up can race with xfsaild if it is currently in the process of entering idle mode, causing future wake up events to be lost. These hangs have been reproduced and verified as fixed by running xfstests 273 in a loop on a slightly modified upstream kernel. The kernel is modified to re-enable idle mode as previously implemented (when count == 0) and with a revert of commit 670ce93f, which includes performance improvements that make this harder to reproduce. The solution, the algorithm for which has been outlined by Dave Chinner, is to modify xfsaild to enter idle mode only when the ail is empty and the push target has not been moved forward since the last push. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_trans_ail.c | 35 ++++++++++++++++++++++++++++++++--- fs/xfs/xfs_trans_priv.h | 1 + 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 9c514483e599..6011ee661339 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -383,6 +383,12 @@ xfsaild_push( } spin_lock(&ailp->xa_lock); + + /* barrier matches the xa_target update in xfs_ail_push() */ + smp_rmb(); + target = ailp->xa_target; + ailp->xa_target_prev = target; + lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); if (!lip) { /* @@ -397,7 +403,6 @@ xfsaild_push( XFS_STATS_INC(xs_push_ail); lsn = lip->li_lsn; - target = ailp->xa_target; while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { int lock_result; @@ -527,8 +532,32 @@ xfsaild( __set_current_state(TASK_KILLABLE); else __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(tout ? - msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); + + spin_lock(&ailp->xa_lock); + + /* + * Idle if the AIL is empty and we are not racing with a target + * update. We check the AIL after we set the task to a sleep + * state to guarantee that we either catch an xa_target update + * or that a wake_up resets the state to TASK_RUNNING. + * Otherwise, we run the risk of sleeping indefinitely. + * + * The barrier matches the xa_target update in xfs_ail_push(). 
+ */ + smp_rmb(); + if (!xfs_ail_min(ailp) && + ailp->xa_target == ailp->xa_target_prev) { + spin_unlock(&ailp->xa_lock); + schedule(); + tout = 0; + continue; + } + spin_unlock(&ailp->xa_lock); + + if (tout) + schedule_timeout(msecs_to_jiffies(tout)); + + __set_current_state(TASK_RUNNING); try_to_freeze(); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index fb62377d1cbc..53b7c9b0f8f7 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -67,6 +67,7 @@ struct xfs_ail { struct task_struct *xa_task; struct list_head xa_ail; xfs_lsn_t xa_target; + xfs_lsn_t xa_target_prev; struct list_head xa_cursors; spinlock_t xa_lock; xfs_lsn_t xa_last_pushed_lsn; -- cgit v1.2.3 From 9a57fa8ee7c29e11c2a29ce058573ba99157eda7 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Tue, 24 Jul 2012 10:59:19 -0500 Subject: xfs: wait for the write the superblock on unmount v2: Add the xfs_buf_lock to xfs_quiesce_attr(). Add explanation why xfs_buf_lock() is used to wait for write. xfs_wait_buftarg() does not wait for the completion of the write of the uncached superblock. This write can race with the shutdown of the log and causes a panic if the write does not win the race. During the log write, xfsaild_push() will lock the buffer and set the XBF_ASYNC flag. Because the XBF_ASYNC flag is set, complete() is not performed on the buffer's iowait entry, so we cannot call xfs_buf_iowait() to wait for the write to complete. The buffer's lock is held until the write is complete, so we can block on a xfs_buf_lock() request to be notified that the write is complete.
Signed-off-by: Mark Tinguely Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_mount.c | 9 +++++++++ fs/xfs/xfs_sync.c | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9536fd190191..711ca51ca3d7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1529,6 +1529,15 @@ xfs_unmountfs( xfs_ail_push_all_sync(mp->m_ail); xfs_wait_buftarg(mp->m_ddev_targp); + /* + * The superblock buffer is uncached and xfsaild_push() will lock and + * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait() + * here but a lock on the superblock buffer will block until iodone() + * has completed. + */ + xfs_buf_lock(mp->m_sb_bp); + xfs_buf_unlock(mp->m_sb_bp); + xfs_log_unmount_write(mp); xfs_log_unmount(mp); xfs_uuid_unmount(mp); diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index e61fc1519073..97304f10e78a 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -359,6 +359,15 @@ xfs_quiesce_attr( * added an item to the AIL, thus flush it again. */ xfs_ail_push_all_sync(mp->m_ail); + + /* + * The superblock buffer is uncached and xfsaild_push() will lock and + * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait() + * here but a lock on the superblock buffer will block until iodone() + * has completed. + */ + xfs_buf_lock(mp->m_sb_bp); + xfs_buf_unlock(mp->m_sb_bp); } static void -- cgit v1.2.3 From f8310c59201b183ebee2e3fe0c7242f5729be0af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Jul 2012 11:50:30 +0400 Subject: fix O_EXCL handling for devices O_EXCL without O_CREAT has different semantics; it's "fail if already opened", not "fail if already exists". commit 71574865 broke that... 
Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 618d3531cf9f..e133bf3bbb03 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2418,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); - if (open_flag & O_EXCL) { + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { open_flag &= ~O_TRUNC; *opened |= FILE_CREATED; } @@ -2742,7 +2742,7 @@ retry_lookup: } error = -EEXIST; - if (open_flag & O_EXCL) + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) goto exit_dput; error = follow_managed(path, nd->flags); -- cgit v1.2.3 From 8842b3be96c376f174ae0d4f282d14728ad5febf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 7 Jun 2012 13:43:35 -0700 Subject: ceph: clean up useless d_parent checks d_parent is never NULL, and IS_ROOT() is the proper way to check for a (non-self-referential) parent. 
Reported-by: Al Viro Signed-off-by: Sage Weil --- fs/ceph/dir.c | 7 +++---- fs/ceph/mds_client.c | 11 ----------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3e8094be4604..6a66bd2d4da0 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -51,8 +51,7 @@ int ceph_init_dentry(struct dentry *dentry) goto out_unlock; } - if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ - ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) + if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) d_set_d_op(dentry, &ceph_snapdir_dentry_ops); @@ -79,7 +78,7 @@ struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) return NULL; spin_lock(&dentry->d_lock); - if (dentry->d_parent) { + if (!IS_ROOT(dentry)) { inode = dentry->d_parent->d_inode; ihold(inode); } @@ -1140,7 +1139,7 @@ static void ceph_d_prune(struct dentry *dentry) dout("ceph_d_prune %p\n", dentry); /* do we have a valid parent? 
*/ - if (!dentry->d_parent || IS_ROOT(dentry)) + if (IS_ROOT(dentry)) return; /* if we are not hashed, we don't affect D_COMPLETE */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5ac6434185ae..418f6a82c90d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1469,11 +1469,6 @@ retry: else len += 1 + temp->d_name.len; temp = temp->d_parent; - if (temp == NULL) { - rcu_read_unlock(); - pr_err("build_path corrupt dentry %p\n", dentry); - return ERR_PTR(-EINVAL); - } } rcu_read_unlock(); if (len) @@ -1510,12 +1505,6 @@ retry: if (pos) path[--pos] = '/'; temp = temp->d_parent; - if (temp == NULL) { - rcu_read_unlock(); - pr_err("build_path corrupt dentry\n"); - kfree(path); - return ERR_PTR(-EINVAL); - } } rcu_read_unlock(); if (pos != 0 || read_seqretry(&rename_lock, seq)) { -- cgit v1.2.3 From 64894cf843278c7b2653a6fac2cd1a697ff930dc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 31 Jul 2012 00:53:35 +0400 Subject: simplify lookup_open()/atomic_open() - do the temporary mnt_want_write() early The write ref to vfsmount taken in lookup_open()/atomic_open() is going to be dropped; we take the one to stay in dentry_open(). Just grab the temporary in caller if it looks like we are going to need it (create/truncate/writable open) and pass (by value) "has it succeeded" flag. Instead of doing mnt_want_write() inside, check that flag and treat "false" as "mnt_want_write() has just failed". mnt_want_write() is cheap and the things get considerably simpler and more robust that way - we get it and drop it in the same function, to start with, rather than passing a "has something in the guts of really scary functions taken it" back to caller. 
Signed-off-by: Al Viro --- fs/namei.c | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index e133bf3bbb03..35291ac6f42b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2395,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, - bool *want_write, bool need_lookup, + bool got_write, bool need_lookup, int *opened) { struct inode *dir = nd->path.dentry->d_inode; @@ -2432,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, * Another problem is returing the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ - if ((open_flag & (O_CREAT | O_TRUNC)) || - (open_flag & O_ACCMODE) != O_RDONLY) { - error = mnt_want_write(nd->path.mnt); - if (!error) { - *want_write = true; - } else if (!(open_flag & O_CREAT)) { + if (((open_flag & (O_CREAT | O_TRUNC)) || + (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) { + if (!(open_flag & O_CREAT)) { /* * No O_CREATE -> atomicity not a requirement -> fall * back to lookup + open @@ -2445,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto no_open; } else if (open_flag & (O_EXCL | O_TRUNC)) { /* Fall back and fail with the right error */ - create_error = error; + create_error = -EROFS; goto no_open; } else { /* No side effects, safe to clear O_CREAT */ - create_error = error; + create_error = -EROFS; open_flag &= ~O_CREAT; } } @@ -2556,7 +2553,7 @@ looked_up: static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, const struct open_flags *op, - bool *want_write, int *opened) + bool got_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2574,7 +2571,7 @@ static int 
lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, file, op, want_write, + return atomic_open(nd, dentry, path, file, op, got_write, need_lookup, opened); } @@ -2598,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path, * a permanent write count is taken through * the 'struct file' in finish_open(). */ - error = mnt_want_write(nd->path.mnt); - if (error) + if (!got_write) { + error = -EROFS; goto out_dput; - *want_write = true; + } *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) @@ -2631,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path, struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; - bool want_write = false; + bool got_write = false; int acc_mode = op->acc_mode; struct inode *inode; bool symlink_ok = false; @@ -2700,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path, } retry_lookup: + if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { + error = mnt_want_write(nd->path.mnt); + if (!error) + got_write = true; + /* + * do _not_ fail yet - we might not need that or fail with + * a different error; let lookup_open() decide; we'll be + * dropping this one anyway. + */ + } mutex_lock(&dir->d_inode->i_mutex); - error = lookup_open(nd, path, file, op, &want_write, opened); + error = lookup_open(nd, path, file, op, got_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (error <= 0) { @@ -2736,9 +2743,9 @@ retry_lookup: * possible mount and symlink following (this might be optimized away if * necessary...) 
*/ - if (want_write) { + if (got_write) { mnt_drop_write(nd->path.mnt); - want_write = false; + got_write = false; } error = -EEXIST; @@ -2803,7 +2810,7 @@ finish_open: error = mnt_want_write(nd->path.mnt); if (error) goto out; - want_write = true; + got_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); @@ -2830,7 +2837,7 @@ opened: goto exit_fput; } out: - if (want_write) + if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); terminate_walk(nd); @@ -2854,9 +2861,9 @@ stale_open: nd->inode = dir->d_inode; save_parent.mnt = NULL; save_parent.dentry = NULL; - if (want_write) { + if (got_write) { mnt_drop_write(nd->path.mnt); - want_write = false; + got_write = false; } retried = true; goto retry_lookup; -- cgit v1.2.3 From 5e8830dc85d0a6258132977381430b327cf553f2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:23 +0200 Subject: fs: Push file_update_time() into __block_page_mkwrite() Tested-by: Kamal Mostafa Tested-by: Peter M. Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index c7062c896d7c..d5ec360e332d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, loff_t size; int ret; + /* + * Update file times before taking page lock. We may end up failing the + * fault so this update may be superfluous but who really cares... 
+ */ + file_update_time(vma->vm_file); + lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || -- cgit v1.2.3 From 3ca9c3bd8a55956bee291cda5b224f737b0d0cfe Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:24 +0200 Subject: ceph: Push file_update_time() into ceph_page_mkwrite() CC: Sage Weil CC: ceph-devel@vger.kernel.org Acked-by: Sage Weil Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ceph/addr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8b67304e4b80..452e71a1b753 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size, len; int ret; + /* Update time before taking page lock */ + file_update_time(vma->vm_file); + size = i_size_read(inode); if (off + PAGE_CACHE_SIZE <= size) len = PAGE_CACHE_SIZE; -- cgit v1.2.3 From 120c2bcad80c0f7c6691ad70f7fb1e709854d725 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:25 +0200 Subject: 9p: Push file_update_time() into v9fs_vm_page_mkwrite() CC: Eric Van Hensbergen CC: Ron Minnich CC: Latchesar Ionkov CC: v9fs-developer@lists.sourceforge.net Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/9p/vfs_file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fc06fd27065e..dd6f7ee1e312 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", page, (unsigned long)filp->private_data); + /* Update file times before taking page lock */ + file_update_time(filp); + v9inode = V9FS_I(inode); /* make sure the cache has finished storing the page */ v9fs_fscache_wait_on_page_write(inode, page); -- cgit v1.2.3 From a63e9b2e76632b3ccb0f33f43484c0c64d601849 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: 
Tue, 12 Jun 2012 16:20:26 +0200 Subject: gfs2: Push file_update_time() into gfs2_page_mkwrite() CC: Steven Whitehouse CC: cluster-devel@redhat.com Acked-by: Steven Whitehouse Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/gfs2/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 31b199f6efc1..07959150d44e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -376,6 +376,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + /* Update file times before taking page lock */ + file_update_time(vma->vm_file); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) -- cgit v1.2.3 From 14ae417c6faf28b6e8ec60cc2aa0eaa19453a41c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:27 +0200 Subject: sysfs: Push file_update_time() into bin_page_mkwrite() CC: Greg Kroah-Hartman Acked-by: Greg Kroah-Hartman Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sysfs/bin.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index a4759833d62d..614b2b544880 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = 0; if (bb->vm_ops->page_mkwrite) ret = bb->vm_ops->page_mkwrite(vma, vmf); + else + file_update_time(file); sysfs_put_active(attr_sd); return ret; -- cgit v1.2.3 From c30dabfe5d10c5fd70d882e5afb8f59f2942b194 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:30 +0200 Subject: fs: Push mnt_want_write() outside of i_mutex Currently, mnt_want_write() is sometimes called with i_mutex held and sometimes without it. 
This isn't really a problem because mnt_want_write() is a non-blocking operation (it essentially has trylock semantics), but when the function also starts to handle frozen filesystems, it will get full lock semantics and thus proper lock ordering has to be established. So move all mnt_want_write() calls outside of i_mutex. One non-trivial case needing conversion is kern_path_create() / user_path_create() which didn't include mnt_want_write() but now needs to because it acquires i_mutex. Because there are virtual file systems which don't bother with freeze / remount-ro protection we actually provide both versions of the function - one which calls mnt_want_write() and one which does not. [AV: scratch the previous, mnt_want_write() has been moved to kern_path_create() by now] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/namei.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 35291ac6f42b..1b464390dde8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2975,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path { struct dentry *dentry = ERR_PTR(-EEXIST); struct nameidata nd; + int err2; int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); if (error) return ERR_PTR(error); @@ -2988,6 +2989,8 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; + /* don't fail immediately if it's r/o, at least try to report other errors */ + err2 = mnt_want_write(nd.path.mnt); /* * Do the final lookup.
*/ @@ -3009,9 +3012,10 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path error = -ENOENT; goto fail; } - error = mnt_want_write(nd.path.mnt); - if (error) + if (unlikely(err2)) { + error = err2; goto fail; + } *path = nd.path; return dentry; fail: @@ -3019,6 +3023,8 @@ fail: dentry = ERR_PTR(error); unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + if (!err2) + mnt_drop_write(nd.path.mnt); out: path_put(&nd.path); return dentry; @@ -3266,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname) } nd.flags &= ~LOOKUP_PARENT; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit1; mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); @@ -3276,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -ENOENT; goto exit3; } - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit3; error = security_path_rmdir(&nd.path, dentry); if (error) - goto exit4; + goto exit3; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); -exit4: - mnt_drop_write(nd.path.mnt); exit3: dput(dentry); exit2: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); putname(name); @@ -3355,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) goto exit1; nd.flags &= ~LOOKUP_PARENT; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit1; mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); @@ -3367,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname) if (!inode) goto slashes; ihold(inode); - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit2; error = security_path_unlink(&nd.path, dentry); if (error) - goto exit3; + goto exit2; error = vfs_unlink(nd.path.dentry->d_inode, dentry); -exit3: - mnt_drop_write(nd.path.mnt); - exit2: +exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if 
(inode) iput(inode); /* truncate the inode here */ + mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); putname(name); @@ -3753,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, if (newnd.last_type != LAST_NORM) goto exit2; + error = mnt_want_write(oldnd.path.mnt); + if (error) + goto exit2; + oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; newnd.flags |= LOOKUP_RENAME_TARGET; @@ -3788,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, if (new_dentry == trap) goto exit5; - error = mnt_want_write(oldnd.path.mnt); - if (error) - goto exit5; error = security_path_rename(&oldnd.path, old_dentry, &newnd.path, new_dentry); if (error) - goto exit6; + goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); -exit6: - mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_dir, old_dir); + mnt_drop_write(oldnd.path.mnt); exit2: path_put(&newnd.path); putname(to); -- cgit v1.2.3 From e24f17da3560781e274699f066fb788ad52f4402 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:31 +0200 Subject: fat: Push mnt_want_write() outside of i_mutex When mnt_want_write() starts to handle freezing it will get a full lock semantics requiring proper lock ordering. So push mnt_want_write() call outside of i_mutex as in other places. 
CC: OGAWA Hirofumi Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/fat/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fat/file.c b/fs/fat/file.c index a71fe3715ee8..e007b8bd8e5e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) if (err) goto out; - mutex_lock(&inode->i_mutex); err = mnt_want_write_file(file); if (err) - goto out_unlock_inode; + goto out; + mutex_lock(&inode->i_mutex); /* * ATTR_VOLUME and ATTR_DIR cannot be changed; this also @@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) /* The root directory has no attributes */ if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { err = -EINVAL; - goto out_drop_write; + goto out_unlock_inode; } if (sbi->options.sys_immutable && ((attr | oldattr) & ATTR_SYS) && !capable(CAP_LINUX_IMMUTABLE)) { err = -EPERM; - goto out_drop_write; + goto out_unlock_inode; } /* @@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) */ err = security_inode_setattr(file->f_path.dentry, &ia); if (err) - goto out_drop_write; + goto out_unlock_inode; /* This MUST be done before doing anything irreversible... 
*/ err = fat_setattr(file->f_path.dentry, &ia); if (err) - goto out_drop_write; + goto out_unlock_inode; fsnotify_change(file->f_path.dentry, ia.ia_valid); if (sbi->options.sys_immutable) { @@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) fat_save_attrs(inode, attr); mark_inode_dirty(inode); -out_drop_write: - mnt_drop_write_file(file); out_unlock_inode: mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(file); out: return err; } -- cgit v1.2.3 From e7848683ae7ded0a4a8964122a47da9104a98337 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:32 +0200 Subject: btrfs: Push mnt_want_write() outside of i_mutex When mnt_want_write() starts to handle freezing it will get a full lock semantics requiring proper lock ordering. So push mnt_want_write() call consistently outside of i_mutex. CC: Chris Mason CC: linux-btrfs@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1e9f6c019ad0..cd93eb530b74 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -193,6 +193,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (!inode_owner_or_capable(inode)) return -EACCES; + ret = mnt_want_write_file(file); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); ip_oldflags = ip->flags; @@ -207,10 +211,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } } - ret = mnt_want_write_file(file); - if (ret) - goto out_unlock; - if (flags & FS_SYNC_FL) ip->flags |= BTRFS_INODE_SYNC; else @@ -273,9 +273,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode->i_flags = i_oldflags; } - mnt_drop_write_file(file); out_unlock: mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(file); return ret; } @@ -641,6 +641,10 @@ static noinline int btrfs_mksubvol(struct path *parent, 
struct dentry *dentry; int error; + error = mnt_want_write(parent->mnt); + if (error) + return error; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, parent->dentry, namelen); @@ -652,13 +656,9 @@ static noinline int btrfs_mksubvol(struct path *parent, if (dentry->d_inode) goto out_dput; - error = mnt_want_write(parent->mnt); - if (error) - goto out_dput; - error = btrfs_may_create(dir, dentry); if (error) - goto out_drop_write; + goto out_dput; down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); @@ -676,12 +676,11 @@ static noinline int btrfs_mksubvol(struct path *parent, fsnotify_mkdir(dir, dentry); out_up_read: up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); -out_drop_write: - mnt_drop_write(parent->mnt); out_dput: dput(dentry); out_unlock: mutex_unlock(&dir->i_mutex); + mnt_drop_write(parent->mnt); return error; } -- cgit v1.2.3 From 4a55c1017b8dcfd0554734ce3f19374d5b522d59 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:33 +0200 Subject: nfsd: Push mnt_want_write() outside of i_mutex When mnt_want_write() starts to handle freezing it will get a full lock semantics requiring proper lock ordering. So push mnt_want_write() call consistently outside of i_mutex. CC: linux-nfs@vger.kernel.org CC: "J. 
Bruce Fields" Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/nfsd/nfs4recover.c | 9 +++--- fs/nfsd/nfsfh.c | 1 + fs/nfsd/nfsproc.c | 9 +++++- fs/nfsd/vfs.c | 79 +++++++++++++++++++++++----------------------- fs/nfsd/vfs.h | 11 +++++-- include/linux/nfsd/nfsfh.h | 1 + 6 files changed, 64 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5ff0b7b9fc08..43295d45cc2b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (status < 0) return; + status = mnt_want_write_file(rec_file); + if (status) + return; + dir = rec_file->f_path.dentry; /* lock the parent */ mutex_lock(&dir->d_inode->i_mutex); @@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = mnt_want_write_file(rec_file); - if (status) - goto out_put; status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); - mnt_drop_write_file(rec_file); out_put: dput(dentry); out_unlock: @@ -189,6 +189,7 @@ out_unlock: " (err %d); please check that %s exists" " and is writeable", status, user_recovery_dirname); + mnt_drop_write_file(rec_file); nfs4_reset_creds(original_cred); } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cc793005a87c..032af381b3aa 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp) fhp->fh_post_saved = 0; #endif } + fh_drop_write(fhp); if (exp) { exp_put(exp); fhp->fh_export = NULL; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index e15dc45fc5ec..aad6d457b9e8 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, struct dentry *dchild; int type, mode; __be32 nfserr; + int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); dprintk("nfsd: CREATE %s %.*s\n", @@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, 
struct nfsd_createargs *argp, nfserr = nfserr_exist; if (isdotent(argp->name, argp->len)) goto done; + hosterr = fh_want_write(dirfhp); + if (hosterr) { + nfserr = nfserrno(hosterr); + goto done; + } + fh_lock_nested(dirfhp, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { @@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, out_unlock: /* We don't really need to unlock, as fh_put does it. */ fh_unlock(dirfhp); - + fh_drop_write(dirfhp); done: fh_put(dirfhp); return nfsd_return_dirop(nfserr, resp); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4700a0a929d7..dccd396a1bb7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1276,6 +1276,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * If it has, the parent directory should already be locked. */ if (!resfhp->fh_dentry) { + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); @@ -1319,14 +1323,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - /* * Get the dir op function pointer. */ err = 0; + host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(dirp, dchild, iap->ia_mode, true); @@ -1343,10 +1344,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; } - if (host_err < 0) { - fh_drop_write(fhp); + if (host_err < 0) goto out_nfserr; - } err = nfsd_create_setattr(rqstp, resfhp, iap); @@ -1358,7 +1357,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = nfserrno(commit_metadata(fhp)); if (err2) err = err2; - fh_drop_write(fhp); /* * Update the file handle to get the new inode info. 
*/ @@ -1417,6 +1415,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_notdir; if (!dirp->i_op->lookup) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); /* @@ -1449,9 +1452,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, v_atime = verifier[1]&0x7fffffff; } - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; if (dchild->d_inode) { err = 0; @@ -1522,7 +1522,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!err) err = nfserrno(commit_metadata(fhp)); - fh_drop_write(fhp); /* * Update the filehandle to get the new inode info. */ @@ -1533,6 +1532,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_unlock(fhp); if (dchild && !IS_ERR(dchild)) dput(dchild); + fh_drop_write(fhp); return err; out_nfserr: @@ -1613,6 +1613,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock(fhp); dentry = fhp->fh_dentry; dnew = lookup_one_len(fname, dentry, flen); @@ -1620,10 +1625,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) @@ -1683,6 +1684,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (isdotent(name, len)) goto out; + host_err = fh_want_write(tfhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = ddir->d_inode; @@ -1694,18 +1701,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, dold = tfhp->fh_dentry; - host_err = fh_want_write(tfhp); - if (host_err) { - err = nfserrno(host_err); - goto out_dput; - } err = nfserr_noent; if 
(!dold->d_inode) - goto out_drop_write; + goto out_dput; host_err = nfsd_break_lease(dold->d_inode); if (host_err) { err = nfserrno(host_err); - goto out_drop_write; + goto out_dput; } host_err = vfs_link(dold, dirp, dnew); if (!host_err) { @@ -1718,12 +1720,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_drop_write: - fh_drop_write(tfhp); out_dput: dput(dnew); out_unlock: fh_unlock(ffhp); + fh_drop_write(tfhp); out: return err; @@ -1766,6 +1767,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; + host_err = fh_want_write(ffhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); @@ -1796,17 +1803,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = -EXDEV; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out_dput_new; - host_err = fh_want_write(ffhp); - if (host_err) - goto out_dput_new; host_err = nfsd_break_lease(odentry->d_inode); if (host_err) - goto out_drop_write; + goto out_dput_new; if (ndentry->d_inode) { host_err = nfsd_break_lease(ndentry->d_inode); if (host_err) - goto out_drop_write; + goto out_dput_new; } host_err = vfs_rename(fdir, odentry, tdir, ndentry); if (!host_err) { @@ -1814,8 +1818,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!host_err) host_err = commit_metadata(ffhp); } -out_drop_write: - fh_drop_write(ffhp); out_dput_new: dput(ndentry); out_dput_old: @@ -1831,6 +1833,7 @@ out_drop_write: fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = 0; + fh_drop_write(ffhp); out: return err; @@ -1856,6 +1859,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (err) goto out; + host_err = 
fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = dentry->d_inode; @@ -1874,21 +1881,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = rdentry->d_inode->i_mode & S_IFMT; - host_err = fh_want_write(fhp); - if (host_err) - goto out_put; - host_err = nfsd_break_lease(rdentry->d_inode); if (host_err) - goto out_drop_write; + goto out_put; if (type != S_IFDIR) host_err = vfs_unlink(dirp, rdentry); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) host_err = commit_metadata(fhp); -out_drop_write: - fh_drop_write(fhp); out_put: dput(rdentry); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index ec0611b2b738..359594c393d2 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); static inline int fh_want_write(struct svc_fh *fh) { - return mnt_want_write(fh->fh_export->ex_path.mnt); + int ret = mnt_want_write(fh->fh_export->ex_path.mnt); + + if (!ret) + fh->fh_want_write = 1; + return ret; } static inline void fh_drop_write(struct svc_fh *fh) { - mnt_drop_write(fh->fh_export->ex_path.mnt); + if (fh->fh_want_write) { + fh->fh_want_write = 0; + mnt_drop_write(fh->fh_export->ex_path.mnt); + } } #endif /* LINUX_NFSD_VFS_H */ diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index ce4743a26015..fa63048fecff 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -143,6 +143,7 @@ typedef struct svc_fh { int fh_maxsize; /* max size for fh_handle */ unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ #ifdef CONFIG_NFSD_V3 unsigned char fh_post_saved; /* post-op attrs saved */ -- cgit v1.2.3 From 0add3e8567a42b8137e26c0595a59f893d8592e0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 27 Jul 2012 11:49:26 -0700 Subject: nfs: fix stub return type warnings Fix numerous repeated warnings 
by making the stub function void instead of non-void: fs/nfs/nfs4_fs.h: In function 'nfs4_unregister_sysctl': fs/nfs/nfs4_fs.h:385:1: warning: no return statement in function returning non-void Signed-off-by: Randy Dunlap Cc: Trond Myklebust Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b1ecacd8784a..5511690de8a5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -380,7 +380,7 @@ static inline int nfs4_register_sysctl(void) return 0; } -static inline int nfs4_unregister_sysctl(void) +static inline void nfs4_unregister_sysctl(void) { } #endif -- cgit v1.2.3 From 59948db3be76099b14c7103d0f46c5454b173c3a Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 18 Jul 2012 14:20:49 -0400 Subject: NFS: fix pnfs regression with directio reads Commit 1abb50886af "NFS: Create an read_pageio_init() function" did not modify the call in direct.c, preventing direct io from using pnfs. This reintroduces that capability. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 48253372ab1d..69f20c73220e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -393,7 +393,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_read(&desc, dreq->inode, + NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; -- cgit v1.2.3 From c95908e4c50d218f016e3866f5abf786055df635 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 18 Jul 2012 14:20:50 -0400 Subject: NFS: fix pnfs regression with directio writes Commit 57208fa7e51 "NFS: Create an write_pageio_init() function" did not modify the calls in direct.c, preventing direct io from using pnfs. 
This reintroduces that capability. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 69f20c73220e..42dce909ec70 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -478,7 +478,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -782,7 +782,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, + NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); -- cgit v1.2.3 From f44106e2173f08ccb1c9195d85a6c22388b461c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 15:49:56 -0400 Subject: nfs: fix fl_type tests in NFSv4 code fl_type is not a bitmap. Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e373c30e8d4..6843e0a37de8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4885,7 +4885,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. 
*/ - switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) { + switch (request->fl_type) { case F_RDLCK: if (!(filp->f_mode & FMODE_READ)) return -EBADF; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 610ebccbde5d..6cbd602e26d5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1236,7 +1236,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct static inline int nfs4_lock_type(struct file_lock *fl, int block) { - if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK) + if (fl->fl_type == F_RDLCK) return block ? NFS4_READW_LT : NFS4_READ_LT; return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT; } -- cgit v1.2.3 From 159e0561e322dd8008fff59e36efff8d2bdd0b0e Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 29 May 2012 13:57:58 +0800 Subject: pnfsblock: bail out partial page IO Current block layout driver read/write code assumes page aligned IO in many places. Add a checker to validate the assumption. Otherwise there would be data corruption, such as when an application does open(O_WRONLY) followed by a page-unaligned write.
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7ae8a608956f..dd392ed5f2e2 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -228,6 +228,14 @@ bl_end_par_io_read(void *data, int unused) schedule_work(&rdata->task.u.tk_work); } +static bool +bl_check_alignment(u64 offset, u32 len, unsigned long blkmask) +{ + if ((offset & blkmask) || (len & blkmask)) + return false; + return true; +} + static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -244,6 +252,9 @@ bl_read_pagelist(struct nfs_read_data *rdata) dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK)) + goto use_mds; + par = alloc_parallel(rdata); if (!par) goto use_mds; @@ -552,7 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; - struct parallel_io *par; + struct parallel_io *par = NULL; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; struct page **pages = wdata->args.pages; @@ -563,6 +574,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + /* Check for alignment first */ + if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK)) + goto out_mds; + /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
@@ -996,14 +1011,32 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static void +bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + nfs_pageio_reset_read_mds(pgio); + else + pnfs_generic_pg_init_read(pgio, req); +} + +static void +bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + nfs_pageio_reset_write_mds(pgio); + else + pnfs_generic_pg_init_write(pgio, req); +} + static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = bl_pg_init_read, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = bl_pg_init_write, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From 5cf02d09b50b1ee1c2d536c9cf64af5a7d433f56 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:58:51 -0400 Subject: nfs: skip commit in releasepage if we're freeing memory for fs-related reasons We've had some reports of a deadlock where rpciod ends up with a stack trace like this: PID: 2507 TASK: ffff88103691ab40 CPU: 14 COMMAND: "rpciod/14" #0 [ffff8810343bf2f0] schedule at ffffffff814dabd9 #1 [ffff8810343bf3b8] nfs_wait_bit_killable at ffffffffa038fc04 [nfs] #2 [ffff8810343bf3c8] __wait_on_bit at ffffffff814dbc2f #3 [ffff8810343bf418] out_of_line_wait_on_bit at ffffffff814dbcd8 #4 [ffff8810343bf488] nfs_commit_inode at ffffffffa039e0c1 [nfs] #5 [ffff8810343bf4f8] nfs_release_page at ffffffffa038bef6 [nfs] #6 [ffff8810343bf528] try_to_release_page at ffffffff8110c670 #7 [ffff8810343bf538] shrink_page_list.clone.0 at ffffffff81126271 #8 [ffff8810343bf668] shrink_inactive_list at ffffffff81126638 #9 [ffff8810343bf818] shrink_zone at ffffffff8112788f #10 
[ffff8810343bf8c8] do_try_to_free_pages at ffffffff81127b1e #11 [ffff8810343bf958] try_to_free_pages at ffffffff8112812f #12 [ffff8810343bfa08] __alloc_pages_nodemask at ffffffff8111fdad #13 [ffff8810343bfb28] kmem_getpages at ffffffff81159942 #14 [ffff8810343bfb58] fallback_alloc at ffffffff8115a55a #15 [ffff8810343bfbd8] ____cache_alloc_node at ffffffff8115a2d9 #16 [ffff8810343bfc38] kmem_cache_alloc at ffffffff8115b09b #17 [ffff8810343bfc78] sk_prot_alloc at ffffffff81411808 #18 [ffff8810343bfcb8] sk_alloc at ffffffff8141197c #19 [ffff8810343bfce8] inet_create at ffffffff81483ba6 #20 [ffff8810343bfd38] __sock_create at ffffffff8140b4a7 #21 [ffff8810343bfd98] xs_create_sock at ffffffffa01f649b [sunrpc] #22 [ffff8810343bfdd8] xs_tcp_setup_socket at ffffffffa01f6965 [sunrpc] #23 [ffff8810343bfe38] worker_thread at ffffffff810887d0 #24 [ffff8810343bfee8] kthread at ffffffff8108dd96 #25 [ffff8810343bff48] kernel_thread at ffffffff8100c1ca rpciod is trying to allocate memory for a new socket to talk to the server. The VM ends up calling ->releasepage to get more memory, and it tries to do a blocking commit. That commit can't succeed however without a connected socket, so we deadlock. Fix this by setting PF_FSTRANS on the workqueue task prior to doing the socket allocation, and having nfs_release_page check for that flag when deciding whether to do a commit call. Also, set PF_FSTRANS unconditionally in rpc_async_schedule since that function can also do allocations sometimes. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/file.c | 7 +++++-- net/sunrpc/sched.c | 2 ++ net/sunrpc/xprtrdma/transport.c | 3 ++- net/sunrpc/xprtsock.c | 10 ++++++++++ 4 files changed, 19 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 70d124a61b98..1b3925426929 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -447,8 +447,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not + * doing this memory reclaim for a fs-related allocation. + */ + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && + !(current->flags & PF_FSTRANS)) { int how = FLUSH_SYNC; /* Don't let kswapd deadlock waiting for OOM RPC calls */ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ada1e2c33aa4..1f19aa15f89b 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -791,7 +791,9 @@ void rpc_execute(struct rpc_task *task) static void rpc_async_schedule(struct work_struct *work) { + current->flags |= PF_FSTRANS; __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + current->flags &= ~PF_FSTRANS; } /** diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b446e100286f..06cdbff79e4a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work) int rc = 0; if (!xprt->shutdown) { + current->flags |= PF_FSTRANS; xprt_clear_connected(xprt); dprintk("RPC: %s: %sconnect\n", __func__, @@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work) out: xprt_wake_pending_tasks(xprt, rc); - out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; } /* diff --git 
a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 890b03f8d877..b88c6bf657ba 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1895,6 +1895,8 @@ static void xs_local_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); status = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0, &sock, 1); @@ -1928,6 +1930,7 @@ static void xs_local_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -1970,6 +1973,8 @@ static void xs_udp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + /* Start by resetting any existing state */ xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, @@ -1988,6 +1993,7 @@ static void xs_udp_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /* @@ -2113,6 +2119,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, @@ -2162,6 +2170,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; return; case -EINVAL: /* Happens, for instance, if the user specified a link @@ -2174,6 +2183,7 @@ out_eagain: out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /** -- cgit v1.2.3 From a427b9ec4eda8cd6e641ea24541d30b641fc3140 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Jul 2012 16:53:36 +0100 Subject: NFS: Fix a number of bugs in the idmapper Fix a number of bugs in 
the NFS idmapper code: (1) Only registered key types can be passed to the core keys code, so register the legacy idmapper key type. This is a requirement because the unregister function cleans up keys belonging to that key type so that there aren't dangling pointers to the module left behind - including the key->type pointer. (2) Rename the legacy key type. You can't have two key types with the same name, and (1) would otherwise require that. (3) complete_request_key() must be called in the error path of nfs_idmap_legacy_upcall(). (4) There is one idmap struct for each nfs_client struct. This means that idmap->idmap_key_cons is shared without the use of a lock. This is a problem because key_instantiate_and_link() - as called indirectly by idmap_pipe_downcall() - releases anyone waiting for the key to be instantiated. What happens is that idmap_pipe_downcall(), running in the rpc.idmapd thread, allows the NFS filesystem code, in whatever thread it is running in, to continue. That thread may then make another idmapper call, overwriting idmap_key_cons before idmap_pipe_downcall() gets the chance to call complete_request_key(). I *think* that reading idmap_key_cons only once, before key_instantiate_and_link() is called, and then caching the result in a variable is sufficient. 
Bug (4) is the cause of: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) PGD 0 Oops: 0010 [#1] SMP CPU 1 Modules linked in: ppdev parport_pc lp parport ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack nfs fscache xt_CHECKSUM auth_rpcgss iptable_mangle nfs_acl bridge stp llc lockd be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi snd_hda_codec_realtek snd_usb_audio snd_hda_intel snd_hda_codec snd_seq snd_pcm snd_hwdep snd_usbmidi_lib snd_rawmidi snd_timer uvcvideo videobuf2_core videodev media videobuf2_vmalloc snd_seq_device videobuf2_memops e1000e vhost_net iTCO_wdt joydev coretemp snd soundcore macvtap macvlan i2c_i801 snd_page_alloc tun iTCO_vendor_support microcode kvm_intel kvm sunrpc hid_logitech_dj usb_storage i915 drm_kms_helper drm i2c_algo_bit i2c_core video [last unloaded: scsi_wait_scan] Pid: 1229, comm: rpc.idmapd Not tainted 3.4.2-1.fc16.x86_64 #1 Gateway DX4710-UB801A/G33M05G1 RIP: 0010:[<0000000000000000>] [< (null)>] (null) RSP: 0018:ffff8801a3645d40 EFLAGS: 00010246 RAX: ffff880077707e30 RBX: ffff880077707f50 RCX: ffff8801a18ccd80 RDX: 0000000000000006 RSI: ffff8801a3645e75 RDI: ffff880077707f50 RBP: ffff8801a3645d88 R08: ffff8801a430f9c0 R09: ffff8801a3645db0 R10: 000000000000000a R11: 0000000000000246 R12: ffff8801a18ccd80 R13: ffff8801a3645e75 R14: ffff8801a430f9c0 R15: 0000000000000006 FS: 00007fb6fb51a700(0000) GS:ffff8801afc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001a49b0000 CR4: 00000000000027e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rpc.idmapd (pid: 1229, threadinfo ffff8801a3644000, task 
ffff8801a3bf9710) Stack: ffffffff81260878 ffff8801a3645db0 ffff8801a3645db0 ffff880077707a90 ffff880077707f50 ffff8801a18ccd80 0000000000000006 ffff8801a3645e75 ffff8801a430f9c0 ffff8801a3645dd8 ffffffff81260983 ffff8801a3645de8 Call Trace: [] ? __key_instantiate_and_link+0x58/0x100 [] key_instantiate_and_link+0x63/0xa0 [] idmap_pipe_downcall+0x1cb/0x1e0 [nfs] [] rpc_pipe_write+0x67/0x90 [sunrpc] [] vfs_write+0xb3/0x180 [] sys_write+0x4a/0x90 [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [< (null)>] (null) RSP CR2: 0000000000000000 Signed-off-by: David Howells Reviewed-by: Steve Dickson Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.4] --- fs/nfs/idmap.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 864c51e4b400..1b5058b4043b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -205,12 +205,18 @@ static int nfs_idmap_init_keyring(void) if (ret < 0) goto failed_put_key; + ret = register_key_type(&key_type_id_resolver_legacy); + if (ret < 0) + goto failed_reg_legacy; + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; id_resolver_cache = cred; return 0; +failed_reg_legacy: + unregister_key_type(&key_type_id_resolver); failed_put_key: key_put(keyring); failed_put_cred: @@ -222,6 +228,7 @@ static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); + unregister_key_type(&key_type_id_resolver_legacy); put_cred(id_resolver_cache); } @@ -385,7 +392,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = { }; static struct key_type key_type_id_resolver_legacy = { - .name = "id_resolver", + .name = "id_legacy", .instantiate = user_instantiate, .match = user_match, .revoke = user_revoke, @@ -674,6 +681,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, if (ret < 0) 
goto out2; + BUG_ON(idmap->idmap_key_cons != NULL); idmap->idmap_key_cons = cons; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); @@ -687,8 +695,7 @@ out2: out1: kfree(msg); out0: - key_revoke(cons->key); - key_revoke(cons->authkey); + complete_request_key(cons, ret); return ret; } @@ -722,11 +729,18 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons = idmap->idmap_key_cons; + struct key_construction *cons; struct idmap_msg im; size_t namelen_in; int ret; + /* If instantiation is successful, anyone waiting for key construction + * will have been woken up and someone else may now have used + * idmap_key_cons - so after this point we may no longer touch it. + */ + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + if (mlen != sizeof(im)) { ret = -ENOSPC; goto out; @@ -739,7 +753,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { ret = mlen; - complete_request_key(idmap->idmap_key_cons, -ENOKEY); + complete_request_key(cons, -ENOKEY); goto out_incomplete; } @@ -756,7 +770,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } out: - complete_request_key(idmap->idmap_key_cons, ret); + complete_request_key(cons, ret); out_incomplete: return ret; } -- cgit v1.2.3 From ab7017a3a0a64b953e091619c30413b3721d925d Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:16 -0400 Subject: NFS: Add version registering framework This patch adds in the code to track multiple versions of the NFS protocol. I created default structures for v2, v3 and v4 so that each version can continue to work while I convert them into kernel modules. I also removed the const parameter from the rpc_version array so that I can change it at runtime. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 4 +- fs/nfs/client.c | 147 +++++++++++++++++++++++++++++++++++----------- fs/nfs/inode.c | 9 +-- fs/nfs/internal.h | 10 ++-- fs/nfs/nfs.h | 72 +++++++++++++++++++++++ fs/nfs/nfs2super.c | 25 ++++++++ fs/nfs/nfs3super.c | 25 ++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4client.c | 4 +- fs/nfs/nfs4super.c | 14 ++++- fs/nfs/super.c | 32 ++++++---- include/linux/nfs_fs_sb.h | 1 + 12 files changed, 283 insertions(+), 61 deletions(-) create mode 100644 fs/nfs/nfs.h create mode 100644 fs/nfs/nfs2super.c create mode 100644 fs/nfs/nfs3super.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b96c2038346..66dd3075e5db 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,8 +9,8 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V2) += nfs2super.o proc.o nfs2xdr.o +nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 65afa382c5e3..462de24482b4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -51,25 +51,23 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); +static DEFINE_SPINLOCK(nfs_version_lock); +static DEFINE_MUTEX(nfs_version_mutex); +static LIST_HEAD(nfs_versions); /* * RPC cruft for NFS */ static const struct rpc_version *nfs_version[5] = { -#ifdef CONFIG_NFS_V2 - [2] = &nfs_version2, -#endif -#ifdef CONFIG_NFS_V3 - [3] = &nfs_version3, -#endif -#ifdef CONFIG_NFS_V4 - [4] = &nfs_version4, -#endif + [2] 
= NULL, + [3] = NULL, + [4] = NULL, }; const struct rpc_program nfs_program = { @@ -101,6 +99,93 @@ const struct rpc_program nfsacl_program = { }; #endif /* CONFIG_NFS_V3_ACL */ +static struct nfs_subversion *find_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs; + spin_lock(&nfs_version_lock); + + list_for_each_entry(nfs, &nfs_versions, list) { + if (nfs->rpc_ops->version == version) { + spin_unlock(&nfs_version_lock); + return nfs; + } + }; + + spin_unlock(&nfs_version_lock); + return ERR_PTR(-EPROTONOSUPPORT);; +} + +struct nfs_subversion *get_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs = find_nfs_version(version); + + if (IS_ERR(nfs)) { + mutex_lock(&nfs_version_mutex); + request_module("nfs%d", version); + nfs = find_nfs_version(version); + mutex_unlock(&nfs_version_mutex); + } + + if (!IS_ERR(nfs)) + try_module_get(nfs->owner); + return nfs; +} + +void put_nfs_version(struct nfs_subversion *nfs) +{ + module_put(nfs->owner); +} + +void register_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + list_add(&nfs->list, &nfs_versions); + nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers; + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(register_nfs_version); + +void unregister_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + nfs_version[nfs->rpc_ops->version] = NULL; + list_del(&nfs->list); + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(unregister_nfs_version); + +/* + * Preload all configured NFS versions during module init. + * This function should be edited after each protocol is converted, + * and eventually removed. 
+ */ +int __init nfs_register_versions(void) +{ + int err = init_nfs_v2(); + if (err) + return err; + + err = init_nfs_v3(); + if (err) + return err; + + return init_nfs_v4(); +} + +/* + * Remove each pre-loaded NFS version + */ +void nfs_unregister_versions(void) +{ + exit_nfs_v2(); + exit_nfs_v3(); + exit_nfs_v4(); +} + /* * Allocate a shared client record * @@ -116,7 +201,10 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - clp->rpc_ops = cl_init->rpc_ops; + clp->cl_nfs_mod = cl_init->nfs_mod; + try_module_get(clp->cl_nfs_mod->owner); + + clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; @@ -145,6 +233,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error_cleanup: + put_nfs_version(clp->cl_nfs_mod); kfree(clp); error_0: return ERR_PTR(err); @@ -205,6 +294,7 @@ void nfs_free_client(struct nfs_client *clp) put_rpccred(clp->cl_machine_cred); put_net(clp->cl_net); + put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp); @@ -362,7 +452,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Different NFS versions cannot share the same nfs_client */ - if (clp->rpc_ops != data->rpc_ops) + if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; if (clp->cl_proto != data->proto) @@ -431,9 +521,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); + const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname ?: "", cl_init->rpc_ops->version); + cl_init->hostname ?: "", rpc_ops->version); /* see if the client already exists */ do { @@ -450,14 +541,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, list_add(&new->cl_share_link, 
&nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; - return cl_init->rpc_ops->init_client(new, - timeparms, ip_addr, - authflavour); + return rpc_ops->init_client(new, timeparms, ip_addr, + authflavour); } spin_unlock(&nn->nfs_client_lock); - new = cl_init->rpc_ops->alloc_client(cl_init); + new = rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", @@ -714,13 +804,14 @@ error: * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + const struct nfs_parsed_mount_data *data, + struct nfs_subversion *nfs_mod) { struct nfs_client_initdata cl_init = { .hostname = data->nfs_server.hostname, .addr = (const struct sockaddr *)&data->nfs_server.address, .addrlen = data->nfs_server.addrlen, - .rpc_ops = NULL, + .nfs_mod = nfs_mod, .proto = data->nfs_server.protocol, .net = data->net, }; @@ -730,21 +821,6 @@ static int nfs_init_server(struct nfs_server *server, dprintk("--> nfs_init_server()\n"); - switch (data->version) { -#ifdef CONFIG_NFS_V2 - case 2: - cl_init.rpc_ops = &nfs_v2_clientops; - break; -#endif -#ifdef CONFIG_NFS_V3 - case 3: - cl_init.rpc_ops = &nfs_v3_clientops; - break; -#endif - default: - return -EPROTONOSUPPORT; - } - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) @@ -1033,7 +1109,8 @@ void nfs_free_server(struct nfs_server *server) * - keyed on server and FSID */ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; struct nfs_fattr *fattr; @@ -1049,7 +1126,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, goto error; /* Get a client representation */ - error = nfs_init_server(server, data); + error = 
nfs_init_server(server, data, nfs_mod); if (error < 0) goto error; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 35f7e4bc680e..e8877c82582d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -50,6 +50,7 @@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -1671,21 +1672,17 @@ static int __init init_nfs_fs(void) rpc_proc_register(&init_net, &nfs_rpcstat); #endif -#ifdef CONFIG_NFS_V4 - err = init_nfs_v4(); + err = nfs_register_versions(); if (err) goto out1; -#endif if ((err = register_nfs_fs()) != 0) goto out0; return 0; out0: -#ifdef CONFIG_NFS_V4 - exit_nfs_v4(); + nfs_unregister_versions(); out1: -#endif #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cfafd13b6fe9..ac936476b3bc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -90,7 +90,7 @@ struct nfs_client_initdata { const char *hostname; const struct sockaddr *addr; size_t addrlen; - const struct nfs_rpc_ops *rpc_ops; + struct nfs_subversion *nfs_mod; int proto; u32 minorversion; struct net *net; @@ -189,7 +189,8 @@ nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_fh *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); @@ -321,6 +322,7 @@ void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 extern struct file_system_type nfs4_xdev_fs_type; @@ -329,8 +331,8 @@ extern struct file_system_type nfs4_referral_fs_type; void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct 
nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(struct file_system_type *, struct nfs_server *, - int, const char *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, + struct nfs_mount_info *, struct nfs_subversion *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h new file mode 100644 index 000000000000..ac10b9e6c920 --- /dev/null +++ b/fs/nfs/nfs.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + * + * Function and structures exported by the NFS module + * for use by NFS version-specific modules. + */ +#ifndef __LINUX_INTERNAL_NFS_H +#define __LINUX_INTERNAL_NFS_H + +#include +#include +#include + +struct nfs_subversion { + struct module *owner; /* THIS_MODULE pointer */ + struct file_system_type *nfs_fs; /* NFS filesystem type */ + const struct rpc_version *rpc_vers; /* NFS version information */ + const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ + struct list_head list; /* List of NFS versions */ +}; + +int nfs_register_versions(void); +void nfs_unregister_versions(void); + +#ifdef CONFIG_NFS_V2 +int init_nfs_v2(void); +void exit_nfs_v2(void); +#else /* CONFIG_NFS_V2 */ +static inline int __init init_nfs_v2(void) +{ + return 0; +} + +static inline void exit_nfs_v2(void) +{ +} +#endif /* CONFIG_NFS_V2 */ + +#ifdef CONFIG_NFS_V3 +int init_nfs_v3(void); +void exit_nfs_v3(void); +#else /* CONFIG_NFS_V3 */ +static inline int __init init_nfs_v3(void) +{ + return 0; +} + +static inline void exit_nfs_v3(void) +{ +} +#endif /* CONFIG_NFS_V3 */ + +#ifdef CONFIG_NFS_V4 +int init_nfs_v4(void); +void exit_nfs_v4(void); +#else /* CONFIG_NFS_V4 */ +static inline int __init init_nfs_v4(void) +{ + return 0; 
+} + +static inline void exit_nfs_v4(void) +{ +} +#endif /* CONFIG_NFS_V4 */ + +struct nfs_subversion *get_nfs_version(unsigned int); +void put_nfs_version(struct nfs_subversion *); +void register_nfs_version(struct nfs_subversion *); +void unregister_nfs_version(struct nfs_subversion *); + +#endif /* __LINUX_INTERNAL_NFS_H */ diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c new file mode 100644 index 000000000000..cef06d42334a --- /dev/null +++ b/fs/nfs/nfs2super.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + */ +#include +#include +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v2 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version2, + .rpc_ops = &nfs_v2_clientops, +}; + +int __init init_nfs_v2(void) +{ + register_nfs_version(&nfs_v2); + return 0; +} + +void exit_nfs_v2(void) +{ + unregister_nfs_version(&nfs_v2); +} diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c new file mode 100644 index 000000000000..f815cf359d97 --- /dev/null +++ b/fs/nfs/nfs3super.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. 
+ */ +#include +#include +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v3 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version3, + .rpc_ops = &nfs_v3_clientops, +}; + +int __init init_nfs_v3(void) +{ + register_nfs_version(&nfs_v3); + return 0; +} + +void exit_nfs_v3(void) +{ + unregister_nfs_version(&nfs_v3); +} diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5511690de8a5..99c2e7e4d3ea 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -366,6 +366,7 @@ extern const nfs4_stateid zero_stateid; /* nfs4super.c */ struct nfs_mount_info; +extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 1c3f13c8e472..769e798b3959 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -357,7 +357,7 @@ static int nfs4_set_client(struct nfs_server *server, .hostname = hostname, .addr = addr, .addrlen = addrlen, - .rpc_ops = &nfs_v4_clientops, + .nfs_mod = &nfs_v4, .proto = proto, .minorversion = minorversion, .net = net, @@ -411,7 +411,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, struct nfs_client_initdata cl_init = { .addr = ds_addr, .addrlen = ds_addrlen, - .rpc_ops = &nfs_v4_clientops, + .nfs_mod = &nfs_v4, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, .net = mds_clp->cl_net, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 59264fb335c8..1f3401902c2f 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -8,6 +8,7 @@ #include #include "internal.h" #include "nfs4_fs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -75,6 +76,13 @@ static const struct super_operations nfs4_sops = { .remount_fs = nfs_remount, }; +struct nfs_subversion nfs_v4 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, +}; + /* * Set up an NFS4 
superblock */ @@ -113,7 +121,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, goto out; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); out: return mntroot; @@ -293,7 +301,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, goto out; } - mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); out: nfs_free_fhandle(mount_info.mntfh); return mntroot; @@ -343,6 +351,7 @@ int __init init_nfs_v4(void) if (err < 0) goto out2; + register_nfs_version(&nfs_v4); return 0; out2: nfs4_unregister_sysctl(); @@ -354,6 +363,7 @@ out: void exit_nfs_v4(void) { + unregister_nfs_version(&nfs_v4); unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 95866a8c21bb..61405a7a6b3c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -64,6 +64,7 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_TEXT_DATA 1 @@ -281,7 +282,7 @@ static match_table_t nfs_vers_tokens = { static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs_fs_type = { +struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .mount = nfs_fs_mount, @@ -1650,7 +1651,8 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, } static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { int status; struct nfs_server *server; @@ -1662,11 +1664,11 @@ static struct dentry *nfs_try_mount(int flags, const char *dev_name, } /* Get a volume representation */ - server = 
nfs_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs_create_server(mount_info->parsed, mount_info->mntfh, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); - return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info); + return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } /* @@ -2297,10 +2299,10 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return 0; } -struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, - struct nfs_server *server, +struct dentry *nfs_fs_mount_common(struct nfs_server *server, int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2319,7 +2321,7 @@ struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); + s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; @@ -2378,6 +2380,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, .set_security = nfs_set_sb_security, }; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod; int error; mount_info.parsed = nfs_alloc_parsed_mount_data(); @@ -2394,12 +2397,20 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; } + nfs_mod = get_nfs_version(mount_info.parsed->version); + if (IS_ERR(nfs_mod)) { + mntroot = ERR_CAST(nfs_mod); + goto out; + } + #ifdef CONFIG_NFS_V4 if (mount_info.parsed->version == 4) mntroot = nfs4_try_mount(flags, dev_name, &mount_info); else #endif /* CONFIG_NFS_V4 */ - mntroot = nfs_try_mount(flags, dev_name, &mount_info); + mntroot = nfs_try_mount(flags, dev_name, &mount_info, 
nfs_mod); + + put_nfs_version(nfs_mod); out: nfs_free_parsed_mount_data(mount_info.parsed); @@ -2440,6 +2451,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, struct nfs_clone_mount *data = mount_info->cloned; struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; int error; dprintk("--> nfs_xdev_mount_common()\n"); @@ -2453,7 +2465,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, goto out_err; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount_common() = 0\n"); out: return mntroot; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 65327652c61a..6039297801f4 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { struct rpc_clnt * cl_rpcclient; const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ int cl_proto; /* Network transport protocol */ + struct nfs_subversion * cl_nfs_mod; /* pointer to nfs version module */ u32 cl_minorversion;/* NFSv4 minorversion */ struct rpc_cred *cl_machine_cred; -- cgit v1.2.3 From e8f25e6d6d198dca7d09d8fe2c24ba3b9683bb24 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:17 -0400 Subject: NFS: Remove the NFS v4 xdev mount function I can now share this code with the v2 and v3 code by using the NFS subversion structure. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 13 ------------- fs/nfs/nfs4super.c | 25 ------------------------- fs/nfs/super.c | 30 ++++++++++-------------------- 3 files changed, 10 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 08b9c93675da..0f699fefee6e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -195,20 +195,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { -#ifdef CONFIG_NFS_V4 - struct vfsmount *mnt = ERR_PTR(-EINVAL); - switch (server->nfs_client->rpc_ops->version) { - case 2: - case 3: - mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); - break; - case 4: - mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); - } - return mnt; -#else return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); -#endif } /** diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1f3401902c2f..8a505573c289 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -14,8 +14,6 @@ static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, @@ -37,14 +35,6 @@ static struct file_system_type nfs4_remote_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs4_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type 
nfs4_remote_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -261,21 +251,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, return res; } -/* - * Clone an NFS4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs4_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -} - static struct dentry * nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 61405a7a6b3c..4faefa19a8c3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2445,10 +2445,15 @@ void nfs_kill_super(struct super_block *s) * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ struct dentry * -nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, - const char *dev_name, struct nfs_mount_info *mount_info) +nfs_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { - struct nfs_clone_mount *data = mount_info->cloned; + struct nfs_clone_mount *data = raw_data; + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, + .cloned = data, + }; struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; @@ -2456,7 +2461,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, dprintk("--> nfs_xdev_mount_common()\n"); - mount_info->mntfh = data->fh; + mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); @@ -2465,7 +2470,7 @@ nfs_xdev_mount_common(struct 
file_system_type *fs_type, int flags, goto out_err; } - mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount_common() = 0\n"); out: return mntroot; @@ -2475,21 +2480,6 @@ out_err: goto out; } -/* - * Clone an NFS2/3 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info); -} - #ifdef CONFIG_NFS_V4 static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) -- cgit v1.2.3 From ff9099f26645818563c8d396a154c2ce6ee422eb Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:18 -0400 Subject: NFS: Create a try_mount rpc op I'm already looking up the nfs subversion in nfs_fs_mount(), so I have easy access to rpc_ops that used to be difficult to reach. This allows me to set up a different mount path for NFS v2/3 and NFS v4. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4super.c | 3 ++- fs/nfs/proc.c | 1 + fs/nfs/super.c | 14 ++++---------- include/linux/nfs_xdr.h | 4 ++++ 8 files changed, 16 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ac936476b3bc..3364eccd17ef 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -328,6 +328,8 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, + struct nfs_subversion *); void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 65d23eb92fe0..4f4cb8e49716 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -925,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs3_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 99c2e7e4d3ea..c321fb59d801 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -367,7 +367,7 @@ extern const nfs4_stateid zero_stateid; /* nfs4super.c */ struct nfs_mount_info; extern struct nfs_subversion nfs_v4; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6843e0a37de8..eb4ba1d99df9 100644 --- 
a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6870,6 +6870,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, + .try_mount = nfs4_try_mount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 8a505573c289..9384f666b6ab 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -226,7 +226,8 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, } struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { char *export_path; struct vfsmount *root_mnt; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4d3356af3309..ebb3d9c5227b 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -774,6 +774,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4faefa19a8c3..5fca59d73e40 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1650,9 +1650,9 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return nfs_walk_authlist(args, &request); } -static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct dentry *nfs_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { int status; struct nfs_server *server; @@ -2403,15 +2403,9 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; } -#ifdef CONFIG_NFS_V4 - if (mount_info.parsed->version == 4) - mntroot = nfs4_try_mount(flags, dev_name, &mount_info); - else -#endif /* CONFIG_NFS_V4 */ 
- mntroot = nfs_try_mount(flags, dev_name, &mount_info, nfs_mod); + mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); put_nfs_version(nfs_mod); - out: nfs_free_parsed_mount_data(mount_info.parsed); nfs_free_fhandle(mount_info.mntfh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0e181c2320b7..bc7415baf44d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1353,6 +1353,8 @@ struct nfs_renamedata { struct nfs_access_entry; struct nfs_client; struct rpc_timeout; +struct nfs_subversion; +struct nfs_mount_info; struct nfs_client_initdata; struct nfs_pageio_descriptor; @@ -1370,6 +1372,8 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); struct vfsmount *(*submount) (struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); + struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *, + struct nfs_subversion *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, -- cgit v1.2.3 From 1179acc6a3e260bc4edc74fa94f6c7908290eaec Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:19 -0400 Subject: NFS: Only initialize the ACL client in the v3 case v2 and v4 don't use it, so I create two new nfs_rpc_ops functions to initialize the ACL client only when we are using v3. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/client.c | 61 ++++------------------------------------------ fs/nfs/internal.h | 15 ++++++++---- fs/nfs/nfs3client.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 2 ++ fs/nfs/nfs4client.c | 10 +++++--- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4super.c | 2 +- fs/nfs/proc.c | 2 ++ fs/nfs/super.c | 4 +-- include/linux/nfs_xdr.h | 3 +++ 11 files changed, 99 insertions(+), 69 deletions(-) create mode 100644 fs/nfs/nfs3client.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 66dd3075e5db..7ca0125da65e 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -10,7 +10,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V2) += nfs2super.o proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 462de24482b4..1f2908287cba 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -83,22 +83,6 @@ struct rpc_stat nfs_rpcstat = { .program = &nfs_program }; - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static const struct rpc_version *nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -const struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - static struct nfs_subversion *find_nfs_version(unsigned int version) { struct nfs_subversion *nfs; @@ -695,36 +679,6 @@ static int nfs_start_lockd(struct nfs_server *server) return 0; } -/* - * Initialise an NFSv3 
ACL client connection - */ -#ifdef CONFIG_NFS_V3_ACL -static void nfs_init_server_aclclient(struct nfs_server *server) -{ - if (server->nfs_client->rpc_ops->version != 3) - goto out_noacl; - if (server->flags & NFS_MOUNT_NOACL) - goto out_noacl; - - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - if (IS_ERR(server->client_acl)) - goto out_noacl; - - /* No errors! Assume that Sun nfsacls are supported */ - server->caps |= NFS_CAP_ACLS; - return; - -out_noacl: - server->caps &= ~NFS_CAP_ACLS; -} -#else -static inline void nfs_init_server_aclclient(struct nfs_server *server) -{ - server->flags &= ~NFS_MOUNT_NOACL; - server->caps &= ~NFS_CAP_ACLS; -} -#endif - /* * Create a general RPC client */ @@ -874,8 +828,6 @@ static int nfs_init_server(struct nfs_server *server, server->mountd_protocol = data->mount_server.protocol; server->namelen = data->namlen; - /* Create a client RPC handle for the NFSv3 ACL management interface */ - nfs_init_server_aclclient(server); dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); return 0; @@ -1108,8 +1060,7 @@ void nfs_free_server(struct nfs_server *server) * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh, +struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { struct nfs_server *server; @@ -1126,7 +1077,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, goto error; /* Get a client representation */ - error = nfs_init_server(server, data, nfs_mod); + error = nfs_init_server(server, mount_info->parsed, nfs_mod); if (error < 0) goto error; @@ -1135,13 +1086,13 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = 
nfs_probe_fsinfo(server, mount_info->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ -1149,7 +1100,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; @@ -1210,8 +1161,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, flavor); if (error < 0) goto out_free_server; - if (!IS_ERR(source->client_acl)) - nfs_init_server_aclclient(server); /* probe the filesystem info for this server filesystem */ error = nfs_probe_fsinfo(server, fh, fattr_fsinfo); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3364eccd17ef..2151bafd55b4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -187,13 +187,11 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *); -extern struct nfs_server *nfs_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *, +extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_mount_info *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); @@ -225,6 
+223,13 @@ static inline void nfs_fs_proc_exit(void) int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); #endif +/* nfs3client.c */ +#ifdef CONFIG_NFS_V3 +struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); +#endif + /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c new file mode 100644 index 000000000000..b3fc65ef39ca --- /dev/null +++ b/fs/nfs/nfs3client.c @@ -0,0 +1,65 @@ +#include +#include +#include "internal.h" + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static const struct rpc_version *nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +const struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; + +/* + * Initialise an NFSv3 ACL client connection + */ +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! 
Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + /* Create a client RPC handle for the NFS v3 ACL management interface */ + if (!IS_ERR(server)) + nfs_init_server_aclclient(server); + return server; +} + +struct nfs_server *nfs3_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server = nfs_clone_server(source, fh, fattr, flavor); + if (!IS_ERR(server) && !IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + return server; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4f4cb8e49716..0952c791df36 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -969,4 +969,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, + .create_server = nfs3_create_server, + .clone_server = nfs3_clone_server, }; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 769e798b3959..b2d409d2805a 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -574,8 +574,10 @@ error: * Create a version 4 volume record * - keyed on server and FSID */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) +/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh)*/ +struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; int error; @@ -587,11 +589,11 @@ struct nfs_server 
*nfs4_create_server(const struct nfs_parsed_mount_data *data, return ERR_PTR(-ENOMEM); /* set up the general RPC client */ - error = nfs4_init_server(server, data); + error = nfs4_init_server(server, mount_info->parsed); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh); + error = nfs4_server_common_setup(server, mount_info->mntfh); if (error < 0) goto error; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index eb4ba1d99df9..36c6432aac7b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6916,6 +6916,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, + .create_server = nfs4_create_server, + .clone_server = nfs_clone_server, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 9384f666b6ab..a62836256665 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -105,7 +105,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ - server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs4_create_server(mount_info, &nfs_v4); if (IS_ERR(server)) { mntroot = ERR_CAST(server); goto out; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ebb3d9c5227b..50a88c3546ed 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -817,4 +817,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, + .create_server = nfs_create_server, + .clone_server = nfs_clone_server, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5fca59d73e40..a5f9fb3bfdcc 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1664,7 +1664,7 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, } /* Get a volume representation */ - server = nfs_create_server(mount_info->parsed, 
mount_info->mntfh, nfs_mod); + server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); @@ -2458,7 +2458,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); + server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bc7415baf44d..631182062994 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1439,6 +1439,9 @@ struct nfs_rpc_ops { (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t); void (*free_client) (struct nfs_client *); + struct nfs_server *(*create_server)(struct nfs_mount_info *, struct nfs_subversion *); + struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); }; /* -- cgit v1.2.3 From 6a74490dca897471a994a542fc7c5a469b48b46b Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:20 -0400 Subject: NFS: Pass super operations and xattr handlers in the nfs_subversion I can set all variables in the nfs_fill_super() function, allowing me to remove the nfs4_fill_super() function. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/nfs.h | 2 ++ fs/nfs/nfs2super.c | 1 + fs/nfs/nfs3super.c | 1 + fs/nfs/nfs4super.c | 24 +++--------------------- fs/nfs/super.c | 9 +++++---- 6 files changed, 13 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2151bafd55b4..17d14709e750 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -327,6 +327,7 @@ void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +extern const struct super_operations nfs_sops; extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index ac10b9e6c920..9f502a0c1e59 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -16,6 +16,8 @@ struct nfs_subversion { struct file_system_type *nfs_fs; /* NFS filesystem type */ const struct rpc_version *rpc_vers; /* NFS version information */ const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ + const struct super_operations *sops; /* NFS Super operations */ + const struct xattr_handler **xattr; /* NFS xattr handlers */ struct list_head list; /* List of NFS versions */ }; diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index cef06d42334a..a9fb69d72816 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -11,6 +11,7 @@ static struct nfs_subversion nfs_v2 = { .nfs_fs = &nfs_fs_type, .rpc_vers = &nfs_version2, .rpc_ops = &nfs_v2_clientops, + .sops = &nfs_sops, }; int __init init_nfs_v2(void) diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index f815cf359d97..8378090b8104 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -11,6 +11,7 @@ static struct nfs_subversion nfs_v3 = { .nfs_fs = &nfs_fs_type, .rpc_vers = &nfs_version3, .rpc_ops = &nfs_v3_clientops, + .sops = &nfs_sops, }; int __init init_nfs_v3(void) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 
a62836256665..c70e1730755c 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -71,25 +71,10 @@ struct nfs_subversion nfs_v4 = { .nfs_fs = &nfs4_fs_type, .rpc_vers = &nfs_version4, .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr = nfs4_xattr_handlers, }; -/* - * Set up an NFS4 superblock - */ -static void nfs4_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = nfs4_xattr_handlers; - nfs_initialise_sb(sb); -} - /* * Get the superblock for the NFS4 root partition */ @@ -101,7 +86,6 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); - mount_info->fill_super = nfs4_fill_super; mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ @@ -236,8 +220,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - mount_info->fill_super = nfs4_fill_super; - export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, @@ -257,7 +239,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, + .fill_super = nfs_fill_super, .set_security = nfs_clone_sb_security, .cloned = raw_data, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a5f9fb3bfdcc..a275d19ae512 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -298,7 +298,7 @@ struct file_system_type nfs_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -static const struct super_operations nfs_sops = { +const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, 
.destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, @@ -2105,10 +2105,12 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_blocksize_bits = 0; sb->s_blocksize = 0; - if (data->bsize) + sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; + sb->s_op = server->nfs_client->cl_nfs_mod->sops; + if (data && data->bsize) sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ @@ -2116,7 +2118,6 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_time_gran = 1; } - sb->s_op = &nfs_sops; nfs_initialise_sb(sb); } -- cgit v1.2.3 From 19d87ca3623956494b517f3abe0caf2616d55457 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:21 -0400 Subject: NFS: Split out remaining NFS v4 inode functions Somehow I missed this in my previous patch series, but these functions are only needed by the v4 code and should be moved to a v4-only file. I wasn't exactly sure where I should put these functions, so I moved them into nfs4super.c where I could make them static. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 23 +---------------------- fs/nfs/internal.h | 4 +--- fs/nfs/nfs4_fs.h | 3 --- fs/nfs/nfs4super.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 20 -------------------- 5 files changed, 41 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8877c82582d..a6ffa4be2a03 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -106,7 +106,7 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -static void nfs_clear_inode(struct inode *inode) +void nfs_clear_inode(struct inode *inode) { /* * The following should never happen... 
@@ -1472,27 +1472,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) return -ESTALE; } - -#ifdef CONFIG_NFS_V4 - -/* - * Clean out any remaining NFSv4 state that might be left over due - * to open() calls that passed nfs_atomic_lookup, but failed to call - * nfs_open(). - */ -void nfs4_evict_inode(struct inode *inode) -{ - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); - pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); - /* If we are holding a delegation, return it! */ - nfs_inode_return_delegation_noreclaim(inode); - /* First call standard NFS clear_inode() code */ - nfs_clear_inode(inode); -} -#endif - struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 17d14709e750..4174faf73ec1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -319,10 +319,8 @@ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); +extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); -#ifdef CONFIG_NFS_V4 -extern void nfs4_evict_inode(struct inode *); -#endif void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c321fb59d801..4811e1251d32 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -205,9 +205,6 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); -/* write.c */ -int nfs4_write_inode(struct inode *, struct writeback_control *); - /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git 
a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index c70e1730755c..1c825f3bef51 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -6,12 +6,16 @@ #include #include #include +#include "delegation.h" #include "internal.h" #include "nfs4_fs.h" +#include "pnfs.h" #include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); +static void nfs4_evict_inode(struct inode *inode); static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, @@ -75,6 +79,41 @@ struct nfs_subversion nfs_v4 = { .xattr = nfs4_xattr_handlers, }; +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret = nfs_write_inode(inode, wbc); + + if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { + int status; + bool sync = true; + + if (wbc->sync_mode == WB_SYNC_NONE) + sync = false; + + status = pnfs_layoutcommit_inode(inode, sync); + if (status < 0) + return status; + } + return ret; +} + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); + /* If we are holding a delegation, return it! 
*/ + nfs_inode_return_delegation_noreclaim(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + /* * Get the superblock for the NFS4 root partition */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f312860c15d0..6ddac54dc67f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1674,26 +1674,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) return nfs_commit_unstable_pages(inode, wbc); } -#ifdef CONFIG_NFS_V4 -int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret = nfs_write_inode(inode, wbc); - - if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { - int status; - bool sync = true; - - if (wbc->sync_mode == WB_SYNC_NONE) - sync = false; - - status = pnfs_layoutcommit_inode(inode, sync); - if (status < 0) - return status; - } - return ret; -} -#endif - /* * flush the inode to disk. */ -- cgit v1.2.3 From fac1e8e4ef417e958060a6c3a061cc1a180bd8ae Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:22 -0400 Subject: NFS: Keep module parameters in the generic NFS client Otherwise we break backwards compatibility when v4 becomes a module. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 24 ------------------------ fs/nfs/idmap.c | 3 --- fs/nfs/nfs4_fs.h | 4 +++- fs/nfs/nfs4client.c | 9 --------- fs/nfs/nfs4proc.c | 6 ------ fs/nfs/nfs4xdr.c | 6 ------ fs/nfs/super.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 48 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 23ff18fe080a..ca3ac992028b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -37,31 +37,7 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; -unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; -#define NFS_CALLBACK_MAXPORTNR (65535U) - -static int param_set_portnr(const char *val, const struct kernel_param *kp) -{ - unsigned long num; - int ret; - - if (!val) - return -EINVAL; - ret = strict_strtoul(val, 0, &num); - if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) - return -EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} -static struct kernel_param_ops param_ops_portnr = { - .set = param_set_portnr, - .get = param_get_uint, -}; -#define param_check_portnr(name, p) __param_check(name, p, unsigned int); - -module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); /* * This is the NFSv4 callback kernel thread. 
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 1b5058b4043b..b701358c39c3 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -52,8 +52,6 @@ #define NFS_UINT_MAXLEN 11 -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600; static const struct cred *id_resolver_cache; static struct key_type key_type_id_resolver_legacy; @@ -366,7 +364,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ } /* idmap classic begins here */ -module_param(nfs_idmap_cache_timeout, int, 0644); enum { Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4811e1251d32..bafe5186c9cd 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -367,7 +367,9 @@ extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); int init_nfs_v4(void); void exit_nfs_v4(void); - +extern bool nfs4_disable_idmapping; +extern unsigned short max_session_slots; +extern unsigned short send_implementation_id; /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index b2d409d2805a..cbcdfaf32505 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -17,11 +17,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -/* - * Turn off NFSv4 uid/gid mapping when using AUTH_SYS - */ -static bool nfs4_disable_idmapping = true; - /* * Get a unique NFSv4.0 callback identifier which will be used * by the V4.0 callback service to lookup the nfs_client struct @@ -659,7 +654,3 @@ error: dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } - -module_param(nfs4_disable_idmapping, bool, 0644); -MODULE_PARM_DESC(nfs4_disable_idmapping, - "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 36c6432aac7b..a99a8d948721 100644 --- a/fs/nfs/nfs4proc.c +++ 
b/fs/nfs/nfs4proc.c @@ -72,8 +72,6 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) -static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); @@ -6932,10 +6930,6 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; -module_param(max_session_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " - "requests the client will negotiate"); - /* * Local variables: * c-basic-offset: 8 diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6cbd602e26d5..ca13483edd60 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -852,12 +852,6 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ -static unsigned short send_implementation_id = 1; - -module_param(send_implementation_id, ushort, 0644); -MODULE_PARM_DESC(send_implementation_id, - "Send implementation ID with NFSv4.1 exchange_id"); - static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a275d19ae512..8e0da5a6b3c5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2574,4 +2574,49 @@ out_no_address: return -EINVAL; } +/* + * NFS v4 module parameters need to stay in the + * NFS client for backwards compatibility + */ +unsigned int nfs_callback_set_tcpport; +unsigned short nfs_callback_tcpport; +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600; +/* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ +bool nfs4_disable_idmapping = true; +unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; +unsigned short send_implementation_id = 1; + +#define NFS_CALLBACK_MAXPORTNR (65535U) + +static int param_set_portnr(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, 
&num); + if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; +#define param_check_portnr(name, p) __param_check(name, p, unsigned int); + +module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +module_param(nfs_idmap_cache_timeout, int, 0644); +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); +module_param(max_session_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " + "requests the client will negotiate"); +module_param(send_implementation_id, ushort, 0644); +MODULE_PARM_DESC(send_implementation_id, + "Send implementation ID with NFSv4.1 exchange_id"); #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From ddda8e0aa8b955e20cb80908189bfa154ab54837 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:23 -0400 Subject: NFS: Convert v2 into a module The module (nfs2.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v2. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- fs/nfs/Makefile | 4 +++- fs/nfs/client.c | 12 ++++++------ fs/nfs/dir.c | 13 +++++++++++++ fs/nfs/file.c | 2 ++ fs/nfs/inode.c | 10 ++++++++++ fs/nfs/namespace.c | 2 ++ fs/nfs/nfs.h | 14 -------------- fs/nfs/nfs2super.c | 9 +++++++-- fs/nfs/read.c | 1 + fs/nfs/super.c | 3 +++ fs/nfs/write.c | 2 ++ 12 files changed, 50 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 404c6a8ac394..6764dbf66d05 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -30,7 +30,7 @@ config NFS_FS If unsure, say N. 
config NFS_V2 - bool "NFS client support for NFS version 2" + tristate "NFS client support for NFS version 2" depends on NFS_FS default y help diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7ca0125da65e..df61db41bfa8 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,7 +9,6 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V2) += nfs2super.o proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ @@ -25,6 +24,9 @@ endif nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o +obj-$(CONFIG_NFS_V2) += nfs2.o +nfs2-y := nfs2super.o proc.o nfs2xdr.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1f2908287cba..fa8acf510333 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -149,11 +149,7 @@ EXPORT_SYMBOL_GPL(unregister_nfs_version); */ int __init nfs_register_versions(void) { - int err = init_nfs_v2(); - if (err) - return err; - - err = init_nfs_v3(); + int err = init_nfs_v3(); if (err) return err; @@ -165,7 +161,6 @@ int __init nfs_register_versions(void) */ void nfs_unregister_versions(void) { - exit_nfs_v2(); exit_nfs_v3(); exit_nfs_v4(); } @@ -222,6 +217,7 @@ error_cleanup: error_0: return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(nfs_alloc_client); #ifdef CONFIG_NFS_V4 /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ @@ -284,6 +280,7 @@ void nfs_free_client(struct nfs_client *clp) dprintk("<-- nfs_free_client()\n"); } +EXPORT_SYMBOL_GPL(nfs_free_client); /* * Release a reference to a shared client record @@ -753,6 +750,7 @@ error: dprintk("<-- nfs_init_client() = xerror %d\n", error); return ERR_PTR(error); } 
+EXPORT_SYMBOL_GPL(nfs_init_client); /* * Create a version 2 or 3 client @@ -1122,6 +1120,7 @@ error: nfs_free_server(server); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_create_server); /* * Clone an NFS2, NFS3 or NFS4 server record @@ -1191,6 +1190,7 @@ out_free_server: dprintk("<-- nfs_clone_server() = error %d\n", error); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_clone_server); void nfs_clients_init(struct net *net) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d49f1b9cd3fd..c382a6d5e177 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -17,6 +17,7 @@ * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM */ +#include #include #include #include @@ -1196,6 +1197,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { @@ -1263,6 +1265,7 @@ out: nfs_free_fhandle(fhandle); return res; } +EXPORT_SYMBOL_GPL(nfs_lookup); #ifdef CONFIG_NFS_V4 static int nfs4_lookup_revalidate(struct dentry *, unsigned int); @@ -1508,6 +1511,7 @@ out_error: dput(parent); return error; } +EXPORT_SYMBOL_GPL(nfs_instantiate); /* * Following a failed create operation, we drop the dentry rather @@ -1536,6 +1540,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_create); /* * See comments for nfs_proc_create regarding failed operations. @@ -1563,6 +1568,7 @@ out_err: d_drop(dentry); return status; } +EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. 
@@ -1586,6 +1592,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { @@ -1609,6 +1616,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } +EXPORT_SYMBOL_GPL(nfs_rmdir); /* * Remove a file after making sure there are no pending writes, @@ -1680,6 +1688,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) d_rehash(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_unlink); /* * To create a symbolic link, most file systems instantiate a new inode, @@ -1750,6 +1759,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) return 0; } +EXPORT_SYMBOL_GPL(nfs_symlink); int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -1771,6 +1781,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } return error; } +EXPORT_SYMBOL_GPL(nfs_link); /* * RENAME @@ -1869,6 +1880,7 @@ out: dput(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_rename); static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); @@ -2188,6 +2200,7 @@ out_notsup: res = generic_permission(inode, mask); goto out; } +EXPORT_SYMBOL_GPL(nfs_permission); /* * Local variables: diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1b3925426929..5b3e70389553 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -16,6 +16,7 @@ * nfs regular file handling functions */ +#include #include #include #include @@ -865,3 +866,4 @@ const struct file_operations nfs_file_operations = { .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; +EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a6ffa4be2a03..f358b976e9e6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -193,6 +193,7 @@ void nfs_invalidate_atime(struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_invalidate_atime); /* * 
Invalidate, but do not unhash, the inode. @@ -438,6 +439,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) out: return error; } +EXPORT_SYMBOL_GPL(nfs_setattr); /** * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall @@ -496,6 +498,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) nfs_vmtruncate(inode, attr->ia_size); } } +EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -535,6 +538,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) out: return err; } +EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { @@ -623,6 +627,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_close_context); struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) { @@ -1028,6 +1033,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->owner_name = NULL; fattr->group_name = NULL; } +EXPORT_SYMBOL_GPL(nfs_fattr_init); struct nfs_fattr *nfs_alloc_fattr(void) { @@ -1038,6 +1044,7 @@ struct nfs_fattr *nfs_alloc_fattr(void) nfs_fattr_init(fattr); return fattr; } +EXPORT_SYMBOL_GPL(nfs_alloc_fattr); struct nfs_fh *nfs_alloc_fhandle(void) { @@ -1048,6 +1055,7 @@ struct nfs_fh *nfs_alloc_fhandle(void) fh->size = 0; return fh; } +EXPORT_SYMBOL_GPL(nfs_alloc_fhandle); #ifdef NFS_DEBUG /* @@ -1168,6 +1176,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return status; } +EXPORT_SYMBOL_GPL(nfs_refresh_inode); static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { @@ -1255,6 +1264,7 @@ out_noforce: spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); /* * Many nfs protocol calls return the new file attributes after diff --git a/fs/nfs/namespace.c 
b/fs/nfs/namespace.c index 0f699fefee6e..2a3b170e88e0 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -7,6 +7,7 @@ * NFS namespace */ +#include #include #include #include @@ -255,3 +256,4 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); } +EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 9f502a0c1e59..f5d1cf5f5dc7 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -24,20 +24,6 @@ struct nfs_subversion { int nfs_register_versions(void); void nfs_unregister_versions(void); -#ifdef CONFIG_NFS_V2 -int init_nfs_v2(void); -void exit_nfs_v2(void); -#else /* CONFIG_NFS_V2 */ -static inline int __init init_nfs_v2(void) -{ - return 0; -} - -static inline void exit_nfs_v2(void) -{ -} -#endif /* CONFIG_NFS_V2 */ - #ifdef CONFIG_NFS_V3 int init_nfs_v3(void); void exit_nfs_v3(void); diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index a9fb69d72816..0a9782c9171a 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -14,13 +14,18 @@ static struct nfs_subversion nfs_v2 = { .sops = &nfs_sops, }; -int __init init_nfs_v2(void) +static int __init init_nfs_v2(void) { register_nfs_version(&nfs_v2); return 0; } -void exit_nfs_v2(void) +static void __exit exit_nfs_v2(void) { unregister_nfs_version(&nfs_v2); } + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v2); +module_exit(exit_nfs_v2); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6267b873bbcb..b000e4c0cf83 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -113,6 +113,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, NFS_SERVER(inode)->rsize, 0); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8e0da5a6b3c5..999ce7505142 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -289,6 
+289,7 @@ struct file_system_type nfs_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, @@ -312,6 +313,7 @@ const struct super_operations nfs_sops = { .show_stats = nfs_show_stats, .remount_fs = nfs_remount, }; +EXPORT_SYMBOL_GPL(nfs_sops); #ifdef CONFIG_NFS_V4 static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); @@ -1670,6 +1672,7 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } +EXPORT_SYMBOL_GPL(nfs_try_mount); /* * Split "dev_name" into "hostname:export_path". diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6ddac54dc67f..1e8d4b043769 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1210,6 +1210,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, NFS_SERVER(inode)->wsize, ioflags); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { @@ -1688,6 +1689,7 @@ int nfs_wb_all(struct inode *inode) return sync_inode(inode, &wbc); } +EXPORT_SYMBOL_GPL(nfs_wb_all); int nfs_wb_page_cancel(struct inode *inode, struct page *page) { -- cgit v1.2.3 From 1c606fb74c758beafd98cbad9a9133eadeec2371 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:24 -0400 Subject: NFS: Convert v3 into a module This patch exports symbols and moves over the final structures needed by the v3 module. In addition, I also switch over to using IS_ENABLED() to check if CONFIG_NFS_V3 or CONFIG_NFS_V3_MODULE are set. The module (nfs3.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v3. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- fs/nfs/Makefile | 6 ++++-- fs/nfs/client.c | 5 ----- fs/nfs/dir.c | 1 + fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 3 +++ fs/nfs/internal.h | 2 +- fs/nfs/nfs.h | 14 -------------- fs/nfs/nfs3super.c | 9 +++++++-- fs/nfs/super.c | 6 +++--- fs/nfs/write.c | 8 ++++---- 11 files changed, 25 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6764dbf66d05..f81a729c00e9 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -40,7 +40,7 @@ config NFS_V2 If unsure, say Y. config NFS_V3 - bool "NFS client support for NFS version 3" + tristate "NFS client support for NFS version 3" depends on NFS_FS default y help diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index df61db41bfa8..01846edc5c94 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,8 +9,6 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o -nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ @@ -27,6 +25,10 @@ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_NFS_V2) += nfs2.o nfs2-y := nfs2super.o proc.o nfs2xdr.o +obj-$(CONFIG_NFS_V3) += nfs3.o +nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fa8acf510333..8687b6b6edc1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -149,10 +149,6 @@ EXPORT_SYMBOL_GPL(unregister_nfs_version); */ int __init nfs_register_versions(void) { - int err = init_nfs_v3(); - if (err) - 
return err; - return init_nfs_v4(); } @@ -161,7 +157,6 @@ int __init nfs_register_versions(void) */ void nfs_unregister_versions(void) { - exit_nfs_v3(); exit_nfs_v4(); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c382a6d5e177..55438c970cbf 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1981,6 +1981,7 @@ void nfs_access_zap_cache(struct inode *inode) spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); } +EXPORT_SYMBOL_GPL(nfs_access_zap_cache); static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec70..899238156b11 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -460,7 +460,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f358b976e9e6..78dfc3e895ec 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -186,6 +186,7 @@ void nfs_zap_acl_cache(struct inode *inode) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { @@ -847,6 +848,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return NFS_STALE(inode) ? 
-ESTALE : 0; return __nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { @@ -1213,6 +1215,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4174faf73ec1..64f0dc41a9b7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -224,7 +224,7 @@ int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); #endif /* nfs3client.c */ -#ifdef CONFIG_NFS_V3 +#if IS_ENABLED(CONFIG_NFS_V3) struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index f5d1cf5f5dc7..3e1b84baa57f 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -24,20 +24,6 @@ struct nfs_subversion { int nfs_register_versions(void); void nfs_unregister_versions(void); -#ifdef CONFIG_NFS_V3 -int init_nfs_v3(void); -void exit_nfs_v3(void); -#else /* CONFIG_NFS_V3 */ -static inline int __init init_nfs_v3(void) -{ - return 0; -} - -static inline void exit_nfs_v3(void) -{ -} -#endif /* CONFIG_NFS_V3 */ - #ifdef CONFIG_NFS_V4 int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 8378090b8104..cc471c725230 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -14,13 +14,18 @@ static struct nfs_subversion nfs_v3 = { .sops = &nfs_sops, }; -int __init init_nfs_v3(void) +static int __init init_nfs_v3(void) { register_nfs_version(&nfs_v3); return 0; } -void exit_nfs_v3(void) +static void __exit exit_nfs_v3(void) { unregister_nfs_version(&nfs_v3); } + +MODULE_LICENSE("GPL"); + 
+module_init(init_nfs_v3); +module_exit(exit_nfs_v3); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 999ce7505142..558a85c9594a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -69,7 +69,7 @@ #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_TEXT_DATA 1 -#ifdef CONFIG_NFS_V3 +#if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 @@ -1876,7 +1876,7 @@ static int nfs23_validate_mount_data(void *options, return NFS_TEXT_DATA; } -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) if (args->version == 3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ @@ -1896,7 +1896,7 @@ out_no_sec: dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); return -EINVAL; -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) out_v3_not_compiled: dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); return -EPROTONOSUPPORT; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1e8d4b043769..f268fe4f2785 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -446,7 +446,7 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -636,7 +636,7 @@ out: hdr->release(hdr); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -1298,7 +1298,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) return; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage 
even though we @@ -1358,7 +1358,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; -- cgit v1.2.3 From 89d77c8fa8e6d1cb7e2cce95b428be30ddcc6f23 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:25 -0400 Subject: NFS: Convert v4 into a module This patch exports symbols needed by the v4 module. In addition, I also switch over to using IS_ENABLED() to check if CONFIG_NFS_V4 or CONFIG_NFS_V4_MODULE are set. The module (nfs4.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v4. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- fs/nfs/Makefile | 19 ++++++-------- fs/nfs/callback.h | 2 +- fs/nfs/client.c | 34 +++++++++++-------------- fs/nfs/delegation.h | 2 +- fs/nfs/dir.c | 6 ++++- fs/nfs/direct.c | 2 +- fs/nfs/dns_resolve.c | 4 +++ fs/nfs/file.c | 13 ++++++++++ fs/nfs/inode.c | 64 +++++++++++++++++++++++------------------------ fs/nfs/internal.h | 8 +++--- fs/nfs/namespace.c | 2 ++ fs/nfs/netns.h | 2 +- fs/nfs/nfs.h | 17 ------------- fs/nfs/nfs4_fs.h | 5 ++-- fs/nfs/nfs4super.c | 9 +++++-- fs/nfs/pagelist.c | 4 +++ fs/nfs/pnfs.c | 2 ++ fs/nfs/read.c | 4 +++ fs/nfs/super.c | 41 +++++++++++++++++++++++------- fs/nfs/write.c | 13 +++++++--- include/linux/nfs_fs.h | 6 ++--- include/linux/nfs_fs_sb.h | 6 ++--- include/linux/nfs_idmap.h | 2 +- include/linux/nfs_xdr.h | 2 +- 25 files changed, 155 insertions(+), 116 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f81a729c00e9..195c1ea6151a 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -72,7 +72,7 @@ config NFS_V3_ACL If unsure, say N. 
config NFS_V4 - bool "NFS client support for NFS version 4" + tristate "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS select KEYS diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 01846edc5c94..8bf3a3f6925a 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,17 +9,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - nfs4super.o nfs4file.o delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o nfs4client.o -nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o - -ifeq ($(CONFIG_SYSCTL), y) -nfs-y += sysctl.o -nfs-$(CONFIG_NFS_V4) += nfs4sysctl.o -endif - +nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_NFS_V2) += nfs2.o @@ -29,6 +19,13 @@ obj-$(CONFIG_NFS_V3) += nfs3.o nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +obj-$(CONFIG_NFS_V4) += nfs4.o +nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ + delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o nfs4getroot.o nfs4client.o +nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index a5527c90a5aa..b44d7b128b71 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -192,7 +192,7 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_process_state *cps); extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, struct cb_process_state *cps); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt 
*xprt); extern void nfs_callback_down(int minorversion); extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8687b6b6edc1..9fc0d9dfc91b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -142,24 +142,6 @@ void unregister_nfs_version(struct nfs_subversion *nfs) } EXPORT_SYMBOL_GPL(unregister_nfs_version); -/* - * Preload all configured NFS versions during module init. - * This function should be edited after each protocol is converted, - * and eventually removed. - */ -int __init nfs_register_versions(void) -{ - return init_nfs_v4(); -} - -/* - * Remove each pre-loaded NFS version - */ -void nfs_unregister_versions(void) -{ - exit_nfs_v4(); -} - /* * Allocate a shared client record * @@ -214,7 +196,7 @@ error_0: } EXPORT_SYMBOL_GPL(nfs_alloc_client); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -390,6 +372,7 @@ int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } +EXPORT_SYMBOL_GPL(nfs_sockaddr_match_ipaddr); #endif /* CONFIG_NFS_V4_1 */ /* @@ -456,6 +439,7 @@ int nfs_wait_client_init_complete(const struct nfs_client *clp) return wait_event_killable(nfs_client_active_wq, nfs_client_init_is_complete(clp)); } +EXPORT_SYMBOL_GPL(nfs_wait_client_init_complete); /* * Found an existing client. Make sure it's ready before returning. 
@@ -530,6 +514,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, cl_init->hostname ?: "", PTR_ERR(new)); return new; } +EXPORT_SYMBOL_GPL(nfs_get_client); /* * Mark a server as ready or failed @@ -540,6 +525,7 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } +EXPORT_SYMBOL_GPL(nfs_mark_client_ready); /* * Initialise the timeout values for a connection @@ -581,6 +567,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, BUG(); } } +EXPORT_SYMBOL_GPL(nfs_init_timeout_values); /* * Create an RPC client handle @@ -620,6 +607,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, clp->cl_rpcclient = clnt; return 0; } +EXPORT_SYMBOL_GPL(nfs_create_rpc_client); /* * Version 2 or 3 client destruction @@ -706,6 +694,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server, return 0; } +EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); /** * nfs_init_client - Initialise an NFS2 or NFS3 client @@ -932,6 +921,7 @@ out_error: dprintk("nfs_probe_fsinfo: error = %d\n", -error); return error; } +EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); /* * Copy useful information when duplicating a server record @@ -948,6 +938,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; } +EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); void nfs_server_insert_lists(struct nfs_server *server) { @@ -961,6 +952,7 @@ void nfs_server_insert_lists(struct nfs_server *server) spin_unlock(&nn->nfs_client_lock); } +EXPORT_SYMBOL_GPL(nfs_server_insert_lists); static void nfs_server_remove_lists(struct nfs_server *server) { @@ -1020,6 +1012,7 @@ struct nfs_server *nfs_alloc_server(void) return server; } +EXPORT_SYMBOL_GPL(nfs_alloc_server); /* * Free up a server record @@ -1048,6 +1041,7 @@ void nfs_free_server(struct nfs_server *server) nfs_release_automount_timer(); dprintk("<-- nfs_free_server()\n"); } 
+EXPORT_SYMBOL_GPL(nfs_free_server); /* * Create a version 2 or 3 volume record @@ -1193,7 +1187,7 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1f3ccd934635..bbc6a4dba0d8 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -8,7 +8,7 @@ #ifndef FS_NFS_DELEGATION_H #define FS_NFS_DELEGATION_H -#if defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V4) /* * NFSv4 delegation */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 55438c970cbf..627f108ede23 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -936,6 +936,7 @@ void nfs_force_lookup_revalidate(struct inode *dir) { NFS_I(dir)->cache_change_attribute++; } +EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); /* * A check for whether or not the parent directory has changed. @@ -1267,7 +1268,7 @@ out: } EXPORT_SYMBOL_GPL(nfs_lookup); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { @@ -1277,6 +1278,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs4_dentry_operations); static fmode_t flags_to_mode(int flags) { @@ -1419,6 +1421,7 @@ no_open: return finish_no_open(file, res); } +EXPORT_SYMBOL_GPL(nfs_atomic_open); static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { @@ -2142,6 +2145,7 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) { return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } +EXPORT_SYMBOL_GPL(nfs_may_open); int nfs_permission(struct inode *inode, int mask) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 899238156b11..b7b4f80968b5 100644 --- 
a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -460,7 +460,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index b3924b8a6000..31c26c4dcc23 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -8,6 +8,7 @@ #ifdef CONFIG_NFS_USE_KERNEL_DNS +#include #include #include #include "dns_resolve.h" @@ -27,9 +28,11 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, kfree(ip_addr); return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #else +#include #include #include #include @@ -345,6 +348,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, ret = -ESRCH; return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); int nfs_dns_resolver_cache_init(struct net *net) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5b3e70389553..1557978ca7b3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -53,6 +53,7 @@ int nfs_check_flags(int flags) return 0; } +EXPORT_SYMBOL_GPL(nfs_check_flags); /* * Open file @@ -85,6 +86,7 @@ nfs_file_release(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } +EXPORT_SYMBOL_GPL(nfs_file_release); /** * nfs_revalidate_size - Revalidate the file size @@ -138,6 +140,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return generic_file_llseek(filp, offset, origin); } +EXPORT_SYMBOL_GPL(nfs_file_llseek); /* * Flush all dirty pages, and check for write errors. 
@@ -166,6 +169,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) /* Flush writes to the server and return any errors */ return vfs_fsync(file, 0); } +EXPORT_SYMBOL_GPL(nfs_file_flush); ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, @@ -190,6 +194,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, } return result; } +EXPORT_SYMBOL_GPL(nfs_file_read); ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, @@ -212,6 +217,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, } return res; } +EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -233,6 +239,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) } return status; } +EXPORT_SYMBOL_GPL(nfs_file_mmap); /* * Flush any dirty pages for this process, and check for write errors. @@ -271,6 +278,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) ret = status; return ret; } +EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); static int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -615,6 +623,7 @@ out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); goto out; } +EXPORT_SYMBOL_GPL(nfs_file_write); ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, struct file *filp, loff_t *ppos, @@ -646,6 +655,7 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); return ret; } +EXPORT_SYMBOL_GPL(nfs_file_splice_write); static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) @@ -806,6 +816,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) out_err: return ret; } +EXPORT_SYMBOL_GPL(nfs_lock); /* * Lock a (portion of) a file @@ -835,6 +846,7 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); } 
+EXPORT_SYMBOL_GPL(nfs_flock); /* * There is no protocol support for leases, so we have no way to implement @@ -847,6 +859,7 @@ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) file->f_path.dentry->d_name.name, arg); return -EINVAL; } +EXPORT_SYMBOL_GPL(nfs_setlease); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 78dfc3e895ec..2ed6138f32ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -82,6 +82,7 @@ int nfs_wait_bit_killable(void *word) freezable_schedule(); return 0; } +EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** * nfs_compat_user_ino64 - returns the user-visible inode number @@ -117,6 +118,7 @@ void nfs_clear_inode(struct inode *inode) nfs_access_zap_cache(inode); nfs_fscache_release_inode_cookie(inode); } +EXPORT_SYMBOL_GPL(nfs_clear_inode); void nfs_evict_inode(struct inode *inode) { @@ -393,6 +395,7 @@ out_no_inode: dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } +EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) @@ -655,6 +658,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f ctx->mdsthreshold = NULL; return ctx; } +EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { @@ -662,6 +666,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) atomic_inc(&ctx->lock_context.count); return ctx; } +EXPORT_SYMBOL_GPL(get_nfs_open_context); static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { @@ -689,6 +694,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) { __put_nfs_open_context(ctx, 0); } +EXPORT_SYMBOL_GPL(put_nfs_open_context); /* * Ensure that mmap has a recent RPC credential for use when writing out @@ -704,6 +710,7 @@ void 
nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) list_add(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics @@ -1497,11 +1504,12 @@ struct inode *nfs_alloc_inode(struct super_block *sb) nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); #endif -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } +EXPORT_SYMBOL_GPL(nfs_alloc_inode); static void nfs_i_callback(struct rcu_head *head) { @@ -1513,10 +1521,11 @@ void nfs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, nfs_i_callback); } +EXPORT_SYMBOL_GPL(nfs_destroy_inode); static inline void nfs4_init_once(struct nfs_inode *nfsi) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) INIT_LIST_HEAD(&nfsi->open_states); nfsi->delegation = NULL; nfsi->delegation_state = 0; @@ -1562,6 +1571,7 @@ static void nfs_destroy_inodecache(void) } struct workqueue_struct *nfsiod_workqueue; +EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* * start up the nfsiod workqueue @@ -1622,90 +1632,80 @@ static int __init init_nfs_fs(void) err = nfs_dns_resolver_init(); if (err < 0) - goto out11; + goto out10;; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out10; + goto out9; err = nfs_fscache_register(); if (err < 0) - goto out9; + goto out8; err = nfsiod_start(); if (err) - goto out8; + goto out7; err = nfs_fs_proc_init(); if (err) - goto out7; + goto out6; err = nfs_init_nfspagecache(); if (err) - goto out6; + goto out5; err = nfs_init_inodecache(); if (err) - goto out5; + goto out4; err = nfs_init_readpagecache(); if (err) - goto out4; + goto out3; err = nfs_init_writepagecache(); if (err) - goto out3; + goto out2; err = nfs_init_directcache(); if (err) - goto out2; + goto out1; #ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); #endif - - 
err = nfs_register_versions(); - if (err) - goto out1; - if ((err = register_nfs_fs()) != 0) goto out0; return 0; out0: - nfs_unregister_versions(); -out1: #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); -out2: +out1: nfs_destroy_writepagecache(); -out3: +out2: nfs_destroy_readpagecache(); -out4: +out3: nfs_destroy_inodecache(); -out5: +out4: nfs_destroy_nfspagecache(); -out6: +out5: nfs_fs_proc_exit(); -out7: +out6: nfsiod_stop(); -out8: +out7: nfs_fscache_unregister(); -out9: +out8: unregister_pernet_subsys(&nfs_net_ops); -out10: +out9: nfs_dns_resolver_destroy(); -out11: +out10: return err; } static void __exit exit_nfs_fs(void) { -#ifdef CONFIG_NFS_V4 - exit_nfs_v4(); -#endif nfs_destroy_directcache(); nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 64f0dc41a9b7..8865538b26b6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -262,7 +262,7 @@ extern int nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); /* nfs4xdr.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); #endif @@ -272,7 +272,7 @@ extern const u32 nfs41_maxwrite_overhead; #endif /* nfs4proc.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct rpc_procinfo nfs4_procedures[]; #endif @@ -328,7 +328,7 @@ extern int nfs_wait_bit_killable(void *word); extern const struct super_operations nfs_sops; extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif @@ -364,7 +364,7 @@ struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, const char *); -#ifdef CONFIG_NFS_V4 +#if 
IS_ENABLED(CONFIG_NFS_V4) extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, const char *); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2a3b170e88e0..655925373b91 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -113,6 +113,7 @@ Elong_unlock: Elong: return ERR_PTR(-ENAMETOOLONG); } +EXPORT_SYMBOL_GPL(nfs_path); /* * nfs_d_automount - Handle crossing a mountpoint on the server @@ -241,6 +242,7 @@ out: dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } +EXPORT_SYMBOL_GPL(nfs_do_submount); struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8a6394edb8b0..0539de1b8d1f 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -20,7 +20,7 @@ struct nfs_net { wait_queue_head_t bl_wq; struct list_head nfs_client_list; struct list_head nfs_volume_list; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) struct idr cb_ident_idr; /* Protected by nfs_client_lock */ #endif spinlock_t nfs_client_lock; diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 3e1b84baa57f..43679df56cd0 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -21,23 +21,6 @@ struct nfs_subversion { struct list_head list; /* List of NFS versions */ }; -int nfs_register_versions(void); -void nfs_unregister_versions(void); - -#ifdef CONFIG_NFS_V4 -int init_nfs_v4(void); -void exit_nfs_v4(void); -#else /* CONFIG_NFS_V4 */ -static inline int __init init_nfs_v4(void) -{ - return 0; -} - -static inline void exit_nfs_v4(void) -{ -} -#endif /* CONFIG_NFS_V4 */ - struct nfs_subversion *get_nfs_version(unsigned int); void put_nfs_version(struct nfs_subversion *); void register_nfs_version(struct nfs_subversion *); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index bafe5186c9cd..3b950dd81e81 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,7 +9,7 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H -#ifdef CONFIG_NFS_V4 +#if 
IS_ENABLED(CONFIG_NFS_V4) struct idmap; @@ -365,11 +365,10 @@ extern const nfs4_stateid zero_stateid; struct nfs_mount_info; extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); -int init_nfs_v4(void); -void exit_nfs_v4(void); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short send_implementation_id; + /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1c825f3bef51..12a31a9dbcdd 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -332,7 +332,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, } -int __init init_nfs_v4(void) +static int __init init_nfs_v4(void) { int err; @@ -358,10 +358,15 @@ out: return err; } -void exit_nfs_v4(void) +static void __exit exit_nfs_v4(void) { unregister_nfs_version(&nfs_v4); unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v4); +module_exit(exit_nfs_v4); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aed913c833f4..1e7d8879dae6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -54,6 +54,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); } +EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_dreq = NULL; } +EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_can_coalesce_requests - test two requests for compatibility @@ -409,6 +411,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } while (ret); return ret; } +EXPORT_SYMBOL_GPL(nfs_pageio_add_request); /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor @@ 
-424,6 +427,7 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) break; } } +EXPORT_SYMBOL_GPL(nfs_pageio_complete); /** * nfs_pageio_cond_complete - Conditional I/O completion diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7fbd25afe418..76875bfcf19c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1407,6 +1407,7 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) put_lseg(hdr->lseg); nfs_writehdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) @@ -1561,6 +1562,7 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) put_lseg(hdr->lseg); nfs_readhdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b000e4c0cf83..6935e401ad76 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -48,6 +48,7 @@ struct nfs_read_header *nfs_readhdr_alloc(void) } return rhdr; } +EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -80,6 +81,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) kmem_cache_free(nfs_rdata_cachep, rhdr); } +EXPORT_SYMBOL_GPL(nfs_readhdr_free); void nfs_readdata_release(struct nfs_read_data *rdata) { @@ -96,6 +98,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata) if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } +EXPORT_SYMBOL_GPL(nfs_readdata_release); static int nfs_return_empty_page(struct page *page) @@ -398,6 +401,7 @@ int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, return nfs_pagein_multi(desc, hdr); return nfs_pagein_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_pagein); static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 558a85c9594a..ac6a3c55dce4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -315,7 +315,7 @@ const 
struct super_operations nfs_sops = { }; EXPORT_SYMBOL_GPL(nfs_sops); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); @@ -366,6 +366,7 @@ void nfs_sb_active(struct super_block *sb) if (atomic_inc_return(&server->active) == 1) atomic_inc(&sb->s_active); } +EXPORT_SYMBOL_GPL(nfs_sb_active); void nfs_sb_deactive(struct super_block *sb) { @@ -374,6 +375,7 @@ void nfs_sb_deactive(struct super_block *sb) if (atomic_dec_and_test(&server->active)) deactivate_super(sb); } +EXPORT_SYMBOL_GPL(nfs_sb_deactive); /* * Deliver file system statistics to userspace @@ -439,6 +441,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) dprintk("%s: statfs error = %d\n", __func__, -error); return error; } +EXPORT_SYMBOL_GPL(nfs_statfs); /* * Map the security flavour number to a name @@ -544,7 +547,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_mountd_netid(m, nfss, showdefaults); } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { @@ -675,8 +678,9 @@ int nfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_options); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) #ifdef CONFIG_NFS_V4_1 static void show_sessions(struct seq_file *m, struct nfs_server *server) { @@ -709,7 +713,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } } #else -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void show_pnfs(struct seq_file *m, struct nfs_server *server) { } @@ -734,12 +738,14 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root) free_page((unsigned long)page); return err; } +EXPORT_SYMBOL_GPL(nfs_show_devname); int nfs_show_path(struct seq_file *m, 
struct dentry *dentry) { seq_puts(m, "/"); return 0; } +EXPORT_SYMBOL_GPL(nfs_show_path); /* * Present statistical information for this VFS mountpoint @@ -774,7 +780,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, ",bsize=%u", nfss->bsize); seq_printf(m, ",namlen=%u", nfss->namelen); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); @@ -832,6 +838,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_stats); /* * Begin unmount by attempting to remove all automounted mountpoints we added @@ -851,6 +858,7 @@ void nfs_umount_begin(struct super_block *sb) if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } +EXPORT_SYMBOL_GPL(nfs_umount_begin); static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) { @@ -1915,7 +1923,7 @@ out_invalid_fh: return -EINVAL; } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, struct nfs_parsed_mount_data *args, @@ -1953,7 +1961,7 @@ static int nfs_validate_text_mount_data(void *options, goto out_no_address; if (args->version == 4) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; @@ -1976,7 +1984,7 @@ static int nfs_validate_text_mount_data(void *options, &args->nfs_server.export_path, max_pathlen); -#ifndef CONFIG_NFS_V4 +#if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; @@ -2075,6 +2083,7 @@ out: kfree(data); return error; } +EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock @@ -2123,6 +2132,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) nfs_initialise_sb(sb); } +EXPORT_SYMBOL_GPL(nfs_fill_super); /* * 
Finish setting up a cloned NFS2/3/4 superblock @@ -2292,6 +2302,7 @@ int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, { return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); } +EXPORT_SYMBOL_GPL(nfs_set_sb_security); int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) @@ -2302,6 +2313,7 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return -ESTALE; return 0; } +EXPORT_SYMBOL_GPL(nfs_clone_sb_security); struct dentry *nfs_fs_mount_common(struct nfs_server *server, int flags, const char *dev_name, @@ -2375,6 +2387,7 @@ error_splat_bdi: deactivate_locked_super(s); goto out; } +EXPORT_SYMBOL_GPL(nfs_fs_mount_common); struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) @@ -2415,6 +2428,7 @@ out: nfs_free_fhandle(mount_info.mntfh); return mntroot; } +EXPORT_SYMBOL_GPL(nfs_fs_mount); /* * Ensure that we unregister the bdi before kill_anon_super @@ -2426,6 +2440,7 @@ void nfs_put_super(struct super_block *s) bdi_unregister(&server->backing_dev_info); } +EXPORT_SYMBOL_GPL(nfs_put_super); /* * Destroy an NFS2/3 superblock @@ -2438,6 +2453,7 @@ void nfs_kill_super(struct super_block *s) nfs_fscache_release_super_cookie(s); nfs_free_server(server); } +EXPORT_SYMBOL_GPL(nfs_kill_super); /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) @@ -2478,7 +2494,7 @@ out_err: goto out; } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { @@ -2590,6 +2606,13 @@ bool nfs4_disable_idmapping = true; unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; +EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); +EXPORT_SYMBOL_GPL(nfs_callback_tcpport); +EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); +EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); +EXPORT_SYMBOL_GPL(max_session_slots); 
+EXPORT_SYMBOL_GPL(send_implementation_id); + #define NFS_CALLBACK_MAXPORTNR (65535U) static int param_set_portnr(const char *val, const struct kernel_param *kp) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f268fe4f2785..e4a2ad2059bd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -84,6 +84,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } return p; } +EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -115,6 +116,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); mempool_free(whdr, nfs_wdata_mempool); } +EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct nfs_write_data *wdata) { @@ -131,6 +133,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata) if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } +EXPORT_SYMBOL_GPL(nfs_writedata_release); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -446,7 +449,7 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -636,7 +639,7 @@ out: hdr->release(hdr); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -1173,6 +1176,7 @@ int nfs_generic_flush(struct nfs_pageio_descriptor *desc, return nfs_flush_multi(desc, hdr); return nfs_flush_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { @@ -1298,7 +1302,7 @@ void nfs_writeback_done(struct rpc_task *task, struct 
nfs_write_data *data) return; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we @@ -1358,7 +1362,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1674,6 +1678,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { return nfs_commit_unstable_pages(inode, wbc); } +EXPORT_SYMBOL_GPL(nfs_write_inode); /* * flush the inode to disk. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4b6043c20f77..2889877318bc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -191,7 +191,7 @@ struct nfs_inode { struct hlist_head silly_list; wait_queue_head_t waitqueue; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; @@ -428,7 +428,7 @@ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ * linux/fs/nfs/file.c */ extern const struct file_operations nfs_file_operations; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern const struct file_operations nfs4_file_operations; #endif /* CONFIG_NFS_V4 */ extern const struct address_space_operations nfs_file_aops; @@ -538,7 +538,7 @@ extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || 
IS_ENABLED(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6039297801f4..310c63c8ab2c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -53,7 +53,7 @@ struct nfs_client { u32 cl_minorversion;/* NFSv4 minorversion */ struct rpc_cred *cl_machine_cred; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; /* Clientid verifier */ unsigned long cl_state; @@ -138,7 +138,7 @@ struct nfs_server { #endif u32 pnfs_blksize; /* layout_blksize attr */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set of attributes supported on this filesystem */ @@ -201,7 +201,7 @@ struct nfs_server { #define NFS4_MAX_SLOT_TABLE (256U) #define NFS4_NO_SLOT ((u32)-1) -#if defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V4) /* Sessions */ #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 7eed2012d288..ece91c57ad79 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -69,7 +69,7 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) int nfs_idmap_init(void); void nfs_idmap_quit(void); #else diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 631182062994..00485e084394 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -824,7 +824,7 @@ struct nfs3_getaclres { struct posix_acl * acl_default; }; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) typedef u64 clientid4; -- cgit v1.2.3 From 1fe60e51a3744528f3939b1b1167ca909133d9ae Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 30 Jul 2012 16:23:22 -0700 Subject: libceph: move feature bits to 
separate header This is simply cleanup that will keep things more closely synced with the userland code. Signed-off-by: Sage Weil Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- fs/ceph/mds_client.c | 1 + fs/ceph/super.c | 1 + include/linux/ceph/ceph_features.h | 24 ++++++++++++++++++++++++ include/linux/ceph/ceph_fs.h | 14 -------------- include/linux/ceph/libceph.h | 6 ------ net/ceph/ceph_common.c | 5 +++-- 6 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 include/linux/ceph/ceph_features.h (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 418f6a82c90d..39b76d66bc5d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -10,6 +10,7 @@ #include "super.h" #include "mds_client.h" +#include #include #include #include diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1e67dd7305a4..2c47ecfe4373 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -18,6 +18,7 @@ #include "super.h" #include "mds_client.h" +#include #include #include #include diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h new file mode 100644 index 000000000000..342f93dbe162 --- /dev/null +++ b/include/linux/ceph/ceph_features.h @@ -0,0 +1,24 @@ +#ifndef __CEPH_FEATURES +#define __CEPH_FEATURES + +/* + * feature bits + */ +#define CEPH_FEATURE_UID (1<<0) +#define CEPH_FEATURE_NOSRCADDR (1<<1) +#define CEPH_FEATURE_MONCLOCKCHECK (1<<2) +#define CEPH_FEATURE_FLOCK (1<<3) +#define CEPH_FEATURE_SUBSCRIBE2 (1<<4) +#define CEPH_FEATURE_MONNAMES (1<<5) +#define CEPH_FEATURE_RECONNECT_SEQ (1<<6) +#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) + +/* + * Features supported. 
+ */ +#define CEPH_FEATURES_SUPPORTED_DEFAULT \ + (CEPH_FEATURE_NOSRCADDR) + +#define CEPH_FEATURES_REQUIRED_DEFAULT \ + (CEPH_FEATURE_NOSRCADDR) +#endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index e81ab30d4896..d021610efd65 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -35,20 +35,6 @@ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 - -/* - * feature bits - */ -#define CEPH_FEATURE_UID (1<<0) -#define CEPH_FEATURE_NOSRCADDR (1<<1) -#define CEPH_FEATURE_MONCLOCKCHECK (1<<2) -#define CEPH_FEATURE_FLOCK (1<<3) -#define CEPH_FEATURE_SUBSCRIBE2 (1<<4) -#define CEPH_FEATURE_MONNAMES (1<<5) -#define CEPH_FEATURE_RECONNECT_SEQ (1<<6) -#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) - - /* * ceph_file_layout - describe data layout for a file/inode */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 98ec36ae8a3b..ea072e1f9db9 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -22,12 +22,6 @@ #include "osd_client.h" #include "ceph_fs.h" -/* - * Supported features - */ -#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR -#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR - /* * mount options */ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 3b45e01fa8d1..69e38db28e5f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -460,9 +461,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, client->auth_err = 0; client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT | + client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | supported_features; - client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT | + client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT | required_features; /* msgr */ -- cgit v1.2.3 From 
32b4560b04af6e4fee241ea6de6db780eaf354f2 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Mon, 30 Jul 2012 14:39:10 -0700 Subject: ntfs: remove references to long gone super operations and unimplemented methods ->delete_inode(), ->write_super_lockfs(), ->unlockfs() are gone so remove references to them in the NTFS code. Remove unnecessary comments about unimplemented methods while at it (suggested by Christoph Hellwig). Noticed while cleaning up the fsfreeze mess. Signed-off-by: Fernando Luis Vazquez Cao Cc: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/super.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index b341492542ca..2bc149d6a784 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2660,31 +2660,14 @@ static const struct super_operations ntfs_sops = { .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ #ifdef NTFS_RW - //.dirty_inode = NULL, /* VFS: Called from - // __mark_inode_dirty(). */ .write_inode = ntfs_write_inode, /* VFS: Write dirty inode to disk. */ - //.drop_inode = NULL, /* VFS: Called just after the - // inode reference count has - // been decreased to zero. - // NOTE: The inode lock is - // held. See fs/inode.c:: - // generic_drop_inode(). */ - //.delete_inode = NULL, /* VFS: Delete inode from disk. - // Called when i_count becomes - // 0 and i_nlink is also 0. */ - //.write_super = NULL, /* Flush dirty super block to - // disk. */ - //.sync_fs = NULL, /* ? */ - //.write_super_lockfs = NULL, /* ? */ - //.unlockfs = NULL, /* ? */ #endif /* NTFS_RW */ .put_super = ntfs_put_super, /* Syscall: umount. */ .statfs = ntfs_statfs, /* Syscall: statfs */ .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is removed from memory.
*/ - //.umount_begin = NULL, /* Forced umount. */ .show_options = ntfs_show_options, /* Show mount options in proc. */ }; -- cgit v1.2.3 From 779302e67835fe9a6b74327e54969ba59cb3478a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 30 Jul 2012 14:39:13 -0700 Subject: fs/xattr.c:getxattr(): improve handling of allocation failures This allocation can be as large as 64k. - Add __GFP_NOWARN so the failed kmalloc() is silent - Fall back to vmalloc() if the kmalloc() failed Signed-off-by: Sasha Levin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xattr.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 1d7ac3790458..4d45b7189e7e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -427,6 +427,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, { ssize_t error; void *kvalue = NULL; + void *vvalue = NULL; char kname[XATTR_NAME_MAX + 1]; error = strncpy_from_user(kname, name, sizeof(kname)); @@ -438,9 +439,13 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, if (size) { if (size > XATTR_SIZE_MAX) size = XATTR_SIZE_MAX; - kvalue = kzalloc(size, GFP_KERNEL); - if (!kvalue) - return -ENOMEM; + kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!kvalue) { + vvalue = vmalloc(size); + if (!vvalue) + return -ENOMEM; + kvalue = vvalue; + } } error = vfs_getxattr(d, kname, kvalue, size); @@ -452,7 +457,10 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, than XATTR_SIZE_MAX bytes. Not possible.
*/ error = -E2BIG; } - kfree(kvalue); + if (vvalue) + vfree(vvalue); + else + kfree(kvalue); return error; } -- cgit v1.2.3 From 9520628e8ceb69fa9a4aee6b57f22675d9e1b709 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jul 2012 14:39:15 -0700 Subject: fs: make dumpable=2 require fully qualified path When the suid_dumpable sysctl is set to "2", and there is no core dump pipe defined in the core_pattern sysctl, a local user can cause core files to be written to root-writable directories, potentially with user-controlled content. This means an admin can unknowingly reintroduce a variation of CVE-2006-2451, allowing local users to gain root privileges. $ cat /proc/sys/fs/suid_dumpable 2 $ cat /proc/sys/kernel/core_pattern core $ ulimit -c unlimited $ cd / $ ls -l core ls: cannot access core: No such file or directory $ touch core touch: cannot touch `core': Permission denied $ OHAI="evil-string-here" ping localhost >/dev/null 2>&1 & $ pid=$! $ sleep 1 $ kill -SEGV $pid $ ls -l core -rw------- 1 root kees 458752 Jun 21 11:35 core $ sudo strings core | grep evil OHAI=evil-string-here While cron has been fixed to abort reading a file when there is any parse error, there are still other sensitive directories that will read any file present and skip unparsable lines. Instead of introducing a suid_dumpable=3 mode and breaking all users of mode 2, this only disables the unsafe portion of mode 2 (writing to disk via relative path). Most users of mode 2 (e.g. Chrome OS) already use a core dump pipe handler, so this change will not break them. For the situations where a pipe handler is not defined but mode 2 is still active, crash dumps will only be written to fully qualified paths. If a relative path is defined (e.g. the default "core" pattern), dump attempts will trigger a printk yelling about the lack of a fully qualified path. Signed-off-by: Kees Cook Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W.
Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/fs.txt | 18 ++++++++++++------ fs/exec.c | 17 ++++++++++++++--- 2 files changed, 26 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 13d6166d7a27..8c235b6e4246 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -163,16 +163,22 @@ This value can be used to query and set the core dump mode for setuid or otherwise protected/tainted binaries. The modes are 0 - (default) - traditional behaviour. Any process which has changed - privilege levels or is execute only will not be dumped + privilege levels or is execute only will not be dumped. 1 - (debug) - all processes dump core when possible. The core dump is owned by the current user and no security is applied. This is intended for system debugging situations only. Ptrace is unchecked. + This is insecure as it allows regular users to examine the memory + contents of privileged processes. 2 - (suidsafe) - any binary which normally would not be dumped is dumped - readable by root only. This allows the end user to remove - such a dump but not access it directly. For security reasons - core dumps in this mode will not overwrite one another or - other files. This mode is appropriate when administrators are - attempting to debug problems in a normal environment. + anyway, but only if the "core_pattern" kernel sysctl is set to + either a pipe handler or a fully qualified path. (For more details + on this limitation, see CVE-2006-2451.) This mode is appropriate + when administrators are attempting to debug problems in a normal + environment, and either have a core dump pipe handler that knows + to treat privileged core dumps with care, or specific directory + defined for catching core dumps. 
If a core dump happens without + a pipe handler or fully qualifid path, a message will be emitted + to syslog warning about the lack of a correct setting. ============================================================== diff --git a/fs/exec.c b/fs/exec.c index e95aeeddd25c..95aae3f9c036 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2111,6 +2111,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) int retval = 0; int flag = 0; int ispipe; + bool need_nonrelative = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .signr = signr, @@ -2136,14 +2137,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (!cred) goto fail; /* - * We cannot trust fsuid as being the "true" uid of the - * process nor do we know its entire history. We only know it - * was tainted so we dump it as root in mode 2. + * We cannot trust fsuid as being the "true" uid of the process + * nor do we know its entire history. We only know it was tainted + * so we dump it as root in mode 2, and only into a controlled + * environment (pipe handler or fully qualified path). 
*/ if (__get_dumpable(cprm.mm_flags) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + need_nonrelative = true; } retval = coredump_wait(exit_code, &core_state); @@ -2223,6 +2226,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; + if (need_nonrelative && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_unlock; + } + cprm.file = filp_open(cn.corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); -- cgit v1.2.3 From 54b501992dd2a839e94e76aa392c392b55080ce8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jul 2012 14:39:18 -0700 Subject: coredump: warn about unsafe suid_dumpable / core_pattern combo When suid_dumpable=2, detect unsafe core_pattern settings and warn when they are seen. Signed-off-by: Kees Cook Suggested-by: Andrew Morton Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. 
Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- include/linux/sched.h | 5 +++++ kernel/sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 95aae3f9c036..5af8390e0fae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2002,17 +2002,17 @@ static void coredump_finish(struct mm_struct *mm) void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case 0: + case SUID_DUMPABLE_DISABLED: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case 1: + case SUID_DUMPABLE_ENABLED: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case 2: + case SUID_DUMPABLE_SAFE: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -2025,7 +2025,7 @@ static int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret >= 2) ? 2 : ret; + return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; } int get_dumpable(struct mm_struct *mm) @@ -2142,7 +2142,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). 
*/ - if (__get_dumpable(cprm.mm_flags) == 2) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a721cef7e2d4..1e26a5e45aa6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -406,6 +406,11 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); +/* get/set_dumpable() values */ +#define SUID_DUMPABLE_DISABLED 0 +#define SUID_DUMPABLE_ENABLED 1 +#define SUID_DUMPABLE_SAFE 2 + /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb4..b46f496405e4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -174,6 +174,11 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; @@ -410,7 +415,7 @@ static struct ctl_table kern_table[] = { .data = core_pattern, .maxlen = CORENAME_MAX_SIZE, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_dostring_coredump, }, { .procname = "core_pipe_limit", @@ -1498,7 +1503,7 @@ static struct ctl_table fs_table[] = { .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_coredump, .extra1 = &zero, .extra2 = &two, }, @@ -2009,6 +2014,34 @@ int 
proc_dointvec_minmax(struct ctl_table *table, int write, do_proc_dointvec_minmax_conv, &param); } +static void validate_coredump_safety(void) +{ + if (suid_dumpable == SUID_DUMPABLE_SAFE && + core_pattern[0] != '/' && core_pattern[0] != '|') { + printk(KERN_WARNING "Unsafe core_pattern used with "\ + "suid_dumpable=2. Pipe handler or fully qualified "\ + "core dump path required.\n"); + } +} + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, -- cgit v1.2.3 From 533574c6bc30cf526cc1c41bde050c854a945efb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 30 Jul 2012 14:40:13 -0700 Subject: btrfs: use printk_get_level and printk_skip_level, add __printf, fix fallout Use the generic printk_get_level() to search a message for a kern_level. Add __printf to verify format and arguments. Fix a few messages that had mismatches in format and arguments. Add #ifdef CONFIG_PRINTK blocks to shrink the object size a bit when not using printk.
[akpm@linux-foundation.org: whitespace tweak] Signed-off-by: Joe Perches Cc: Kay Sievers Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/ctree.h | 13 +++++++++++++ fs/btrfs/disk-io.c | 2 +- fs/btrfs/relocation.c | 2 +- fs/btrfs/super.c | 41 +++++++++++++++++++++++++++++++++++------ 4 files changed, 50 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index adb1cd7ceb9b..4bab807227ad 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3342,10 +3342,22 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* super.c */ int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); + +#ifdef CONFIG_PRINTK +__printf(2, 3) void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...); +#else +static inline __printf(2, 3) +void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) +{ +} +#endif + +__printf(5, 6) void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); + void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *function, unsigned int line, int errno); @@ -3386,6 +3398,7 @@ do { \ (errno), fmt, ##args); \ } while (0) +__printf(5, 6) void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 502b20c56e84..fadeba6a5db9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1114,7 +1114,7 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, spin_unlock(&root->fs_info->delalloc_lock); btrfs_panic(root->fs_info, -EOVERFLOW, "Can't clear %lu bytes from " - " dirty_mdatadata_bytes (%lu)", + " dirty_mdatadata_bytes (%llu)", buf->len, root->fs_info->dirty_metadata_bytes); } diff --git 
a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c5dbd9149679..4da08652004d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1241,7 +1241,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) if (rb_node) { btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " "for start=%llu while inserting into relocation " - "tree\n"); + "tree\n", node->bytenr); kfree(node); return -EEXIST; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fa61ef59cd61..8c6e61d6eed5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -125,6 +125,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) } } +#ifdef CONFIG_PRINTK /* * __btrfs_std_error decodes expected errors from the caller and * invokes the approciate error response. @@ -167,7 +168,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, va_end(args); } -const char *logtypes[] = { +static const char * const logtypes[] = { "emergency", "alert", "critical", @@ -185,22 +186,50 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) struct va_format vaf; va_list args; const char *type = logtypes[4]; + int kern_level; va_start(args, fmt); - if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { - memcpy(lvl, fmt, 3); - lvl[3] = '\0'; - fmt += 3; - type = logtypes[fmt[1] - '0']; + kern_level = printk_get_level(fmt); + if (kern_level) { + size_t size = printk_skip_level(fmt) - fmt; + memcpy(lvl, fmt, size); + lvl[size] = '\0'; + fmt += size; + type = logtypes[kern_level - '0']; } else *lvl = '\0'; vaf.fmt = fmt; vaf.va = &args; + printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf); + + va_end(args); } +#else + +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, + unsigned int line, int errno, const char *fmt, ...) +{ + struct super_block *sb = fs_info->sb; + + /* + * Special case: if the error is EROFS, and we're already + * under MS_RDONLY, then it is safe here. 
+ */ + if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) + return; + + /* Don't go through full error handling during mount */ + if (sb->s_flags & MS_BORN) { + save_error_info(fs_info); + btrfs_handle_error(fs_info); + } +} +#endif + /* * We only mark the transaction aborted and then set the file system read-only. * This will prevent new transactions from starting or trying to join this -- cgit v1.2.3 From 9b58f6d4aaef070bf6e0744713b6d2b6fc1b3578 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:40:57 -0700 Subject: qnx4fs: use memweight() Use memweight() to count the total number of bits clear in memory area. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. Signed-off-by: Akinobu Mita Acked-by: Anders Larsen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/qnx4/bitmap.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 22e0d60e53ef..76a7a697b778 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -17,23 +17,6 @@ #include #include "qnx4.h" -static void count_bits(register const char *bmPart, register int size, - int *const tf) -{ - char b; - int tot = *tf; - - if (size > QNX4_BLOCK_SIZE) { - size = QNX4_BLOCK_SIZE; - } - do { - b = *bmPart++; - tot += 8 - hweight8(b); - size--; - } while (size != 0); - *tf = tot; -} - unsigned long qnx4_count_free_blocks(struct super_block *sb) { int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1; @@ -44,13 +27,16 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb) struct buffer_head *bh; while (total < size) { + int bytes = min(size - total, QNX4_BLOCK_SIZE); + if ((bh = sb_bread(sb, start + offset)) == NULL) { 
printk(KERN_ERR "qnx4: I/O error in counting free blocks\n"); break; } - count_bits(bh->b_data, size - total, &total_free); + total_free += bytes * BITS_PER_BYTE - + memweight(bh->b_data, bytes); brelse(bh); - total += QNX4_BLOCK_SIZE; + total += bytes; offset++; } -- cgit v1.2.3 From 0121ad62c20ed779e38ad689071da2805f03249f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:00 -0700 Subject: affs: use memweight() Use memweight() to count the total number of bits set in memory area. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/bitmap.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 6e0be43ef6ef..a32246b8359e 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -10,30 +10,6 @@ #include #include "affs.h" -/* This is, of course, shamelessly stolen from fs/minix */ - -static const int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; - -static u32 -affs_count_free_bits(u32 blocksize, const void *data) -{ - const u32 *map; - u32 free; - u32 tmp; - - map = data; - free = 0; - for (blocksize /= 4; blocksize > 0; blocksize--) { - tmp = *map++; - while (tmp) { - free += nibblemap[tmp & 0xf]; - tmp >>= 4; - } - } - - return free; -} - u32 affs_count_free_blocks(struct super_block *sb) { @@ -317,7 +293,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) goto out; } pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key); - bm->bm_free = affs_count_free_bits(sb->s_blocksize - 4, bh->b_data + 4); + bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4); /* Don't try read the extension if this is the last block, * but we also need the right bm pointer below @@ -367,7 +343,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) /* recalculate bitmap count for last block */ bm--; - bm->bm_free = affs_count_free_bits(sb->s_blocksize - 4, bh->b_data + 4); + bm->bm_free 
= memweight(bh->b_data + 4, sb->s_blocksize - 4); out: affs_brelse(bh); -- cgit v1.2.3 From a75613ec73ec87726a81fe421385a13c25fdcfc4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:03 -0700 Subject: ocfs2: use memweight() Use memweight to count the total number of bits set in memory area. Signed-off-by: Akinobu Mita Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/localalloc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 210c35237548..a9f78c74d687 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -784,14 +784,10 @@ bail: static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) { - int i; - u8 *buffer; - u32 count = 0; + u32 count; struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); - buffer = la->la_bitmap; - for (i = 0; i < le16_to_cpu(la->la_size); i++) - count += hweight8(buffer[i]); + count = memweight(la->la_bitmap, le16_to_cpu(la->la_size)); trace_ocfs2_local_alloc_count_bits(count); return count; -- cgit v1.2.3 From ecd0afa3ced0ebf36901b53fd9ee431f8a34a161 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:05 -0700 Subject: ext2: use memweight() Convert ext2_count_free() to use memweight() instead of table lookup based counting clear bits implementation. This change only affects the code segments enabled by EXT2FS_DEBUG. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. This also includes the following changes. - Remove unnecessary map == NULL check in ext2_count_free() which always takes non-null pointer as the memory area. - Fix printk format warning that only reveals with EXT2FS_DEBUG. 
Signed-off-by: Akinobu Mita Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/balloc.c | 14 ++------------ fs/ext2/ialloc.c | 1 + 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 1c3613998862..376aa77f3ca7 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1444,19 +1444,9 @@ ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp) #ifdef EXT2FS_DEBUG -static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - -unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars) { - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); + return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); } #endif /* EXT2FS_DEBUG */ diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index c13eb7b91a11..8f370e012e61 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -644,6 +644,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) } brelse(bitmap_bh); printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + (unsigned long) percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), desc_count, bitmap_count); return desc_count; -- cgit v1.2.3 From 10d470849a7c6dd360e8ad4770160ad7af9adb4b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:06 -0700 Subject: ext3: use memweight() Convert ext3_count_free() to use memweight() instead of table lookup based counting clear bits implementation. This change only affects the code segments enabled by EXT3FS_DEBUG. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. 
Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. This also includes the following changes. - Remove unnecessary map == NULL check in ext3_count_free() which always takes non-null pointer as the memory area. - Fix printk format warning that only reveals with EXT3FS_DEBUG. Signed-off-by: Akinobu Mita Acked-by: Jan Kara Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 2 +- fs/ext3/bitmap.c | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 25cd60892116..90d901f0486b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1813,7 +1813,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) brelse(bitmap_bh); printk("ext3_count_free_blocks: stored = "E3FSBLK ", computed = "E3FSBLK", "E3FSBLK"\n", - le32_to_cpu(es->s_free_blocks_count), + (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); return bitmap_count; #else diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c index 909d13e26560..ef9c643e8e9d 100644 --- a/fs/ext3/bitmap.c +++ b/fs/ext3/bitmap.c @@ -11,19 +11,9 @@ #ifdef EXT3FS_DEBUG -static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) { - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); + return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); } #endif /* EXT3FS_DEBUG */ -- cgit v1.2.3 From 6017b485caeae5915956190b4f3d8307021e785d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:08 -0700 Subject: ext4: use memweight() Convert ext4_count_free() to use memweight() instead of table lookup based counting clear bits 
implementation. This change only affects the code segments enabled by EXT4FS_DEBUG. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. This also includes the following change. - Remove unnecessary map == NULL check in ext4_count_free() which always takes non-null pointer as the memory area. Signed-off-by: Akinobu Mita Cc: "Theodore Ts'o" Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/bitmap.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index a94b9c63ee5c..f8716eab9995 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -11,16 +11,9 @@ #include #include "ext4.h" -static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - unsigned int ext4_count_free(char *bitmap, unsigned int numchars) { - unsigned int i, sum = 0; - - for (i = 0; i < numchars; i++) - sum += nibblemap[bitmap[i] & 0xf] + - nibblemap[(bitmap[i] >> 4) & 0xf]; - return sum; + return numchars * BITS_PER_BYTE - memweight(bitmap, numchars); } int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, -- cgit v1.2.3 From 6ed6a722f9abac25b0549e7507a2b745ede4475c Mon Sep 17 00:00:00 2001 From: Vladimir Serbinenko Date: Mon, 30 Jul 2012 14:42:00 -0700 Subject: minixfs: fix block limit check On minix2 and minix3 usually max_size is 7fffffff and the check in question prohibits creation of last block spanning right before 7fffffff, due to downward rounding during the division. Fix it by using multiplication instead. 
[akpm@linux-foundation.org: fix up code layout, use local `sb'] Signed-off-by: Vladimir Serbinenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/itree_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index 13487ad16894..78e2d93e5c83 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,7 +32,8 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n", block, bdevname(sb->s_bdev, b)); - } else if (block >= (minix_sb(inode->i_sb)->s_max_size/sb->s_blocksize)) { + } else if ((u64)block * (u64)sb->s_blocksize >= + minix_sb(sb)->s_max_size) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %s\n", -- cgit v1.2.3 From 6b0f3393e38584ec22bab62fe01df58ae5a73ee7 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Mon, 30 Jul 2012 14:42:02 -0700 Subject: nilfs2: add omitted comment for ns_mount_state field of the_nilfs structure Add omitted comment for ns_mount_state field of the_nilfs structure. Signed-off-by: Vyacheslav Dubeyko Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/the_nilfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 9992b11312ff..ef40a510e2f3 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -52,6 +52,7 @@ enum { * @ns_sbwtime: previous write time of super block * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block + * @ns_mount_state: file system state * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. 
* @ns_nextnum: index number of the full segment index to be used next -- cgit v1.2.3 From 278038ac53c6c4f53d1d34f978beb9aba1410b2c Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Mon, 30 Jul 2012 14:42:03 -0700 Subject: nilfs2: remove references to long gone super operations ->delete_inode(), ->write_super_lockfs(), ->unlockfs() are gone so remove references to them in the nilfs2 code. Noticed while cleaning up the fsfreeze mess. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index d57c42f974ea..a76d6ea51ffb 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -677,7 +677,6 @@ static const struct super_operations nilfs_sops = { .destroy_inode = nilfs_destroy_inode, .dirty_inode = nilfs_dirty_inode, /* .write_inode = nilfs_write_inode, */ - /* .put_inode = nilfs_put_inode, */ /* .drop_inode = nilfs_drop_inode, */ .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, @@ -685,8 +684,6 @@ static const struct super_operations nilfs_sops = { .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, - /* .write_super_lockfs */ - /* .unlockfs */ .statfs = nilfs_statfs, .remount_fs = nilfs_remount, /* .umount_begin */ -- cgit v1.2.3 From fe0627e7b3d32a41c16fac6e0af091991545865e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 30 Jul 2012 14:42:05 -0700 Subject: nilfs2: fix timing issue between rmcp and chcp ioctls The checkpoint deletion ioctl (rmcp ioctl) has potential for breaking snapshot because it is not fully exclusive with checkpoint mode change ioctl (chcp ioctl).
The rmcp ioctl first tests if the specified checkpoint is a snapshot or not within nilfs_cpfile_delete_checkpoint function, and then calls nilfs_cpfile_delete_checkpoints function to actually invalidate the checkpoint only if it's not a snapshot. However, the checkpoint can be changed into a snapshot by the chcp ioctl between these two operations. In that case, calling nilfs_cpfile_delete_checkpoints() wrongly invalidates the snapshot, which leads to snapshot list corruption and snapshot count mismatch. This fixes the issue by changing nilfs_cpfile_delete_checkpoints() so that it reconfirms the target checkpoints are snapshot or not. This second check is exclusive with the chcp operation since it is protected by an existing semaphore. Signed-off-by: Ryusuke Konishi Cc: Fernando Luis Vazquez Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index dab5c4c6dfaf..deaa3d33a0aa 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -286,7 +286,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 cno; void *kaddr; unsigned long tnicps; - int ret, ncps, nicps, count, i; + int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { printk(KERN_ERR "%s: invalid range of checkpoint numbers: " @@ -301,6 +301,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, if (ret < 0) goto out_sem; tnicps = 0; + nss = 0; for (cno = start; cno < end; cno += ncps) { ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end); @@ -318,8 +319,9 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cno, cp_bh, kaddr); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { - WARN_ON(nilfs_checkpoint_snapshot(cp)); - if (!nilfs_checkpoint_invalid(cp)) { + if (nilfs_checkpoint_snapshot(cp)) { + nss++; + } else if (!nilfs_checkpoint_invalid(cp)) { 
nilfs_checkpoint_set_invalid(cp); nicps++; } @@ -364,6 +366,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, } brelse(header_bh); + if (nss > 0) + ret = -EBUSY; out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); -- cgit v1.2.3 From 572d8b3945a31bee7c40d21556803e4807fd9141 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 30 Jul 2012 14:42:07 -0700 Subject: nilfs2: fix deadlock issue between chcp and thaw ioctls An fs-thaw ioctl causes deadlock with a chcp or mkcp -s command: chcp D ffff88013870f3d0 0 1325 1324 0x00000004 ... Call Trace: nilfs_transaction_begin+0x11c/0x1a0 [nilfs2] wake_up_bit+0x20/0x20 copy_from_user+0x18/0x30 [nilfs2] nilfs_ioctl_change_cpmode+0x7d/0xcf [nilfs2] nilfs_ioctl+0x252/0x61a [nilfs2] do_page_fault+0x311/0x34c get_unmapped_area+0x132/0x14e do_vfs_ioctl+0x44b/0x490 __set_task_blocked+0x5a/0x61 vm_mmap_pgoff+0x76/0x87 __set_current_blocked+0x30/0x4a sys_ioctl+0x4b/0x6f system_call_fastpath+0x16/0x1b thaw D ffff88013870d890 0 1352 1351 0x00000004 ... Call Trace: rwsem_down_failed_common+0xdb/0x10f call_rwsem_down_write_failed+0x13/0x20 down_write+0x25/0x27 thaw_super+0x13/0x9e do_vfs_ioctl+0x1f5/0x490 vm_mmap_pgoff+0x76/0x87 sys_ioctl+0x4b/0x6f filp_close+0x64/0x6c system_call_fastpath+0x16/0x1b where the thaw ioctl deadlocked at thaw_super() when called while chcp was waiting at nilfs_transaction_begin() called from nilfs_ioctl_change_cpmode(). This deadlock is 100% reproducible. This is because nilfs_ioctl_change_cpmode() first locks sb->s_umount in read mode and then waits for unfreezing in nilfs_transaction_begin(), whereas thaw_super() locks sb->s_umount in write mode. The locking of sb->s_umount here was intended to make snapshot mounts and the downgrade of snapshots to checkpoints exclusive. This fixes the deadlock issue by replacing the sb->s_umount usage in nilfs_ioctl_change_cpmode() with a dedicated mutex which protects snapshot mounts. 
Signed-off-by: Ryusuke Konishi Cc: Fernando Luis Vazquez Cao Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 4 ++-- fs/nilfs2/super.c | 3 +++ fs/nilfs2/the_nilfs.c | 1 + fs/nilfs2/the_nilfs.h | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 06658caa18bd..0b6387c67e6c 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -182,7 +182,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (copy_from_user(&cpmode, argp, sizeof(cpmode))) goto out; - down_read(&inode->i_sb->s_umount); + mutex_lock(&nilfs->ns_snapshot_mount_mutex); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( @@ -192,7 +192,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ - up_read(&inode->i_sb->s_umount); + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); out: mnt_drop_write_file(filp); return ret; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index a76d6ea51ffb..6522cac6057c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -945,6 +945,8 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct nilfs_root *root; int ret; + mutex_lock(&nilfs->ns_snapshot_mount_mutex); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); @@ -969,6 +971,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 501b7f8b739f..41e6a04a561f 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -76,6 +76,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) nilfs->ns_bdev = bdev; 
atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); + mutex_init(&nilfs->ns_snapshot_mount_mutex); INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index ef40a510e2f3..2558f320b821 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -47,6 +47,7 @@ enum { * @ns_flags: flags * @ns_bdev: block device * @ns_sem: semaphore for shared states + * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super block @@ -100,6 +101,7 @@ struct the_nilfs { struct block_device *ns_bdev; struct rw_semaphore ns_sem; + struct mutex ns_snapshot_mount_mutex; /* * used for -- cgit v1.2.3 From f5974c8f8cf431baf44e7127b669e3b1960f184f Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Mon, 30 Jul 2012 14:42:10 -0700 Subject: nilfs2: add omitted comments for different structures in driver implementation Add omitted comments for different structures in driver implementation. 
Signed-off-by: Vyacheslav Dubeyko Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.h | 14 +++++++++++--- fs/nilfs2/bmap.h | 7 +++++++ fs/nilfs2/btnode.h | 8 +++++++- fs/nilfs2/dat.c | 6 ++++++ fs/nilfs2/export.h | 8 ++++++++ fs/nilfs2/ifile.c | 6 +++++- fs/nilfs2/inode.c | 7 +++++++ fs/nilfs2/mdt.h | 7 +++++++ fs/nilfs2/nilfs.h | 17 +++++++++++++++-- fs/nilfs2/sufile.c | 8 +++++++- fs/nilfs2/the_nilfs.h | 3 +-- 11 files changed, 81 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index f5fde36b9e28..fb7238100548 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -76,15 +76,23 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit find_next_zero_bit_le -/* - * persistent object allocator cache +/** + * struct nilfs_bh_assoc - block offset and buffer head association + * @blkoff: block offset + * @bh: buffer head */ - struct nilfs_bh_assoc { unsigned long blkoff; struct buffer_head *bh; }; +/** + * struct nilfs_palloc_cache - persistent object allocator cache + * @lock: cache protecting lock + * @prev_desc: blockgroup descriptors cache + * @prev_bitmap: blockgroup bitmap cache + * @prev_entry: translation entries cache + */ struct nilfs_palloc_cache { spinlock_t lock; struct nilfs_bh_assoc prev_desc; diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 40d9f453d31c..b89e68076adc 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -135,6 +135,13 @@ struct nilfs_bmap { /* state */ #define NILFS_BMAP_DIRTY 0x00000001 +/** + * struct nilfs_bmap_store - shadow copy of bmap state + * @data: cached raw block mapping of on-disk inode + * @last_allocated_key: cached value of last allocated key for data block + * @last_allocated_ptr: cached value of last allocated ptr for data block + * @state: cached value of state field of bmap structure + */ struct 
nilfs_bmap_store { __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; __u64 last_allocated_key; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 3a4dd2d8d3fc..d876b565ce64 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -29,7 +29,13 @@ #include #include - +/** + * struct nilfs_btnode_chkey_ctxt - change key context + * @oldkey: old key of block's moving content + * @newkey: new key for block's content + * @bh: buffer head of old buffer + * @newbh: buffer head of new buffer + */ struct nilfs_btnode_chkey_ctxt { __u64 oldkey; __u64 newkey; diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index b5c13f3576b9..fa0f80308c2d 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -33,6 +33,12 @@ #define NILFS_CNO_MIN ((__u64)1) #define NILFS_CNO_MAX (~(__u64)0) +/** + * struct nilfs_dat_info - on-memory private data of DAT file + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of DAT file + * @shadow: shadow map of DAT file + */ struct nilfs_dat_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h index a71cc412b651..19ccbf9522ab 100644 --- a/fs/nilfs2/export.h +++ b/fs/nilfs2/export.h @@ -5,6 +5,14 @@ extern const struct export_operations nilfs_export_ops; +/** + * struct nilfs_fid - NILFS file id type + * @cno: checkpoint number + * @ino: inode number + * @gen: file generation (version) for NFS + * @parent_gen: parent generation (version) for NFS + * @parent_ino: parent inode number + */ struct nilfs_fid { u64 cno; u64 ino; diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 5a48df79d674..d8e65bde083c 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -29,7 +29,11 @@ #include "alloc.h" #include "ifile.h" - +/** + * struct nilfs_ifile_info - on-memory private data of ifile + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of ifile + */ struct nilfs_ifile_info { struct 
nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7cc64465ec26..6e2c3db976b2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -34,6 +34,13 @@ #include "cpfile.h" #include "ifile.h" +/** + * struct nilfs_iget_args - arguments used during comparison between inodes + * @ino: inode number + * @cno: checkpoint number + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag + */ struct nilfs_iget_args { u64 ino; __u64 cno; diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index ab20a4baa50f..ab172e8549c5 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -28,6 +28,13 @@ #include "nilfs.h" #include "page.h" +/** + * struct nilfs_shadow_map - shadow mapping of meta data file + * @bmap_store: shadow copy of bmap state + * @frozen_data: shadowed dirty data pages + * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @frozen_buffers: list of frozen buffers + */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; struct address_space frozen_data; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 250add84da76..74cece80e9a3 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -32,8 +32,21 @@ #include "the_nilfs.h" #include "bmap.h" -/* - * nilfs inode data in memory +/** + * struct nilfs_inode_info - nilfs inode data in memory + * @i_flags: inode flags + * @i_state: dynamic state flags + * @i_bmap: pointer on i_bmap_data + * @i_bmap_data: raw block mapping + * @i_xattr: + * @i_dir_start_lookup: page index of last successful search + * @i_cno: checkpoint number for GC inode + * @i_btnode_cache: cached pages of b-tree nodes + * @i_dirty: list for connecting dirty files + * @xattr_sem: semaphore for extended attributes processing + * @i_bh: buffer contains disk inode + * @i_root: root object of the current filesystem tree + * @vfs_inode: VFS inode object */ struct nilfs_inode_info { __u32 i_flags; diff --git a/fs/nilfs2/sufile.c 
b/fs/nilfs2/sufile.c index c5b7653a4391..3127e9f438a7 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -30,7 +30,13 @@ #include "mdt.h" #include "sufile.h" - +/** + * struct nilfs_sufile_info - on-memory private data of sufile + * @mi: on-memory private data of metadata file + * @ncleansegs: number of clean segments + * @allocmin: lower limit of allocatable segment range + * @allocmax: upper limit of allocatable segment range + */ struct nilfs_sufile_info { struct nilfs_mdt_info mi; unsigned long ncleansegs;/* number of clean segments */ diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 2558f320b821..6eee4177807b 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -232,9 +232,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty) * @count: refcount of this structure * @nilfs: nilfs object * @ifile: inode file - * @root: root inode * @inodes_count: number of inodes - * @blocks_count: number of blocks (Reserved) + * @blocks_count: number of blocks */ struct nilfs_root { __u64 cno; -- cgit v1.2.3 From 497d48bd27ec1c44b4600e8e98a776188f2e11f2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 30 Jul 2012 14:42:11 -0700 Subject: hfsplus: use -ENOMEM when kzalloc() fails Use -ENOMEM return value instead of -EINVAL when kzalloc() fails. Signed-off-by: Namjae Jeon Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 473332098013..fdafb2d71654 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -365,7 +365,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) u64 last_fs_block, last_fs_page; int err; - err = -EINVAL; + err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) goto out; -- cgit v1.2.3 From a943ed71c9171fb5e3b256e8022bbedff95cc826 Mon Sep 17 00:00:00 2001 From: Steven J. 
Magnani Date: Mon, 30 Jul 2012 14:42:13 -0700 Subject: fat: accessors for msdos_dir_entry 'start' fields Simplify code by providing accessor functions for the directory entry start cluster fields. Signed-off-by: Steven J. Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 15 +++++++++++++++ fs/fat/inode.c | 12 +++--------- fs/fat/namei_msdos.c | 11 +++-------- fs/fat/namei_vfat.c | 11 +++-------- 4 files changed, 24 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index fc35c5c69136..2deeeb86f331 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -217,6 +217,21 @@ static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) #endif } +static inline int fat_get_start(const struct msdos_sb_info *sbi, + const struct msdos_dir_entry *de) +{ + int cluster = le16_to_cpu(de->start); + if (sbi->fat_bits == 32) + cluster |= (le16_to_cpu(de->starthi) << 16); + return cluster; +} + +static inline void fat_set_start(struct msdos_dir_entry *de, int cluster) +{ + de->start = cpu_to_le16(cluster); + de->starthi = cpu_to_le16(cluster >> 16); +} + static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) { #ifdef __BIG_ENDIAN diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0038b32cb362..05e897fe9866 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -369,10 +369,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; - MSDOS_I(inode)->i_start = le16_to_cpu(de->start); - if (sbi->fat_bits == 32) - MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); - + MSDOS_I(inode)->i_start = fat_get_start(sbi, de); MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start; error = fat_calc_dir_size(inode); if (error < 0) @@ -385,9 +382,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_mode = fat_make_mode(sbi, de->attr, 
((sbi->options.showexec && !is_exec(de->name + 8)) ? S_IRUGO|S_IWUGO : S_IRWXUGO)); - MSDOS_I(inode)->i_start = le16_to_cpu(de->start); - if (sbi->fat_bits == 32) - MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); + MSDOS_I(inode)->i_start = fat_get_start(sbi, de); MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start; inode->i_size = le32_to_cpu(de->size); @@ -613,8 +608,7 @@ retry: else raw_entry->size = cpu_to_le32(inode->i_size); raw_entry->attr = fat_make_attrs(inode); - raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); - raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); + fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart); fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, &raw_entry->date, NULL); if (sbi->options.isvfat) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 70d993a93805..b0e12bf9f4a1 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -246,8 +246,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, de.ctime_cs = 0; de.time = time; de.date = date; - de.start = cpu_to_le16(cluster); - de.starthi = cpu_to_le16(cluster >> 16); + fat_set_start(&de, cluster); de.size = 0; err = fat_add_entries(dir, &de, 1, sinfo); @@ -530,9 +529,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, mark_inode_dirty(old_inode); if (update_dotdot) { - int start = MSDOS_I(new_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); @@ -572,9 +569,7 @@ error_dotdot: corrupt = 1; if (update_dotdot) { - int start = MSDOS_I(old_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); 
corrupt |= sync_dirty_buffer(dotdot_bh); } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6cc480652433..6a6d8c0715a1 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -651,8 +651,7 @@ shortname: de->time = de->ctime = time; de->date = de->cdate = de->adate = date; de->ctime_cs = time_cs; - de->start = cpu_to_le16(cluster); - de->starthi = cpu_to_le16(cluster >> 16); + fat_set_start(de, cluster); de->size = 0; out_free: __putname(uname); @@ -965,9 +964,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, mark_inode_dirty(old_inode); if (update_dotdot) { - int start = MSDOS_I(new_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); @@ -1009,9 +1006,7 @@ error_dotdot: corrupt = 1; if (update_dotdot) { - int start = MSDOS_I(old_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } -- cgit v1.2.3 From deb8274a0cf44827ec260330cc1d94d0f3dcfb94 Mon Sep 17 00:00:00 2001 From: Steven J. Magnani Date: Mon, 30 Jul 2012 14:42:16 -0700 Subject: fat: refactor shortname parsing Nearly identical shortname parsing is performed in fat_search_long() and __fat_readdir(). Extract this code into a function that may be called by both. Signed-off-by: Steven J. 
Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 255 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 137 insertions(+), 118 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 6eaa28c98ad1..dc49ed2cbffa 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -35,6 +35,11 @@ #define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) #define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) +static inline unsigned char fat_tolower(unsigned char c) +{ + return ((c >= 'A') && (c <= 'Z')) ? c+32 : c; +} + static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, struct msdos_dir_entry *de) @@ -333,6 +338,124 @@ parse_long: return 0; } +/** + * fat_parse_short - Parse MS-DOS (short) directory entry. + * @sb: superblock + * @de: directory entry to parse + * @name: FAT_MAX_SHORT_SIZE array in which to place extracted name + * @dot_hidden: Nonzero == prepend '.' to names with ATTR_HIDDEN + * + * Returns the number of characters extracted into 'name'. 
+ */ +static int fat_parse_short(struct super_block *sb, + const struct msdos_dir_entry *de, + unsigned char *name, int dot_hidden) +{ + const struct msdos_sb_info *sbi = MSDOS_SB(sb); + int isvfat = sbi->options.isvfat; + int nocase = sbi->options.nocase; + unsigned short opt_shortname = sbi->options.shortname; + struct nls_table *nls_disk = sbi->nls_disk; + wchar_t uni_name[14]; + unsigned char c, work[MSDOS_NAME]; + unsigned char *ptname = name; + int chi, chl, i, j, k; + int dotoffset = 0; + int name_len = 0, uni_len = 0; + + if (!isvfat && dot_hidden && (de->attr & ATTR_HIDDEN)) { + *ptname++ = '.'; + dotoffset = 1; + } + + memcpy(work, de->name, sizeof(work)); + /* see namei.c, msdos_format_name */ + if (work[0] == 0x05) + work[0] = 0xE5; + + /* Filename */ + for (i = 0, j = 0; i < 8;) { + c = work[i]; + if (!c) + break; + chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, + &uni_name[j++], opt_shortname, + de->lcase & CASE_LOWER_BASE); + if (chl <= 1) { + if (!isvfat) + ptname[i] = nocase ? c : fat_tolower(c); + i++; + if (c != ' ') { + name_len = i; + uni_len = j; + } + } else { + uni_len = j; + if (isvfat) + i += min(chl, 8-i); + else { + for (chi = 0; chi < chl && i < 8; chi++, i++) + ptname[i] = work[i]; + } + if (chl) + name_len = i; + } + } + + i = name_len; + j = uni_len; + fat_short2uni(nls_disk, ".", 1, &uni_name[j++]); + if (!isvfat) + ptname[i] = '.'; + i++; + + /* Extension */ + for (k = 8; k < MSDOS_NAME;) { + c = work[k]; + if (!c) + break; + chl = fat_shortname2uni(nls_disk, &work[k], MSDOS_NAME - k, + &uni_name[j++], opt_shortname, + de->lcase & CASE_LOWER_EXT); + if (chl <= 1) { + k++; + if (!isvfat) + ptname[i] = nocase ? 
c : fat_tolower(c); + i++; + if (c != ' ') { + name_len = i; + uni_len = j; + } + } else { + uni_len = j; + if (isvfat) { + int offset = min(chl, MSDOS_NAME-k); + k += offset; + i += offset; + } else { + for (chi = 0; chi < chl && k < MSDOS_NAME; + chi++, i++, k++) { + ptname[i] = work[k]; + } + } + if (chl) + name_len = i; + } + } + + if (name_len > 0) { + name_len += dotoffset; + + if (sbi->options.isvfat) { + uni_name[uni_len] = 0x0000; + name_len = fat_uni_to_x8(sb, uni_name, name, + FAT_MAX_SHORT_SIZE); + } + } + + return name_len; +} + /* * Return values: negative -> error, 0 -> not found, positive -> found, * value is the total amount of slots, including the shortname entry. @@ -344,15 +467,11 @@ int fat_search_long(struct inode *inode, const unsigned char *name, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh = NULL; struct msdos_dir_entry *de; - struct nls_table *nls_disk = sbi->nls_disk; unsigned char nr_slots; - wchar_t bufuname[14]; wchar_t *unicode = NULL; - unsigned char work[MSDOS_NAME]; unsigned char bufname[FAT_MAX_SHORT_SIZE]; - unsigned short opt_shortname = sbi->options.shortname; loff_t cpos = 0; - int chl, i, j, last_u, err, len; + int err, len; err = -ENOENT; while (1) { @@ -380,47 +499,16 @@ parse_record: goto end_of_dir; } - memcpy(work, de->name, sizeof(de->name)); - /* see namei.c, msdos_format_name */ - if (work[0] == 0x05) - work[0] = 0xE5; - for (i = 0, j = 0, last_u = 0; i < 8;) { - if (!work[i]) - break; - chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_BASE); - if (chl <= 1) { - if (work[i] != ' ') - last_u = j; - } else { - last_u = j; - } - i += chl; - } - j = last_u; - fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); - for (i = 8; i < MSDOS_NAME;) { - if (!work[i]) - break; - chl = fat_shortname2uni(nls_disk, &work[i], - MSDOS_NAME - i, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_EXT); - if (chl <= 1) { - if (work[i] != ' ') - last_u = j; 
- } else { - last_u = j; - } - i += chl; - } - if (!last_u) + /* Never prepend '.' to hidden files here. + * That is done only for msdos mounts (and only when + * 'dotsOK=yes'); if we are executing here, it is in the + * context of a vfat mount. + */ + len = fat_parse_short(sb, de, bufname, 0); + if (len == 0) continue; /* Compare shortname */ - bufuname[last_u] = 0x0000; - len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); if (fat_name_match(sbi, name, name_len, bufname, len)) goto found; @@ -469,20 +557,15 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh; struct msdos_dir_entry *de; - struct nls_table *nls_disk = sbi->nls_disk; unsigned char nr_slots; - wchar_t bufuname[14]; wchar_t *unicode = NULL; - unsigned char c, work[MSDOS_NAME]; - unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname; - unsigned short opt_shortname = sbi->options.shortname; + unsigned char bufname[FAT_MAX_SHORT_SIZE]; int isvfat = sbi->options.isvfat; - int nocase = sbi->options.nocase; const char *fill_name = NULL; unsigned long inum; unsigned long lpos, dummy, *furrfu = &lpos; loff_t cpos; - int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0; + int short_len = 0, fill_len = 0; int ret = 0; lock_super(sb); @@ -556,74 +639,10 @@ parse_record: } } - if (sbi->options.dotsOK) { - ptname = bufname; - dotoffset = 0; - if (de->attr & ATTR_HIDDEN) { - *ptname++ = '.'; - dotoffset = 1; - } - } - - memcpy(work, de->name, sizeof(de->name)); - /* see namei.c, msdos_format_name */ - if (work[0] == 0x05) - work[0] = 0xE5; - for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { - if (!(c = work[i])) - break; - chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_BASE); - if (chl <= 1) { - ptname[i++] = (!nocase && c>='A' && c<='Z') ? 
c+32 : c; - if (c != ' ') { - last = i; - last_u = j; - } - } else { - last_u = j; - for (chi = 0; chi < chl && i < 8; chi++) { - ptname[i] = work[i]; - i++; last = i; - } - } - } - i = last; - j = last_u; - fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); - ptname[i++] = '.'; - for (i2 = 8; i2 < MSDOS_NAME;) { - if (!(c = work[i2])) - break; - chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_EXT); - if (chl <= 1) { - i2++; - ptname[i++] = (!nocase && c>='A' && c<='Z') ? c+32 : c; - if (c != ' ') { - last = i; - last_u = j; - } - } else { - last_u = j; - for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) { - ptname[i++] = work[i2++]; - last = i; - } - } - } - if (!last) + short_len = fat_parse_short(sb, de, bufname, sbi->options.dotsOK); + if (short_len == 0) goto record_end; - i = last + dotoffset; - j = last_u; - - if (isvfat) { - bufuname[j] = 0x0000; - i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); - } if (nr_slots) { /* hack for fat_ioctl_filldir() */ struct fat_ioctl_filldir_callback *p = dirent; @@ -631,12 +650,12 @@ parse_record: p->longname = fill_name; p->long_len = fill_len; p->shortname = bufname; - p->short_len = i; + p->short_len = short_len; fill_name = NULL; fill_len = 0; } else { fill_name = bufname; - fill_len = i; + fill_len = short_len; } start_filldir: -- cgit v1.2.3 From 108ceeb020bb3558fe175a3fc8b60fd6c1a2a279 Mon Sep 17 00:00:00 2001 From: Jovi Zhang Date: Mon, 30 Jul 2012 14:42:23 -0700 Subject: coredump: fix wrong comments on core limits of pipe coredump case In commit 898b374af6f7 ("exec: replace call_usermodehelper_pipe with use of umh init function and resolve limit"), the core limits recursive check value was changed from 0 to 1, but the corresponding comments were not updated. 
Signed-off-by: Jovi Zhang Cc: Oleg Nesterov Cc: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 5af8390e0fae..3684353ebd5f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2174,15 +2174,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) } if (cprm.limit == 1) { - /* + /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use - * cprm.limit of 1 here as a speacial value. Any - * non-1 limit gets set to RLIM_INFINITY below, but - * a limit of 0 skips the dump. This is a consistent - * way to catch recursive crashes. We can still crash - * if the core_pattern binary sets RLIM_CORE = !1 - * but it runs as root, and can do lots of stupid things + * cprm.limit of 1 here as a speacial value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * * Note that we use task_tgid_vnr here to grab the pid * of the process group leader. That way we get the * right pid if a thread in a multi-threaded -- cgit v1.2.3 From e8905ec27e2f4ea1b9f7e03df68a060b3ae6fca8 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Mon, 30 Jul 2012 14:42:26 -0700 Subject: proc: environ_read() make sure offset points to environment address range Currently the following offset and environment address range check in environ_read() of /proc//environ is buggy: int this_len = mm->env_end - (mm->env_start + src); if (this_len <= 0) break; Large or negative offsets on /proc//environ converted to 'unsigned long' may pass this check since '(mm->env_start + src)' can overflow and 'this_len' will be positive. 
This can turn /proc//environ to act like /proc//mem since (mm->env_start + src) will point and read from another VMA. There are two fixes here plus some code cleaning: 1) Fix the overflow by checking if the offset that was converted to unsigned long will always point to the [mm->env_start, mm->env_end] address range. 2) Remove the truncation that was made to the result of the check, storing the result in 'int this_len' will alter its value and we can not depend on it. For kernels that have commit b409e578d ("proc: clean up /proc//environ handling") which adds the appropriate ptrace check and saves the 'mm' at ->open() time, this is not a security issue. This patch is taken from the grsecurity patch since it was just made available. Signed-off-by: Djalal Harouni Cc: Oleg Nesterov Cc: Brad Spengler Acked-by: Kees Cook Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 2772208338f8..39ee093b5e96 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -827,15 +827,16 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; while (count > 0) { - int this_len, retval, max_len; + size_t this_len, max_len; + int retval; - this_len = mm->env_end - (mm->env_start + src); - - if (this_len <= 0) + if (src >= (mm->env_end - mm->env_start)) break; - max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - this_len = (this_len > max_len) ? 
max_len : this_len; + this_len = mm->env_end - (mm->env_start + src); + + max_len = min_t(size_t, PAGE_SIZE, count); + this_len = min(max_len, this_len); retval = access_remote_vm(mm, (mm->env_start + src), page, this_len, 0); -- cgit v1.2.3 From bc452b4b65bd589083a7a7ba4f14f85dfc8454fa Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Mon, 30 Jul 2012 14:42:28 -0700 Subject: proc: do not allow negative offsets on /proc//environ __mem_open() which is called by both /proc//environ and /proc//mem ->open() handlers will allow the use of negative offsets. /proc//mem has negative offsets but not /proc//environ. Clean this by moving the 'force FMODE_UNSIGNED_OFFSET flag' to mem_open() to allow negative offsets only on /proc//mem. Signed-off-by: Djalal Harouni Cc: Oleg Nesterov Cc: Brad Spengler Acked-by: Kees Cook Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 39ee093b5e96..1b6c84cbdb73 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -695,8 +695,6 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) mmput(mm); } - /* OK to pass negative loff_t, we can catch out-of-range */ - file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; return 0; @@ -704,7 +702,12 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) static int mem_open(struct inode *inode, struct file *file) { - return __mem_open(inode, file, PTRACE_MODE_ATTACH); + int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); + + /* OK to pass negative loff_t, we can catch out-of-range */ + file->f_mode |= FMODE_UNSIGNED_OFFSET; + + return ret; } static ssize_t mem_rw(struct file *file, char __user *buf, -- cgit v1.2.3 From 98c350cda2c14a343d34ea01a3d9c24fea5ec66d Mon Sep 17 00:00:00 2001 From: Justin Lecher Date: Mon, 30 Jul 2012 14:42:53 -0700 Subject: fs: 
cachefiles: add support for large files in filesystem caching Support the caching of large files. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=31182 Signed-off-by: Justin Lecher Signed-off-by: Suresh Jayaraman Tested-by: Suresh Jayaraman Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cachefiles/rdwr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c0353dfac51f..c994691d9445 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -919,7 +919,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) * own time */ path.mnt = cache->mnt; path.dentry = object->backer; - file = dentry_open(&path, O_RDWR, cache->cache_cred); + file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); } else { -- cgit v1.2.3 From 1d151c337d79fa3de88654d2514f58fbd916a8e0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 30 Jul 2012 14:43:00 -0700 Subject: c/r: fcntl: add F_GETOWNER_UIDS option When we restore file descriptors we would like them to look exactly as they were at dumping time. With help of fcntl it's almost possible, the missing snippet is file owners UIDs. To be able to read their values the F_GETOWNER_UIDS is introduced. This option is valid iff CONFIG_CHECKPOINT_RESTORE is turned on, otherwise returning -EINVAL. Signed-off-by: Cyrill Gorcunov Acked-by: "Eric W. Biederman" Cc: "Serge E.
Hallyn" Cc: Oleg Nesterov Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 29 +++++++++++++++++++++++++++++ include/asm-generic/fcntl.h | 4 ++++ security/selinux/hooks.c | 1 + 3 files changed, 34 insertions(+) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 81b70e665bf0..887b5ba8c9b5 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -340,6 +341,31 @@ static int f_getown_ex(struct file *filp, unsigned long arg) return ret; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static int f_getowner_uids(struct file *filp, unsigned long arg) +{ + struct user_namespace *user_ns = current_user_ns(); + uid_t * __user dst = (void * __user)arg; + uid_t src[2]; + int err; + + read_lock(&filp->f_owner.lock); + src[0] = from_kuid(user_ns, filp->f_owner.uid); + src[1] = from_kuid(user_ns, filp->f_owner.euid); + read_unlock(&filp->f_owner.lock); + + err = put_user(src[0], &dst[0]); + err |= put_user(src[1], &dst[1]); + + return err; +} +#else +static int f_getowner_uids(struct file *filp, unsigned long arg) +{ + return -EINVAL; +} +#endif + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -396,6 +422,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SETOWN_EX: err = f_setown_ex(filp, arg); break; + case F_GETOWNER_UIDS: + err = f_getowner_uids(filp, arg); + break; case F_GETSIG: err = filp->f_owner.signum; break; diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 9e5b0356e2bb..a48937d4a5ea 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -120,6 +120,10 @@ #define F_GETOWN_EX 16 #endif +#ifndef F_GETOWNER_UIDS +#define F_GETOWNER_UIDS 17 +#endif + #define F_OWNER_TID 0 #define F_OWNER_PID 1 #define F_OWNER_PGRP 2 diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 94c45a1531a4..ec43760a8a03 100644 --- a/security/selinux/hooks.c 
+++ b/security/selinux/hooks.c @@ -3180,6 +3180,7 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, case F_GETFL: case F_GETOWN: case F_GETSIG: + case F_GETOWNER_UIDS: /* Just check FD__USE permission */ err = file_has_perm(cred, file, 0); break; -- cgit v1.2.3 From a53aab645c82f0146e35684b34692c69b5118121 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 30 Jul 2012 16:21:17 -0700 Subject: ceph: close old con before reopening on mds reconnect When we detect a mds session reset, close the old ceph_connection before reopening it. This ensures we clean up the old socket properly and keep the ceph_connection state correct. Signed-off-by: Sage Weil Reviewed-by: Alex Elder Reviewed-by: Yehuda Sadeh --- fs/ceph/mds_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 39b76d66bc5d..a5a735422aa7 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2518,6 +2518,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; + ceph_con_close(&session->s_con); ceph_con_open(&session->s_con, CEPH_ENTITY_TYPE_MDS, mds, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); -- cgit v1.2.3 From 21ec6ffa46719a4ed45531b5b01014c26f0416c4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 20 Jul 2012 08:18:36 -0500 Subject: ceph: fix potential double free We re-run the loop but we don't re-set the attrs pointer back to NULL. 
Signed-off-by: Alan Cox Reviewed-by: Alex Elder --- fs/ceph/xattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 785cb3057c95..2c2ae5be9902 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -457,6 +457,7 @@ start: for (i = 0; i < numattr; i++) kfree(xattrs[i]); kfree(xattrs); + xattrs = NULL; goto start; } err = -EIO; -- cgit v1.2.3 From aa711ee3402ad10ffd5b70ce0417fadc9a95cccf Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 13 Jul 2012 20:35:11 -0500 Subject: ceph: define snap counts as u32 everywhere There are two structures in which a count of snapshots are maintained: struct ceph_snap_context { ... u32 num_snaps; ... } and struct ceph_snap_realm { ... u32 num_prior_parent_snaps; /* had prior to parent_since */ ... u32 num_snaps; ... } These fields never take on negative values (e.g., to hold special meaning), and so are really inherently unsigned. Furthermore they take their value from over-the-wire or on-disk formatted 32-bit values. So change their definition to have type u32, and change some spots elsewhere in the code to account for this change. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/snap.c | 18 ++++++++++-------- fs/ceph/super.h | 4 ++-- include/linux/ceph/libceph.h | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index e5206fc76562..cbb2f54a3019 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -296,8 +296,7 @@ static int build_snap_context(struct ceph_snap_realm *realm) struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; int err = 0; - int i; - int num = realm->num_prior_parent_snaps + realm->num_snaps; + u32 num = realm->num_prior_parent_snaps + realm->num_snaps; /* * build parent context, if it hasn't been built. 
@@ -321,11 +320,11 @@ static int build_snap_context(struct ceph_snap_realm *realm) realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { - dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" + dout("build_snap_context %llx %p: %p seq %lld (%u snaps)" " (unchanged)\n", realm->ino, realm, realm->cached_context, realm->cached_context->seq, - realm->cached_context->num_snaps); + (unsigned int) realm->cached_context->num_snaps); return 0; } @@ -342,6 +341,8 @@ static int build_snap_context(struct ceph_snap_realm *realm) num = 0; snapc->seq = realm->seq; if (parent) { + u32 i; + /* include any of parent's snaps occurring _after_ my parent became my parent */ for (i = 0; i < parent->cached_context->num_snaps; i++) @@ -361,8 +362,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; - dout("build_snap_context %llx %p: %p seq %lld (%d snaps)\n", - realm->ino, realm, snapc, snapc->seq, snapc->num_snaps); + dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n", + realm->ino, realm, snapc, snapc->seq, + (unsigned int) snapc->num_snaps); if (realm->cached_context) ceph_put_snap_context(realm->cached_context); @@ -402,9 +404,9 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm) * helper to allocate and decode an array of snapids. free prior * instance, if any. 
*/ -static int dup_array(u64 **dst, __le64 *src, int num) +static int dup_array(u64 **dst, __le64 *src, u32 num) { - int i; + u32 i; kfree(*dst); if (num) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fc35036d258d..3ea48b7b98b3 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -612,9 +612,9 @@ struct ceph_snap_realm { u64 parent_since; /* snapid when our current parent became so */ u64 *prior_parent_snaps; /* snaps inherited from any parents we */ - int num_prior_parent_snaps; /* had prior to parent_since */ + u32 num_prior_parent_snaps; /* had prior to parent_since */ u64 *snaps; /* snaps specific to this realm */ - int num_snaps; + u32 num_snaps; struct ceph_snap_realm *parent; struct list_head children; /* list of child realms */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index ea072e1f9db9..42624789b06f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -154,7 +154,7 @@ struct ceph_client { struct ceph_snap_context { atomic_t nref; u64 seq; - int num_snaps; + u32 num_snaps; u64 snaps[]; }; -- cgit v1.2.3 From 5accdf82ba25cacefd6c1867f1704beb4d244cdd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:34 +0200 Subject: fs: Improve filesystem freezing handling vfs_check_frozen() tests are racy since the filesystem can be frozen just after the test is performed. Thus in write paths we can end up marking some pages or inodes dirty even though the file system is already frozen. This creates problems with flusher thread hanging on frozen filesystem. Another problem is that exclusion between ->page_mkwrite() and filesystem freezing has been handled by setting page dirty and then verifying s_frozen. This guaranteed that either the freezing code sees the faulted page, writes it, and writeprotects it again or we see s_frozen set and bail out of page fault. 
This works to protect from page being marked writeable while filesystem freezing is running but has an unpleasant artefact of leaving dirty (although unmodified and writeprotected) pages on frozen filesystem resulting in similar problems with flusher thread as the first problem. This patch aims at providing exclusion between write paths and filesystem freezing. We implement a writer-freeze read-write semaphore in the superblock. Actually, there are three such semaphores because of lock ranking reasons - one for page fault handlers (->page_mkwrite), one for all other writers, and one for internal filesystem purposes (used e.g. to track running transactions). Write paths which should block freezing (e.g. directory operations, ->aio_write(), ->page_mkwrite) hold the reader side of the semaphore. Code freezing the filesystem takes the writer side. However, we don't really want to bounce cachelines of the semaphores between CPUs for each write happening. So we implement the reader side of the semaphore as a per-cpu counter and the writer side is implemented using the s_writers.frozen superblock field. [AV: microoptimize sb_start_write(); we want it fast in normal case] BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M.
Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/fs.h | 150 ++++++++++++++++++++++++++++++-- 2 files changed, 373 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index c743fb3be4b8..0f64ecb7b1bf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -33,12 +33,19 @@ #include #include #include +#include #include "internal.h" LIST_HEAD(super_blocks); DEFINE_SPINLOCK(sb_lock); +static char *sb_writers_name[SB_FREEZE_LEVELS] = { + "sb_writers", + "sb_pagefaults", + "sb_internal", +}; + /* * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. @@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) return total_objects; } +static int init_sb_writers(struct super_block *s, struct file_system_type *type) +{ + int err; + int i; + + for (i = 0; i < SB_FREEZE_LEVELS; i++) { + err = percpu_counter_init(&s->s_writers.counter[i], 0); + if (err < 0) + goto err_out; + lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], + &type->s_writers_key[i], 0); + } + init_waitqueue_head(&s->s_writers.wait); + init_waitqueue_head(&s->s_writers.wait_unfrozen); + return 0; +err_out: + while (--i >= 0) + percpu_counter_destroy(&s->s_writers.counter[i]); + return err; +} + +static void destroy_sb_writers(struct super_block *s) +{ + int i; + + for (i = 0; i < SB_FREEZE_LEVELS; i++) + percpu_counter_destroy(&s->s_writers.counter[i]); +} + /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to @@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) if (s) { if (security_sb_alloc(s)) { + /* + * We cannot call security_sb_free() without + * security_sb_alloc() succeeding. 
So bail out manually + */ kfree(s); s = NULL; goto out; } #ifdef CONFIG_SMP s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) { - security_sb_free(s); - kfree(s); - s = NULL; - goto out; - } else { + if (!s->s_files) + goto err_out; + else { int i; for_each_possible_cpu(i) @@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) #else INIT_LIST_HEAD(&s->s_files); #endif + if (init_sb_writers(s, type)) + goto err_out; s->s_flags = flags; s->s_bdi = &default_backing_dev_info; INIT_HLIST_NODE(&s->s_instances); @@ -190,6 +229,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) } out: return s; +err_out: + security_sb_free(s); +#ifdef CONFIG_SMP + if (s->s_files) + free_percpu(s->s_files); +#endif + destroy_sb_writers(s); + kfree(s); + s = NULL; + goto out; } /** @@ -203,6 +252,7 @@ static inline void destroy_super(struct super_block *s) #ifdef CONFIG_SMP free_percpu(s->s_files); #endif + destroy_sb_writers(s); security_sb_free(s); WARN_ON(!list_empty(&s->s_mounts)); kfree(s->s_subtype); @@ -651,10 +701,11 @@ struct super_block *get_super_thawed(struct block_device *bdev) { while (1) { struct super_block *s = get_super(bdev); - if (!s || s->s_frozen == SB_UNFROZEN) + if (!s || s->s_writers.frozen == SB_UNFROZEN) return s; up_read(&s->s_umount); - vfs_check_frozen(s, SB_FREEZE_WRITE); + wait_event(s->s_writers.wait_unfrozen, + s->s_writers.frozen == SB_UNFROZEN); put_super(s); } } @@ -732,7 +783,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) int retval; int remount_ro; - if (sb->s_frozen != SB_UNFROZEN) + if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; #ifdef CONFIG_BLOCK @@ -1163,6 +1214,120 @@ out: return ERR_PTR(error); } +/* + * This is an internal function, please use sb_end_{write,pagefault,intwrite} + * instead. 
+ */ +void __sb_end_write(struct super_block *sb, int level) +{ + percpu_counter_dec(&sb->s_writers.counter[level-1]); + /* + * Make sure s_writers are updated before we wake up waiters in + * freeze_super(). + */ + smp_mb(); + if (waitqueue_active(&sb->s_writers.wait)) + wake_up(&sb->s_writers.wait); + rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_); +} +EXPORT_SYMBOL(__sb_end_write); + +#ifdef CONFIG_LOCKDEP +/* + * We want lockdep to tell us about possible deadlocks with freezing but + * it's it bit tricky to properly instrument it. Getting a freeze protection + * works as getting a read lock but there are subtle problems. XFS for example + * gets freeze protection on internal level twice in some cases, which is OK + * only because we already hold a freeze protection also on higher level. Due + * to these cases we have to tell lockdep we are doing trylock when we + * already hold a freeze protection for a higher freeze level. + */ +static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock, + unsigned long ip) +{ + int i; + + if (!trylock) { + for (i = 0; i < level - 1; i++) + if (lock_is_held(&sb->s_writers.lock_map[i])) { + trylock = true; + break; + } + } + rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip); +} +#endif + +/* + * This is an internal function, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +int __sb_start_write(struct super_block *sb, int level, bool wait) +{ +retry: + if (unlikely(sb->s_writers.frozen >= level)) { + if (!wait) + return 0; + wait_event(sb->s_writers.wait_unfrozen, + sb->s_writers.frozen < level); + } + +#ifdef CONFIG_LOCKDEP + acquire_freeze_lock(sb, level, !wait, _RET_IP_); +#endif + percpu_counter_inc(&sb->s_writers.counter[level-1]); + /* + * Make sure counter is updated before we check for frozen. + * freeze_super() first sets frozen and then checks the counter. 
+ */ + smp_mb(); + if (unlikely(sb->s_writers.frozen >= level)) { + __sb_end_write(sb, level); + goto retry; + } + return 1; +} +EXPORT_SYMBOL(__sb_start_write); + +/** + * sb_wait_write - wait until all writers to given file system finish + * @sb: the super for which we wait + * @level: type of writers we wait for (normal vs page fault) + * + * This function waits until there are no writers of given type to given file + * system. Caller of this function should make sure there can be no new writers + * of type @level before calling this function. Otherwise this function can + * livelock. + */ +static void sb_wait_write(struct super_block *sb, int level) +{ + s64 writers; + + /* + * We just cycle-through lockdep here so that it does not complain + * about returning with lock to userspace + */ + rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); + rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_); + + do { + DEFINE_WAIT(wait); + + /* + * We use a barrier in prepare_to_wait() to separate setting + * of frozen and checking of the counter + */ + prepare_to_wait(&sb->s_writers.wait, &wait, + TASK_UNINTERRUPTIBLE); + + writers = percpu_counter_sum(&sb->s_writers.counter[level-1]); + if (writers) + schedule(); + + finish_wait(&sb->s_writers.wait, &wait); + } while (writers); +} + /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock @@ -1170,6 +1335,31 @@ out: * Syncs the super to make sure the filesystem is consistent and calls the fs's * freeze_fs. Subsequent calls to this without first thawing the fs will return * -EBUSY. + * + * During this function, sb->s_writers.frozen goes through these values: + * + * SB_UNFROZEN: File system is normal, all writes progress as usual. + * + * SB_FREEZE_WRITE: The file system is in the process of being frozen. New + * writes should be blocked, though page faults are still allowed. We wait for + * all writes to complete and then proceed to the next stage. 
+ * + * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked + * but internal fs threads can still modify the filesystem (although they + * should not dirty new pages or inodes), writeback can run etc. After waiting + * for all running page faults we sync the filesystem which will clean all + * dirty pages and inodes (no new dirty pages or inodes can be created when + * sync is running). + * + * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs + * modification are blocked (e.g. XFS preallocation truncation on inode + * reclaim). This is usually implemented by blocking new transactions for + * filesystems that have them and need this additional guard. After all + * internal writers are finished we call ->freeze_fs() to finish filesystem + * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is + * mostly auxiliary for filesystems to verify they do not modify frozen fs. + * + * sb->s_writers.frozen is protected by sb->s_umount. */ int freeze_super(struct super_block *sb) { @@ -1177,7 +1367,7 @@ int freeze_super(struct super_block *sb) atomic_inc(&sb->s_active); down_write(&sb->s_umount); - if (sb->s_frozen) { + if (sb->s_writers.frozen != SB_UNFROZEN) { deactivate_locked_super(sb); return -EBUSY; } @@ -1188,33 +1378,53 @@ int freeze_super(struct super_block *sb) } if (sb->s_flags & MS_RDONLY) { - sb->s_frozen = SB_FREEZE_TRANS; - smp_wmb(); + /* Nothing to do really... */ + sb->s_writers.frozen = SB_FREEZE_COMPLETE; up_write(&sb->s_umount); return 0; } - sb->s_frozen = SB_FREEZE_WRITE; + /* From now on, no new normal writers can start */ + sb->s_writers.frozen = SB_FREEZE_WRITE; + smp_wmb(); + + /* Release s_umount to preserve sb_start_write -> s_umount ordering */ + up_write(&sb->s_umount); + + sb_wait_write(sb, SB_FREEZE_WRITE); + + /* Now we go and block page faults... 
*/ + down_write(&sb->s_umount); + sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; smp_wmb(); + sb_wait_write(sb, SB_FREEZE_PAGEFAULT); + + /* All writers are done so after syncing there won't be dirty data */ sync_filesystem(sb); - sb->s_frozen = SB_FREEZE_TRANS; + /* Now wait for internal filesystem counter */ + sb->s_writers.frozen = SB_FREEZE_FS; smp_wmb(); + sb_wait_write(sb, SB_FREEZE_FS); - sync_blockdev(sb->s_bdev); if (sb->s_op->freeze_fs) { ret = sb->s_op->freeze_fs(sb); if (ret) { printk(KERN_ERR "VFS:Filesystem freeze failed\n"); - sb->s_frozen = SB_UNFROZEN; + sb->s_writers.frozen = SB_UNFROZEN; smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; } } + /* + * This is just for debugging purposes so that fs can warn if it + * sees write activity when frozen is set to SB_FREEZE_COMPLETE. + */ + sb->s_writers.frozen = SB_FREEZE_COMPLETE; up_write(&sb->s_umount); return 0; } @@ -1231,7 +1441,7 @@ int thaw_super(struct super_block *sb) int error; down_write(&sb->s_umount); - if (sb->s_frozen == SB_UNFROZEN) { + if (sb->s_writers.frozen == SB_UNFROZEN) { up_write(&sb->s_umount); return -EINVAL; } @@ -1244,16 +1454,15 @@ int thaw_super(struct super_block *sb) if (error) { printk(KERN_ERR "VFS:Filesystem thaw failed\n"); - sb->s_frozen = SB_FREEZE_TRANS; up_write(&sb->s_umount); return error; } } out: - sb->s_frozen = SB_UNFROZEN; + sb->s_writers.frozen = SB_UNFROZEN; smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 80c819cbe272..aefed9426b03 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -412,6 +412,7 @@ struct inodes_stat_t { #include #include #include +#include #include @@ -1439,6 +1440,8 @@ extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); +struct mm_struct; + /* 
* Umount options */ @@ -1452,6 +1455,32 @@ extern int send_sigurg(struct fown_struct *fown); extern struct list_head super_blocks; extern spinlock_t sb_lock; +/* Possible states of 'frozen' field */ +enum { + SB_UNFROZEN = 0, /* FS is unfrozen */ + SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ + SB_FREEZE_TRANS = 2, + SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ + SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop + * internal threads if needed) */ + SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ +}; + +#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) + +struct sb_writers { + /* Counters for counting writers at each level */ + struct percpu_counter counter[SB_FREEZE_LEVELS]; + wait_queue_head_t wait; /* queue for waiting for + writers / faults to finish */ + int frozen; /* Is sb frozen? */ + wait_queue_head_t wait_unfrozen; /* queue for waiting for + sb to be thawed */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map lock_map[SB_FREEZE_LEVELS]; +#endif +}; + struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ @@ -1501,6 +1530,7 @@ struct super_block { int s_frozen; wait_queue_head_t s_wait_unfrozen; + struct sb_writers s_writers; char s_id[32]; /* Informational name */ u8 s_uuid[16]; /* UUID */ @@ -1555,14 +1585,119 @@ extern struct timespec current_fs_time(struct super_block *sb); /* * Snapshotting support. */ -enum { - SB_UNFROZEN = 0, - SB_FREEZE_WRITE = 1, - SB_FREEZE_TRANS = 2, -}; +/* Will go away when all users are converted */ +#define vfs_check_frozen(sb, level) do { } while (0) + +void __sb_end_write(struct super_block *sb, int level); +int __sb_start_write(struct super_block *sb, int level, bool wait); + +/** + * sb_end_write - drop write access to a superblock + * @sb: the super we wrote to + * + * Decrement number of writers to the filesystem. Wake up possible waiters + * wanting to freeze the filesystem. 
+ */ +static inline void sb_end_write(struct super_block *sb) +{ + __sb_end_write(sb, SB_FREEZE_WRITE); +} + +/** + * sb_end_pagefault - drop write access to a superblock from a page fault + * @sb: the super we wrote to + * + * Decrement number of processes handling write page fault to the filesystem. + * Wake up possible waiters wanting to freeze the filesystem. + */ +static inline void sb_end_pagefault(struct super_block *sb) +{ + __sb_end_write(sb, SB_FREEZE_PAGEFAULT); +} + +/** + * sb_end_intwrite - drop write access to a superblock for internal fs purposes + * @sb: the super we wrote to + * + * Decrement fs-internal number of writers to the filesystem. Wake up possible + * waiters wanting to freeze the filesystem. + */ +static inline void sb_end_intwrite(struct super_block *sb) +{ + __sb_end_write(sb, SB_FREEZE_FS); +} + +/** + * sb_start_write - get write access to a superblock + * @sb: the super we write to + * + * When a process wants to write data or metadata to a file system (i.e. dirty + * a page or an inode), it should embed the operation in a sb_start_write() - + * sb_end_write() pair to get exclusion against file system freezing. This + * function increments number of writers preventing freezing. If the file + * system is already frozen, the function waits until the file system is + * thawed. + * + * Since freeze protection behaves as a lock, users have to preserve + * ordering of freeze protection and other filesystem locks. Generally, + * freeze protection should be the outermost lock. In particular, we have: + * + * sb_start_write + * -> i_mutex (write path, truncate, directory ops, ...) 
+ * -> s_umount (freeze_super, thaw_super) + */ +static inline void sb_start_write(struct super_block *sb) +{ + __sb_start_write(sb, SB_FREEZE_WRITE, true); +} + +static inline int sb_start_write_trylock(struct super_block *sb) +{ + return __sb_start_write(sb, SB_FREEZE_WRITE, false); +} + +/** + * sb_start_pagefault - get write access to a superblock from a page fault + * @sb: the super we write to + * + * When a process starts handling write page fault, it should embed the + * operation into sb_start_pagefault() - sb_end_pagefault() pair to get + * exclusion against file system freezing. This is needed since the page fault + * is going to dirty a page. This function increments number of running page + * faults preventing freezing. If the file system is already frozen, the + * function waits until the file system is thawed. + * + * Since page fault freeze protection behaves as a lock, users have to preserve + * ordering of freeze protection and other filesystem locks. It is advised to + * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault + * handling code implies lock dependency: + * + * mmap_sem + * -> sb_start_pagefault + */ +static inline void sb_start_pagefault(struct super_block *sb) +{ + __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); +} + +/* + * sb_start_intwrite - get write access to a superblock for internal fs purposes + * @sb: the super we write to + * + * This is the third level of protection against filesystem freezing. It is + * free for use by a filesystem. The only requirement is that it must rank + * below sb_start_pagefault. + * + * For example filesystem can call sb_start_intwrite() when starting a + * transaction which somewhat eases handling of freezing for internal sources + * of filesystem changes (internal fs threads, discarding preallocation on file + * close, etc.). 
+ */ +static inline void sb_start_intwrite(struct super_block *sb) +{ + __sb_start_write(sb, SB_FREEZE_FS, true); +} -#define vfs_check_frozen(sb, level) \ - wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) extern bool inode_owner_or_capable(const struct inode *inode); @@ -1886,6 +2021,7 @@ struct file_system_type { struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; struct lock_class_key s_vfs_rename_key; + struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; struct lock_class_key i_lock_key; struct lock_class_key i_mutex_key; -- cgit v1.2.3 From eb04c28288bb0098d0e75d81ba2a575239de71d8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:35 +0200 Subject: fs: Add freezing handling to mnt_want_write() / mnt_drop_write() Most of places where we want freeze protection coincides with the places where we also have remount-ro protection. So make mnt_want_write() and mnt_drop_write() (and their _file alternative) prevent freezing as well. For the few cases that are really interested only in remount-ro protection provide new function variants. BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. 
Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/file_table.c | 2 +- fs/inode.c | 4 +-- fs/internal.h | 4 +++ fs/namespace.c | 97 +++++++++++++++++++++++++++++++++++++++++++++------------ fs/open.c | 2 +- 5 files changed, 85 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index b54bf7fd0b15..701985e4ccda 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file) return; if (file_check_writeable(file) != 0) return; - mnt_drop_write(mnt); + __mnt_drop_write(mnt); file_release_write(file); } diff --git a/fs/inode.c b/fs/inode.c index 775cbabd4fa5..006c85ca06eb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1660,11 +1660,11 @@ int file_update_time(struct file *file) return 0; /* Finally allowed to write? Takes lock. */ - if (mnt_want_write_file(file)) + if (__mnt_want_write_file(file)) return 0; ret = update_time(inode, &now, sync_it); - mnt_drop_write_file(file); + __mnt_drop_write_file(file); return ret; } diff --git a/fs/internal.h b/fs/internal.h index a6fd56c68b11..371bcc4b1697 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -61,6 +61,10 @@ extern void __init mnt_init(void); extern struct lglock vfsmount_lock; +extern int __mnt_want_write(struct vfsmount *); +extern int __mnt_want_write_file(struct file *); +extern void __mnt_drop_write(struct vfsmount *); +extern void __mnt_drop_write_file(struct file *); /* * fs_struct.c diff --git a/fs/namespace.c b/fs/namespace.c index c53d3381b0d0..4d31f73e2561 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt) } /* - * Most r/o checks on a fs are for operations that take - * discrete amounts of time, like a write() or unlink(). 
- * We must keep track of when those operations start - * (for permission checks) and when they end, so that - * we can determine when writes are able to occur to - * a filesystem. + * Most r/o & frozen checks on a fs are for operations that take discrete + * amounts of time, like a write() or unlink(). We must keep track of when + * those operations start (for permission checks) and when they end, so that we + * can determine when writes are able to occur to a filesystem. */ /** - * mnt_want_write - get write access to a mount + * __mnt_want_write - get write access to a mount without freeze protection * @m: the mount on which to take a write * - * This tells the low-level filesystem that a write is - * about to be performed to it, and makes sure that - * writes are allowed before returning success. When - * the write operation is finished, mnt_drop_write() - * must be called. This is effectively a refcount. + * This tells the low-level filesystem that a write is about to be performed to + * it, and makes sure that writes are allowed (mnt it read-write) before + * returning success. This operation does not protect against filesystem being + * frozen. When the write operation is finished, __mnt_drop_write() must be + * called. This is effectively a refcount. */ -int mnt_want_write(struct vfsmount *m) +int __mnt_want_write(struct vfsmount *m) { struct mount *mnt = real_mount(m); int ret = 0; @@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m) ret = -EROFS; } preempt_enable(); + + return ret; +} + +/** + * mnt_want_write - get write access to a mount + * @m: the mount on which to take a write + * + * This tells the low-level filesystem that a write is about to be performed to + * it, and makes sure that writes are allowed (mount is read-write, filesystem + * is not frozen) before returning success. When the write operation is + * finished, mnt_drop_write() must be called. This is effectively a refcount. 
+ */ +int mnt_want_write(struct vfsmount *m) +{ + int ret; + + sb_start_write(m->mnt_sb); + ret = __mnt_want_write(m); + if (ret) + sb_end_write(m->mnt_sb); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); @@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt) EXPORT_SYMBOL_GPL(mnt_clone_write); /** - * mnt_want_write_file - get write access to a file's mount + * __mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * - * This is like mnt_want_write, but it takes a file and can + * This is like __mnt_want_write, but it takes a file and can * do some optimisations if the file is open for write already */ -int mnt_want_write_file(struct file *file) +int __mnt_want_write_file(struct file *file) { struct inode *inode = file->f_dentry->d_inode; + if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) - return mnt_want_write(file->f_path.mnt); + return __mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); } + +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already + */ +int mnt_want_write_file(struct file *file) +{ + int ret; + + sb_start_write(file->f_path.mnt->mnt_sb); + ret = __mnt_want_write_file(file); + if (ret) + sb_end_write(file->f_path.mnt->mnt_sb); + return ret; +} EXPORT_SYMBOL_GPL(mnt_want_write_file); /** - * mnt_drop_write - give up write access to a mount + * __mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access * * Tells the low-level filesystem that we are done * performing writes to it. Must be matched with - * mnt_want_write() call above. + * __mnt_want_write() call above. 
*/ -void mnt_drop_write(struct vfsmount *mnt) +void __mnt_drop_write(struct vfsmount *mnt) { preempt_disable(); mnt_dec_writers(real_mount(mnt)); preempt_enable(); } + +/** + * mnt_drop_write - give up write access to a mount + * @mnt: the mount on which to give up write access + * + * Tells the low-level filesystem that we are done performing writes to it and + * also allows filesystem to be frozen again. Must be matched with + * mnt_want_write() call above. + */ +void mnt_drop_write(struct vfsmount *mnt) +{ + __mnt_drop_write(mnt); + sb_end_write(mnt->mnt_sb); +} EXPORT_SYMBOL_GPL(mnt_drop_write); +void __mnt_drop_write_file(struct file *file) +{ + __mnt_drop_write(file->f_path.mnt); +} + void mnt_drop_write_file(struct file *file) { mnt_drop_write(file->f_path.mnt); diff --git a/fs/open.c b/fs/open.c index 8d2c8970029c..9ddc18565503 100644 --- a/fs/open.c +++ b/fs/open.c @@ -620,7 +620,7 @@ static inline int __get_file_write_access(struct inode *inode, /* * Balanced in __fput() */ - error = mnt_want_write(mnt); + error = __mnt_want_write(mnt); if (error) put_write_access(inode); } -- cgit v1.2.3 From 5d37e9e6dec65cd21be68ee92de99686213e916b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:36 +0200 Subject: fs: Skip atime update on frozen filesystem It is unexpected to block reading of frozen filesystem because of atime update. Also handling blocking on frozen filesystem because of atime update would make locking more complex than it already is. So just skip atime update when filesystem is frozen like we skip it when filesystem is remounted read-only. BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. 
Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 006c85ca06eb..74d7c20fac88 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1542,9 +1542,11 @@ void touch_atime(struct path *path) if (timespec_equal(&inode->i_atime, &now)) return; - if (mnt_want_write(mnt)) + if (!sb_start_write_trylock(inode->i_sb)) return; + if (__mnt_want_write(mnt)) + goto skip_update; /* * File systems can error out when updating inodes if they need to * allocate new space to modify an inode (such is the case for @@ -1553,7 +1555,9 @@ void touch_atime(struct path *path) * so just ignore the return value. */ update_time(inode, &now, S_ATIME); - mnt_drop_write(mnt); + __mnt_drop_write(mnt); +skip_update: + sb_end_write(inode->i_sb); } EXPORT_SYMBOL(touch_atime); -- cgit v1.2.3 From 14da9200140f8d722ad1767dfabadebd8b34f2ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:37 +0200 Subject: fs: Protect write paths by sb_start_write - sb_end_write There are several entry points which dirty pages in a filesystem. mmap (handled by block_page_mkwrite()), buffered write (handled by __generic_file_aio_write()), splice write (generic_file_splice_write), truncate, and fallocate (these can dirty last partial page - handled inside each filesystem separately). Protect these places with sb_start_write() and sb_end_write(). ->page_mkwrite() calls are particularly complex since they are called with mmap_sem held and thus we cannot use standard sb_start_write() due to lock ordering constraints. We solve the problem by using a special freeze protection sb_start_pagefault() which ranks below mmap_sem. BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. 
Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/buffer.c | 22 ++++------------------ fs/open.c | 7 ++++++- fs/splice.c | 3 +++ mm/filemap.c | 12 ++++++++++-- mm/filemap_xip.c | 5 +++-- 5 files changed, 26 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index d5ec360e332d..9f6d2e41281d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write); * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. * - * Direct callers of this function should call vfs_check_frozen() so that page - * fault does not busyloop until the fs is thawed. + * Direct callers of this function should protect against filesystem freezing + * using sb_start_write() - sb_end_write() functions. */ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) @@ -2345,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret < 0)) goto out_unlock; - /* - * Freezing in progress? We check after the page is marked dirty and - * with page lock held so if the test here fails, we are sure freezing - * code will wait during syncing until the page fault is done - at that - * point page will be dirty and unlocked so freezing code will write it - * and writeprotect it again. - */ set_page_dirty(page); - if (inode->i_sb->s_frozen != SB_UNFROZEN) { - ret = -EAGAIN; - goto out_unlock; - } wait_on_page_writeback(page); return 0; out_unlock: @@ -2371,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, int ret; struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; - /* - * This check is racy but catches the common case. The check in - * __block_page_mkwrite() is reliable. 
- */ - vfs_check_frozen(sb, SB_FREEZE_WRITE); + sb_start_pagefault(sb); ret = __block_page_mkwrite(vma, vmf, get_block); + sb_end_pagefault(sb); return block_page_mkwrite_return(ret); } EXPORT_SYMBOL(block_page_mkwrite); diff --git a/fs/open.c b/fs/open.c index 9ddc18565503..f3d96e7e7b19 100644 --- a/fs/open.c +++ b/fs/open.c @@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) if (IS_APPEND(inode)) goto out_putf; + sb_start_write(inode->i_sb); error = locks_verify_truncate(inode, file, length); if (!error) error = security_path_truncate(&file->f_path); if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); + sb_end_write(inode->i_sb); out_putf: fput(file); out: @@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - return file->f_op->fallocate(file, mode, offset, len); + sb_start_write(inode->i_sb); + ret = file->f_op->fallocate(file, mode, offset, len); + sb_end_write(inode->i_sb); + return ret; } SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) diff --git a/fs/splice.c b/fs/splice.c index 7bf08fa22ec9..41514dd89462 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, }; ssize_t ret; + sb_start_write(inode->i_sb); + pipe_lock(pipe); splice_from_pipe_begin(&sd); @@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, *ppos += ret; balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } + sb_end_write(inode->i_sb); return ret; } diff --git a/mm/filemap.c b/mm/filemap.c index 51efee65c2cc..fa5ca304148e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1718,6 +1718,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = vma->vm_file->f_path.dentry->d_inode; int ret = VM_FAULT_LOCKED; + sb_start_pagefault(inode->i_sb); 
file_update_time(vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping) { @@ -1725,7 +1726,14 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_NOPAGE; goto out; } + /* + * We mark the page dirty already here so that when freeze is in + * progress, we are guaranteed that writeback during freezing will + * see the dirty page and writeprotect it again. + */ + set_page_dirty(page); out: + sb_end_pagefault(inode->i_sb); return ret; } EXPORT_SYMBOL(filemap_page_mkwrite); @@ -2426,8 +2434,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, count = ocount; pos = *ppos; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; written = 0; @@ -2526,6 +2532,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); @@ -2539,6 +2546,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ret = err; } blk_finish_plug(&plug); + sb_end_write(inode->i_sb); return ret; } EXPORT_SYMBOL(generic_file_aio_write); diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 80b34ef82dfe..13e013b1270c 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -402,6 +402,8 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, loff_t pos; ssize_t ret; + sb_start_write(inode->i_sb); + mutex_lock(&inode->i_mutex); if (!access_ok(VERIFY_READ, buf, len)) { @@ -412,8 +414,6 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, pos = *ppos; count = len; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -437,6 +437,7 @@ xip_file_write(struct file 
*filp, const char __user *buf, size_t len, current->backing_dev_info = NULL; out_up: mutex_unlock(&inode->i_mutex); + sb_end_write(inode->i_sb); return ret; } EXPORT_SYMBOL_GPL(xip_file_write); -- cgit v1.2.3 From 8e8ad8a57c75f3bda2d03a4c4396a9a7024ad275 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:38 +0200 Subject: ext4: Convert to new freezing mechanism We remove most of frozen checks since upper layer takes care of blocking all writes. We have to handle protection in ext4_page_mkwrite() in a special way because we cannot use generic block_page_mkwrite(). Also we add a freeze protection to ext4_evict_inode() so that iput() of unlinked inode cannot modify a frozen filesystem (we cannot easily instrument ext4_journal_start() / ext4_journal_stop() with freeze protection because we are missing the superblock pointer in ext4_journal_stop() in nojournal mode). CC: linux-ext4@vger.kernel.org CC: "Theodore Ts'o" BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Acked-by: "Theodore Ts'o" Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ext4/inode.c | 15 ++++++++++----- fs/ext4/mmp.c | 6 ++++++ fs/ext4/super.c | 31 +++++++------------------------ 3 files changed, 23 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 02bc8cbe7281..301e1c2db891 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode) if (is_bad_inode(inode)) goto no_delete; + /* + * Protect us against freezing - iput() caller didn't have to have any + * protection against it + */ + sb_start_intwrite(inode->i_sb); handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); @@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode) * cleaned up. 
*/ ext4_orphan_del(NULL, inode); + sb_end_intwrite(inode->i_sb); goto no_delete; } @@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode) stop_handle: ext4_journal_stop(handle); ext4_orphan_del(NULL, inode); + sb_end_intwrite(inode->i_sb); goto no_delete; } } @@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode) else ext4_free_inode(handle, inode); ext4_journal_stop(handle); + sb_end_intwrite(inode->i_sb); return; no_delete: ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ @@ -4701,11 +4709,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) get_block_t *get_block; int retries = 0; - /* - * This check is racy but catches the common case. We rely on - * __block_page_mkwrite() to do a reliable check. - */ - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_pagefault(inode->i_sb); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && @@ -4773,5 +4777,6 @@ retry_alloc: out_ret: ret = block_page_mkwrite_return(ret); out: + sb_end_pagefault(inode->i_sb); return ret; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index f99a1311e847..fe7c63f4717e 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) { struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); + /* + * We protect against freezing so that we don't create dirty buffers + * on frozen filesystem. 
+ */ + sb_start_write(sb); ext4_mmp_csum_set(sb, mmp); mark_buffer_dirty(bh); lock_buffer(bh); @@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) get_bh(bh); submit_bh(WRITE_SYNC, bh); wait_on_buffer(bh); + sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) return 1; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d8759401ecae..9cc9bfd5176b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -332,33 +332,17 @@ static void ext4_put_nojournal(handle_t *handle) * journal_end calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if * appropriate. - * - * To avoid j_barrier hold in userspace when a user calls freeze(), - * ext4 prevents a new handle from being started by s_frozen, which - * is in an upper layer. */ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { journal_t *journal; - handle_t *handle; trace_ext4_journal_start(sb, nblocks, _RET_IP_); if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); + WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; - handle = ext4_journal_current_handle(); - - /* - * If a handle has been started, it should be allowed to - * finish, otherwise deadlock could happen between freeze - * and others(e.g. truncate) due to the restart of the - * journal handle if the filesystem is forzen and active - * handles are not stopped. 
- */ - if (!handle) - vfs_check_frozen(sb, SB_FREEZE_TRANS); - if (!journal) return ext4_get_nojournal(); /* @@ -2723,6 +2707,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) sb = elr->lr_super; ngroups = EXT4_SB(sb)->s_groups_count; + sb_start_write(sb); for (group = elr->lr_next_group; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) { @@ -2749,6 +2734,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; } + sb_end_write(sb); return ret; } @@ -4302,10 +4288,8 @@ int ext4_force_commit(struct super_block *sb) return 0; journal = EXT4_SB(sb)->s_journal; - if (journal) { - vfs_check_frozen(sb, SB_FREEZE_TRANS); + if (journal) ret = ext4_journal_force_commit(journal); - } return ret; } @@ -4342,9 +4326,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait) * gives us a chance to flush the journal completely and mark the fs clean. * * Note that only this function cannot bring a filesystem to be in a clean - * state independently, because ext4 prevents a new handle from being started - * by @sb->s_frozen, which stays in an upper layer. It thus needs help from - * the upper layer. + * state independently. It relies on upper layer to stop all data & metadata + * modifications. */ static int ext4_freeze(struct super_block *sb) { @@ -4371,7 +4354,7 @@ static int ext4_freeze(struct super_block *sb) EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: - /* we rely on s_frozen to stop further updates */ + /* we rely on upper layer to stop further updates */ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return error; } -- cgit v1.2.3 From d9457dc056249913a7abe8b71dc09e427e590e35 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:39 +0200 Subject: xfs: Convert to new freezing code Generic code now blocks all writers from standard write paths. 
So we add blocking of all writers coming from ioctl (we get a protection of ioctl against racing remount read-only as a bonus) and convert xfs_file_aio_write() to a non-racy freeze protection. We also keep freeze protection on transaction start to block internal filesystem writes such as removal of preallocated blocks. CC: Ben Myers CC: Alex Elder CC: xfs@oss.sgi.com Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/xfs/xfs_aops.c | 18 +++++++++++++++++ fs/xfs/xfs_file.c | 10 +++++++--- fs/xfs/xfs_ioctl.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++--- fs/xfs/xfs_ioctl32.c | 12 ++++++++++++ fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_mount.h | 3 --- fs/xfs/xfs_sync.c | 2 +- fs/xfs/xfs_trans.c | 17 +++++++++++++--- fs/xfs/xfs_trans.h | 2 ++ 10 files changed, 109 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8dad722c0041..daa42383ebd9 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -123,6 +123,12 @@ xfs_setfilesize_trans_alloc( ioend->io_append_trans = tp; + /* + * We will pass freeze protection with a transaction. So tell lockdep + * we released it. + */ + rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 1, _THIS_IP_); /* * We hand off the transaction to the completion thread now, so * clear the flag here. @@ -199,6 +205,15 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); int error = 0; + if (ioend->io_append_trans) { + /* + * We've got freeze protection passed with the transaction. + * Tell lockdep about it. 
+ */ + rwsem_acquire_read( + &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); + } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { ioend->io_error = -EIO; goto done; @@ -1410,6 +1425,9 @@ out_trans_cancel: if (ioend->io_append_trans) { current_set_flags_nested(&ioend->io_append_trans->t_pflags, PF_FSTRANS); + rwsem_acquire_read( + &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); xfs_trans_cancel(ioend->io_append_trans, 0); } out_destroy_ioend: diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 9f7ec15a6522..f0081f20e5c0 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -781,10 +781,12 @@ xfs_file_aio_write( if (ocount == 0) return 0; - xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); + sb_start_write(inode->i_sb); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + ret = -EIO; + goto out; + } if (unlikely(file->f_flags & O_DIRECT)) ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); @@ -803,6 +805,8 @@ xfs_file_aio_write( ret = err; } +out: + sb_end_write(inode->i_sb); return ret; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f1535d25a9b..0e0232c3b6d9 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -364,9 +364,15 @@ xfs_fssetdm_by_handle( if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(parfilp); + if (error) + return error; + dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + mnt_drop_write_file(parfilp); return PTR_ERR(dentry); + } if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { error = -XFS_ERROR(EPERM); @@ -382,6 +388,7 @@ xfs_fssetdm_by_handle( fsd.fsd_dmstate); out: + mnt_drop_write_file(parfilp); dput(dentry); return error; } @@ -634,7 +641,11 @@ xfs_ioc_space( if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; + error = mnt_want_write_file(filp); + if (error) + 
return error; error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); + mnt_drop_write_file(filp); return -error; } @@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr( { struct fsxattr fa; unsigned int mask; + int error; if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; @@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr( if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) mask |= FSX_NONBLOCK; - return -xfs_ioctl_setattr(ip, &fa, mask); + error = mnt_want_write_file(filp); + if (error) + return error; + error = xfs_ioctl_setattr(ip, &fa, mask); + mnt_drop_write_file(filp); + return -error; } STATIC int @@ -1196,6 +1213,7 @@ xfs_ioc_setxflags( struct fsxattr fa; unsigned int flags; unsigned int mask; + int error; if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -1210,7 +1228,12 @@ xfs_ioc_setxflags( mask |= FSX_NONBLOCK; fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - return -xfs_ioctl_setattr(ip, &fa, mask); + error = mnt_want_write_file(filp); + if (error) + return error; + error = xfs_ioctl_setattr(ip, &fa, mask); + mnt_drop_write_file(filp); + return -error; } STATIC int @@ -1385,8 +1408,13 @@ xfs_file_ioctl( if (copy_from_user(&dmi, arg, sizeof(dmi))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; + error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, dmi.fsd_dmstate); + mnt_drop_write_file(filp); return -error; } @@ -1434,7 +1462,11 @@ xfs_file_ioctl( if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_swapext(&sxp); + mnt_drop_write_file(filp); return -error; } @@ -1463,9 +1495,14 @@ xfs_file_ioctl( if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; + /* input parameter is passed in resblks field of structure */ in = inout.resblks; error = xfs_reserve_blocks(mp, &in, &inout); + 
mnt_drop_write_file(filp); if (error) return -error; @@ -1496,7 +1533,11 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_data(mp, &in); + mnt_drop_write_file(filp); return -error; } @@ -1506,7 +1547,11 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_log(mp, &in); + mnt_drop_write_file(filp); return -error; } @@ -1516,7 +1561,11 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_rt(mp, &in); + mnt_drop_write_file(filp); return -error; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c4f2da0d2bf5..1244274a5674 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -600,7 +600,11 @@ xfs_file_compat_ioctl( if (xfs_compat_growfs_data_copyin(&in, arg)) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_data(mp, &in); + mnt_drop_write_file(filp); return -error; } case XFS_IOC_FSGROWFSRT_32: { @@ -608,7 +612,11 @@ xfs_file_compat_ioctl( if (xfs_compat_growfs_rt_copyin(&in, arg)) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_rt(mp, &in); + mnt_drop_write_file(filp); return -error; } #endif @@ -627,7 +635,11 @@ xfs_file_compat_ioctl( offsetof(struct xfs_swapext, sx_stat)) || xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_swapext(&sxp); + mnt_drop_write_file(filp); return -error; } case XFS_IOC_FSBULKSTAT_32: diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index aadfce6681ee..b3b9b26091a3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c 
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten( * the same inode that we complete here and might deadlock * on the iolock. */ - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); + sb_start_intwrite(mp->m_super); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); - tp->t_flags |= XFS_TRANS_RESERVE; + tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 536021fb3d4e..b09a4a7eb640 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1544,7 +1544,7 @@ xfs_unmountfs( int xfs_fs_writable(xfs_mount_t *mp) { - return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || + return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY)); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 90c1fc9eaea4..c6bca0d92cb1 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -314,9 +314,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, #define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ #define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ -#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) -#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) - /* * Flags for xfs_mountfs */ diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 1e9ee064dbb2..0b9feacdcd1f 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -394,7 +394,7 @@ xfs_sync_worker( if (!(mp->m_super->s_flags & MS_ACTIVE) && !(mp->m_flags & XFS_MOUNT_RDONLY)) { /* dgc: errors ignored here */ - if (mp->m_super->s_frozen == SB_UNFROZEN && + if (mp->m_super->s_writers.frozen == SB_UNFROZEN && xfs_log_need_covered(mp)) error = xfs_fs_log_dummy(mp); else diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fdf324508c5e..06ed520a767f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -576,8 +576,12 @@ 
xfs_trans_alloc( xfs_mount_t *mp, uint type) { - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); - return _xfs_trans_alloc(mp, type, KM_SLEEP); + xfs_trans_t *tp; + + sb_start_intwrite(mp->m_super); + tp = _xfs_trans_alloc(mp, type, KM_SLEEP); + tp->t_flags |= XFS_TRANS_FREEZE_PROT; + return tp; } xfs_trans_t * @@ -588,6 +592,7 @@ _xfs_trans_alloc( { xfs_trans_t *tp; + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); tp = kmem_zone_zalloc(xfs_trans_zone, memflags); @@ -611,6 +616,8 @@ xfs_trans_free( xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); atomic_dec(&tp->t_mountp->m_active_trans); + if (tp->t_flags & XFS_TRANS_FREEZE_PROT) + sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); kmem_zone_free(xfs_trans_zone, tp); } @@ -643,7 +650,11 @@ xfs_trans_dup( ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_ticket != NULL); - ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); + ntp->t_flags = XFS_TRANS_PERM_LOG_RES | + (tp->t_flags & XFS_TRANS_RESERVE) | + (tp->t_flags & XFS_TRANS_FREEZE_PROT); + /* We gave our writer reference to the new transaction */ + tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7c37b533aa8e..19c174282878 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -179,6 +179,8 @@ struct xfs_log_item_desc { #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ +#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer + count in superblock */ /* * Values for call flags parameter. 
-- cgit v1.2.3 From fef6925cd4c6b564ecff477e07a0fca987542223 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:40 +0200 Subject: ocfs2: Convert to new freezing mechanism Protect ocfs2_page_mkwrite() and ocfs2_file_aio_write() using the new freeze protection. We also protect several ioctl entry points which were missing the protection. Finally, we add freeze protection to the journaling mechanism so that iput() of unlinked inode cannot modify a frozen filesystem. CC: Mark Fasheh CC: Joel Becker CC: ocfs2-devel@oss.oracle.com Acked-by: Joel Becker Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ocfs2/file.c | 11 +++++++++-- fs/ocfs2/ioctl.c | 14 ++++++++++++-- fs/ocfs2/journal.c | 7 ++++++- fs/ocfs2/mmap.c | 2 ++ 4 files changed, 29 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 7602783d7f41..46a1f6d75104 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, { struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int ret; if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && !ocfs2_writes_unwritten_extents(osb)) @@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); + ret = mnt_want_write_file(file); + if (ret) + return ret; + ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); + mnt_drop_write_file(file); + return ret; } static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, @@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (iocb->ki_left == 0) return 0; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_write(inode->i_sb); appending = file->f_flags & O_APPEND ? 1 : 0; direct_io = file->f_flags & O_DIRECT ? 
1 : 0; @@ -2436,6 +2442,7 @@ out_sems: ocfs2_iocb_clear_sem_locked(iocb); mutex_unlock(&inode->i_mutex); + sb_end_write(inode->i_sb); if (written) ret = written; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index d96f7f81d8dd..f20edcbfe700 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (get_user(new_clusters, (int __user *)arg)) return -EFAULT; - return ocfs2_group_extend(inode, new_clusters); + status = mnt_want_write_file(filp); + if (status) + return status; + status = ocfs2_group_extend(inode, new_clusters); + mnt_drop_write_file(filp); + return status; case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: if (!capable(CAP_SYS_RESOURCE)) @@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (copy_from_user(&input, (int __user *) arg, sizeof(input))) return -EFAULT; - return ocfs2_group_add(inode, &input); + status = mnt_want_write_file(filp); + if (status) + return status; + status = ocfs2_group_add(inode, &input); + mnt_drop_write_file(filp); + return status; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 0a42ae96dca7..2dd36af79e26 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) if (journal_current_handle()) return jbd2_journal_start(journal, max_buffs); + sb_start_intwrite(osb->sb); + down_read(&osb->journal->j_trans_barrier); handle = jbd2_journal_start(journal, max_buffs); if (IS_ERR(handle)) { up_read(&osb->journal->j_trans_barrier); + sb_end_intwrite(osb->sb); mlog_errno(PTR_ERR(handle)); @@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, if (ret < 0) mlog_errno(ret); - if (!nested) + if (!nested) { up_read(&journal->j_trans_barrier); + sb_end_intwrite(osb->sb); + } return ret; } diff --git 
a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 9cd41083e991..d150372fd81d 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) sigset_t oldset; int ret; + sb_start_pagefault(inode->i_sb); ocfs2_block_signals(&oldset); /* @@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) out: ocfs2_unblock_signals(&oldset); + sb_end_pagefault(inode->i_sb); return ret; } -- cgit v1.2.3 From 39263d5e71d0fad09eab0d855a9407ad2af8378c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:41 +0200 Subject: gfs2: Convert to new freezing mechanism We update gfs2_page_mkwrite() to use new freeze protection and the transaction code to use freeze protection while the transaction is running. That is needed to stop iput() of unlinked file from modifying the filesystem. The rest is handled by the generic code. CC: cluster-devel@redhat.com CC: Steven Whitehouse Acked-by: Steven Whitehouse Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/gfs2/file.c | 15 +++------------ fs/gfs2/trans.c | 4 ++++ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 07959150d44e..8ffeb031248e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -370,11 +370,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size; int ret; - /* Wait if fs is frozen. 
This is racy so we check again later on - * and retry if the fs has been frozen after the page lock has - * been acquired - */ - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_pagefault(inode->i_sb); /* Update file times before taking page lock */ file_update_time(vma->vm_file); @@ -458,14 +454,9 @@ out: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); - /* This check must be post dropping of transaction lock */ - if (inode->i_sb->s_frozen == SB_UNFROZEN) { - wait_on_page_writeback(page); - } else { - ret = -EAGAIN; - unlock_page(page); - } + wait_on_page_writeback(page); } + sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ad3e2fb763d7..adbd27875ef9 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes, sizeof(u64)); + sb_start_intwrite(sdp->sd_vfs); gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); error = gfs2_glock_nq(&tr->tr_t_gh); @@ -68,6 +69,7 @@ fail_gunlock: gfs2_glock_dq(&tr->tr_t_gh); fail_holder_uninit: + sb_end_intwrite(sdp->sd_vfs); gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); @@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); } + sb_end_intwrite(sdp->sd_vfs); return; } @@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) gfs2_log_flush(sdp, NULL); + sb_end_intwrite(sdp->sd_vfs); } /** -- cgit v1.2.3 From 58ef6a75c38e9faa7d19bb7d7b45fe0df02e8621 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:42 +0200 Subject: fuse: Convert to new freezing mechanism Convert check in fuse_file_aio_write() to using new freeze protection. 
CC: fuse-devel@lists.sourceforge.net CC: Miklos Szeredi Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/fuse/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b321a688cde7..93d8d6c9494d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return err; count = ocount; - + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, out: current->backing_dev_info = NULL; mutex_unlock(&inode->i_mutex); + sb_end_write(inode->i_sb); return written ? written : err; } -- cgit v1.2.3 From fbf8fb76505a9e5bbb47e91d964105b6ea59b0ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:43 +0200 Subject: ntfs: Convert to new freezing mechanism Move check in ntfs_file_aio_write_nolock() to ntfs_file_aio_write() and use new freeze protection. CC: linux-ntfs-dev@lists.sourceforge.net CC: Anton Altaparmakov Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ntfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 7389d2d5e51d..1ecf46448f85 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, if (err) return err; pos = *ppos; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); /* We can write back this queue in page reclaim. 
*/ current->backing_dev_info = mapping->backing_dev_info; written = 0; @@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0) ret = err; } + sb_end_write(inode->i_sb); return ret; } -- cgit v1.2.3 From 2c22b337b5bbb497c41b348b2357b7070ed5ba88 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:44 +0200 Subject: nilfs2: Convert to new freezing mechanism We change nilfs_page_mkwrite() to provide proper freeze protection for writeable page faults (we must wait for frozen filesystem even if the page is fully mapped). We remove all vfs_check_frozen() checks since they are now handled by the generic code. CC: linux-nilfs@vger.kernel.org CC: KONISHI Ryusuke Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/nilfs2/file.c | 18 +++++++++++------- fs/nilfs2/ioctl.c | 2 -- fs/nilfs2/segment.c | 5 ++++- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 62cebc8e1a1f..a4d56ac02e6c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_dentry->d_inode; struct nilfs_transaction_info ti; - int ret; + int ret = 0; if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ + sb_start_pagefault(inode->i_sb); lock_page(page); if (page->mapping != inode->i_mapping || page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); - return VM_FAULT_NOPAGE; /* make the VM retry the fault */ + ret = -EFAULT; /* make the VM retry 
the fault */ + goto out; } /* @@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); /* never returns -ENOMEM, but may return -ENOSPC */ if (unlikely(ret)) - return VM_FAULT_SIGBUS; + goto out; - ret = block_page_mkwrite(vma, vmf, nilfs_get_block); - if (ret != VM_FAULT_LOCKED) { + ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + if (ret) { nilfs_transaction_abort(inode->i_sb); - return ret; + goto out; } nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); nilfs_transaction_commit(inode->i_sb); mapped: wait_on_page_writeback(page); - return VM_FAULT_LOCKED; + out: + sb_end_pagefault(inode->i_sb); + return block_page_mkwrite_return(ret); } static const struct vm_operations_struct nilfs_file_vm_ops = { diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 06658caa18bd..08f27968a7a9 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; } - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 88e11fb346b6..a5752a589932 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb, if (ret > 0) return 0; - vfs_check_frozen(sb, SB_FREEZE_WRITE); + sb_start_intwrite(sb); nilfs = sb->s_fs_info; down_read(&nilfs->ns_segctor_sem); @@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb, current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return ret; } @@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb) err = nilfs_construct_segment(sb); if (ti->ti_flags & 
NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return err; } @@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb) current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); } void nilfs_relax_pressure_in_lock(struct super_block *sb) -- cgit v1.2.3 From b2b5ef5c8e89f19b68c174bf246f3ca212dbf0bc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:45 +0200 Subject: btrfs: Convert to new freezing mechanism We convert btrfs_file_aio_write() to use the new freeze check. We also add proper freeze protection to btrfs_page_mkwrite(). We also add freeze protection to the transaction mechanism to avoid starting transactions on a frozen filesystem. At minimum this is necessary to stop iput() of an unlinked file from changing a frozen filesystem during truncation. Checks in cleaner_kthread() and transaction_kthread() can be safely removed since btrfs_freeze() will lock the mutexes and thus block the threads (and they shouldn't have anything to do anyway). 
CC: linux-btrfs@vger.kernel.org CC: Chris Mason Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 3 --- fs/btrfs/file.c | 3 ++- fs/btrfs/inode.c | 6 +++++- fs/btrfs/transaction.c | 7 +++++++ 4 files changed, 14 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2936ca49b3b4..20c49b16b759 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1533,8 +1533,6 @@ static int cleaner_kthread(void *arg) struct btrfs_root *root = arg; do { - vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); - if (!(root->fs_info->sb->s_flags & MS_RDONLY) && mutex_trylock(&root->fs_info->cleaner_mutex)) { btrfs_run_delayed_iputs(root); @@ -1566,7 +1564,6 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; delay = HZ * 30; - vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9aa01ec2138d..5caf285c6e4d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ssize_t err = 0; size_t count, ocount; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); @@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, num_written = err; } out: + sb_end_write(inode->i_sb); current->backing_dev_info = NULL; return num_written ? 
num_written : err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb8d671d00e6..f4d901786b3c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6620,6 +6620,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_start; u64 page_end; + sb_start_pagefault(inode->i_sb); ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (!ret) { ret = file_update_time(vma->vm_file); @@ -6709,12 +6710,15 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); out_unlock: - if (!ret) + if (!ret) { + sb_end_pagefault(inode->i_sb); return VM_FAULT_LOCKED; + } unlock_page(page); out: btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); out_noreserve: + sb_end_pagefault(inode->i_sb); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b72b068183ec..fa67ba51516e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -329,6 +329,8 @@ again: if (!h) return ERR_PTR(-ENOMEM); + sb_start_intwrite(root->fs_info->sb); + if (may_wait_transaction(root, type)) wait_current_trans(root); @@ -339,6 +341,7 @@ again: } while (ret == -EBUSY); if (ret < 0) { + sb_end_intwrite(root->fs_info->sb); kmem_cache_free(btrfs_trans_handle_cachep, h); return ERR_PTR(ret); } @@ -528,6 +531,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, count++; } + sb_end_intwrite(root->fs_info->sb); + if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root)) { trans->transaction->blocked = 1; @@ -1517,6 +1522,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); + sb_end_intwrite(root->fs_info->sb); + trace_btrfs_transaction_commit(root); btrfs_scrub_continue(root); -- cgit v1.2.3 From 1e8b212fe5dcee9d3dbb152d235f3c33458fb26e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:46 +0200 Subject: ext2: Implement freezing The only missing piece to make 
freezing work reliably with ext2 is to stop iput() of unlinked inode from deleting the inode on frozen filesystem. So add a necessary protection to ext2_evict_inode(). We also provide appropriate ->freeze_fs and ->unfreeze_fs functions. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ext2/inode.c | 5 ++++- fs/ext2/super.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 264d315f6c47..6363ac66fafa 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode) truncate_inode_pages(&inode->i_data, 0); if (want_delete) { + sb_start_intwrite(inode->i_sb); /* set dtime */ EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); @@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode) if (unlikely(rsv)) kfree(rsv); - if (want_delete) + if (want_delete) { ext2_free_inode(inode); + sb_end_intwrite(inode->i_sb); + } } typedef struct { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5df3d2d8169c..15761e638dae 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb, static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); +static int ext2_freeze(struct super_block *sb); +static int ext2_unfreeze(struct super_block *sb); void ext2_error(struct super_block *sb, const char *function, const char *fmt, ...) 
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = { .evict_inode = ext2_evict_inode, .put_super = ext2_put_super, .sync_fs = ext2_sync_fs, + .freeze_fs = ext2_freeze, + .unfreeze_fs = ext2_unfreeze, .statfs = ext2_statfs, .remount_fs = ext2_remount, .show_options = ext2_show_options, @@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait) return 0; } +static int ext2_freeze(struct super_block *sb) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + + /* + * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared + * because we have unattached inodes and thus filesystem is not fully + * consistent. + */ + if (atomic_long_read(&sb->s_remove_count)) { + ext2_sync_fs(sb, 1); + return 0; + } + /* Set EXT2_FS_VALID flag */ + spin_lock(&sbi->s_lock); + sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state); + spin_unlock(&sbi->s_lock); + ext2_sync_super(sb, sbi->s_es, 1); + + return 0; +} + +static int ext2_unfreeze(struct super_block *sb) +{ + /* Just write sb to clear EXT2_VALID_FS flag */ + ext2_write_super(sb); + + return 0; +} void ext2_write_super(struct super_block *sb) { -- cgit v1.2.3 From d9c95bdd53a8d9116d269c91ce3d151472e6bcd6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:47 +0200 Subject: fs: Remove old freezing mechanism Now that all users are converted, we can remove functions, variables, and constants defined by the old freezing mechanism. BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. 
Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 1 - include/linux/fs.h | 5 ----- 2 files changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 0f64ecb7b1bf..a87dc1b1ac92 100644 --- a/fs/super.c +++ b/fs/super.c @@ -217,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) mutex_init(&s->s_dquot.dqio_mutex); mutex_init(&s->s_dquot.dqonoff_mutex); init_rwsem(&s->s_dquot.dqptr_sem); - init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; diff --git a/include/linux/fs.h b/include/linux/fs.h index aefed9426b03..0f4b79be8717 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1459,7 +1459,6 @@ extern spinlock_t sb_lock; enum { SB_UNFROZEN = 0, /* FS is unfrozen */ SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ - SB_FREEZE_TRANS = 2, SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop * internal threads if needed) */ @@ -1528,8 +1527,6 @@ struct super_block { struct hlist_node s_instances; struct quota_info s_dquot; /* Diskquota specific options */ - int s_frozen; - wait_queue_head_t s_wait_unfrozen; struct sb_writers s_writers; char s_id[32]; /* Informational name */ @@ -1585,8 +1582,6 @@ extern struct timespec current_fs_time(struct super_block *sb); /* * Snapshotting support. */ -/* Will go away when all users are converted */ -#define vfs_check_frozen(sb, level) do { } while (0) void __sb_end_write(struct super_block *sb, int level); int __sb_start_write(struct super_block *sb, int level, bool wait); -- cgit v1.2.3 From b042414feb240df5f5911b9bca39b61e6738e814 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 31 Jul 2012 14:40:12 +1000 Subject: nfs: increase number of permitted callback connections. 
By default a sunrpc service is limited to (N+3)*20 connections where N is the number of threads. This is 80 when N==1. If this number is exceeded a warning is printed suggesting that the number of threads be increased. However with services which run a single thread, this is impossible. For such services there is a ->sv_maxconn setting that can be used to forcibly increase the limit, and silence the message. This is used by lockd. The nfs client uses a sunrpc service to handle callbacks and it too is single-threaded, so to avoid the useless messages, and to allow a reasonable number of concurrent connections, we need to set ->sv_maxconn. 1024 seems like a good number. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index ca3ac992028b..4c8459e5bdee 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -241,6 +241,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) ret = -ENOMEM; goto out_err; } + /* As there is only one thread we need to over-ride the + * default maximum of 80 connections + */ + serv->sv_maxconn = 1024; ret = svc_bind(serv, net); if (ret < 0) { -- cgit v1.2.3 From ad0fcd4eb68059de02e1766948263c71b8a5b1dc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 15:46:23 -0400 Subject: nfs: explicitly reject LOCK_MAND flock() requests We have no mechanism to emulate LOCK_MAND locks on NFSv4, so explicitly return -EINVAL if someone requests it. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1557978ca7b3..b039a17ee941 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -834,6 +834,15 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + /* + * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of + * any standard. In principle we might be able to support LOCK_MAND + * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the + * NFS code is not set up for it. + */ + if (fl->fl_type & LOCK_MAND) + return -EINVAL; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; -- cgit v1.2.3 From 3965c9ae47d64aadf6f13b6fcd37767b83c0689a Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 31 Jul 2012 16:41:52 -0700 Subject: mm: prepare for removal of obsolete /proc/sys/vm/nr_pdflush_threads Since per-BDI flusher threads were introduced in 2.6, the pdflush mechanism is not used any more. But the old interface exported through /proc/sys/vm/nr_pdflush_threads still exists and is obviously useless. For backward compatibility, print a warning message and return 2 to notify users that the interface is being removed. 
Signed-off-by: Wanpeng Li Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../ABI/obsolete/proc-sys-vm-nr_pdflush_threads | 5 +++++ Documentation/feature-removal-schedule.txt | 8 ++++++++ Documentation/sysctl/vm.txt | 11 ----------- fs/fs-writeback.c | 5 ----- include/linux/backing-dev.h | 3 +++ include/linux/writeback.h | 5 ----- kernel/sysctl.c | 8 +++----- kernel/sysctl_binary.c | 2 +- mm/backing-dev.c | 20 ++++++++++++++++++++ 9 files changed, 40 insertions(+), 27 deletions(-) create mode 100644 Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads (limited to 'fs') diff --git a/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads b/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads new file mode 100644 index 000000000000..b0b0eeb20fe3 --- /dev/null +++ b/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads @@ -0,0 +1,5 @@ +What: /proc/sys/vm/nr_pdflush_threads +Date: June 2012 +Contact: Wanpeng Li +Description: Since pdflush is replaced by per-BDI flusher, the interface of old pdflush + exported in /proc/sys/vm/ should be removed. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index e9237fb71950..88f2fa48bb63 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -13,6 +13,14 @@ Who: Jim Cromie , Jason Baron --------------------------- +What: /proc/sys/vm/nr_pdflush_threads +When: 2012 +Why: Since pdflush is deprecated, the interface exported in /proc/sys/vm/ + should be removed. 
+Who: Wanpeng Li + +--------------------------- + What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle When: 2012 Why: This optional sub-feature of APM is of dubious reliability, diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 84eb25cd69aa..06d662b1c5d5 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -42,7 +42,6 @@ Currently, these files are in /proc/sys/vm: - mmap_min_addr - nr_hugepages - nr_overcommit_hugepages -- nr_pdflush_threads - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks @@ -426,16 +425,6 @@ See Documentation/vm/hugetlbpage.txt ============================================================== -nr_pdflush_threads - -The current number of pdflush threads. This value is read-only. -The value changes according to the number of dirty pages in the system. - -When necessary, additional pdflush threads are created, one per second, up to -nr_pdflush_threads_max. - -============================================================== - nr_trim_pages This is available only on NOMMU kernels. diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 50d0b78130a1..be3efc4f64f4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,11 +52,6 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; -/* - * We don't actually have pdflush, but this one is exported though /proc... - */ -int nr_pdflush_threads; - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 489de625cd25..c97c6b9cd38e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,6 +17,7 @@ #include #include #include +#include struct page; struct device; @@ -304,6 +305,8 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync); void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); long wait_iff_congested(struct zone *zone, int sync, long timeout); +int pdflush_proc_obsolete(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 6d0a0fcd80e7..c66fe3332d83 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -189,9 +189,4 @@ void tag_pages_for_writeback(struct address_space *mapping, void account_page_redirty(struct page *page); -/* pdflush.c */ -extern int nr_pdflush_threads; /* Global so it can be exported to sysctl - read-only. 
*/ - - #endif /* WRITEBACK_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 97186b99b0e4..6502d35a25ba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1101,11 +1101,9 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, { - .procname = "nr_pdflush_threads", - .data = &nr_pdflush_threads, - .maxlen = sizeof nr_pdflush_threads, - .mode = 0444 /* read-only*/, - .proc_handler = proc_dointvec, + .procname = "nr_pdflush_threads", + .mode = 0444 /* read-only */, + .proc_handler = pdflush_proc_obsolete, }, { .procname = "swappiness", diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index a650694883a1..65bdcf198d4e 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -147,7 +147,7 @@ static const struct bin_table bin_vm_table[] = { { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" }, /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */ /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */ - { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" }, + /* VM_NR_PDFLUSH_THREADS "nr_pdflush_threads" no longer used */ { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" }, /* VM_PAGEBUF unused */ /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3387aea11209..6b4718e2ee34 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -886,3 +886,23 @@ out: return ret; } EXPORT_SYMBOL(wait_iff_congested); + +int pdflush_proc_obsolete(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char kbuf[] = "0\n"; + + if (*ppos) { + *lenp = 0; + return 0; + } + + if (copy_to_user(buffer, kbuf, sizeof(kbuf))) + return -EFAULT; + printk_once(KERN_WARNING "%s exported in /proc is scheduled for removal\n", + table->procname); + + *lenp = 2; + *ppos += *lenp; + return 2; +} -- cgit v1.2.3 From 24669e58477e2752c1fbca9c1c988e9dd0d79d15 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Tue, 31 Jul 2012 16:42:03 -0700 Subject: hugetlb: use 
mmu_gather instead of a temporary linked list for accumulating pages Use a mmu_gather instead of a temporary linked list for accumulating pages when we unmap a hugepage range Signed-off-by: Aneesh Kumar K.V Reviewed-by: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Hillf Danton Cc: Michal Hocko Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 4 ++-- include/linux/hugetlb.h | 22 +++++++++++++----- mm/hugetlb.c | 59 +++++++++++++++++++++++++++++-------------------- mm/memory.c | 7 ++++-- 4 files changed, 59 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e13e9bdb0bf5..8349a899912e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -416,8 +416,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) else v_offset = 0; - __unmap_hugepage_range(vma, - vma->vm_start + v_offset, vma->vm_end, NULL); + unmap_hugepage_range(vma, vma->vm_start + v_offset, + vma->vm_end, NULL); } } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 217f52859fa7..0f23c1840c9b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -7,6 +7,7 @@ struct ctl_table; struct user_struct; +struct mmu_gather; #ifdef CONFIG_HUGETLB_PAGE @@ -40,9 +41,10 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, unsigned int flags); void unmap_hugepage_range(struct vm_area_struct *, - unsigned long, unsigned long, struct page *); -void __unmap_hugepage_range(struct vm_area_struct *, - unsigned long, unsigned long, struct page *); + unsigned long, unsigned long, struct page *); +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct page *ref_page); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); void hugetlb_report_meminfo(struct seq_file *); int 
hugetlb_report_node_meminfo(int, char *); @@ -98,7 +100,6 @@ static inline unsigned long hugetlb_total_pages(void) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) -#define unmap_hugepage_range(vma, start, end, page) BUG() static inline void hugetlb_report_meminfo(struct seq_file *m) { } @@ -112,13 +113,24 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) #define huge_pte_offset(mm, address) 0 -#define dequeue_hwpoisoned_huge_page(page) 0 +static inline int dequeue_hwpoisoned_huge_page(struct page *page) +{ + return 0; +} + static inline void copy_huge_page(struct page *dst, struct page *src) { } #define hugetlb_change_protection(vma, address, end, newprot) +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long start, + unsigned long end, struct page *ref_page) +{ + BUG(); +} + #endif /* !CONFIG_HUGETLB_PAGE */ #define HUGETLB_ANON_FILE "anon_hugepage" diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b1e0ed1ea912..e54b695336f9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -24,8 +24,9 @@ #include #include -#include +#include +#include #include #include #include "internal.h" @@ -2310,30 +2311,26 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) return 0; } -void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page) +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct page *ref_page) { + int force_flush = 0; struct mm_struct *mm = vma->vm_mm; unsigned long address; pte_t *ptep; pte_t pte; struct page *page; - struct page *tmp; struct hstate *h = hstate_vma(vma); unsigned long sz = 
huge_page_size(h); - /* - * A page gathering list, protected by per file i_mmap_mutex. The - * lock is used to avoid list corruption from multiple unmapping - * of the same page since we are using page->lru. - */ - LIST_HEAD(page_list); - WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); BUG_ON(end & ~huge_page_mask(h)); + tlb_start_vma(tlb, vma); mmu_notifier_invalidate_range_start(mm, start, end); +again: spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); @@ -2372,30 +2369,45 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } pte = huge_ptep_get_and_clear(mm, address, ptep); + tlb_remove_tlb_entry(tlb, ptep, address); if (pte_dirty(pte)) set_page_dirty(page); - list_add(&page->lru, &page_list); + page_remove_rmap(page); + force_flush = !__tlb_remove_page(tlb, page); + if (force_flush) + break; /* Bail out after unmapping reference page if supplied */ if (ref_page) break; } - flush_tlb_range(vma, start, end); spin_unlock(&mm->page_table_lock); - mmu_notifier_invalidate_range_end(mm, start, end); - list_for_each_entry_safe(page, tmp, &page_list, lru) { - page_remove_rmap(page); - list_del(&page->lru); - put_page(page); + /* + * mmu_gather ran out of room to batch pages, we break out of + * the PTE lock to avoid doing the potential expensive TLB invalidate + * and page-free while holding it. 
+ */ + if (force_flush) { + force_flush = 0; + tlb_flush_mmu(tlb); + if (address < end && !ref_page) + goto again; } + mmu_notifier_invalidate_range_end(mm, start, end); + tlb_end_vma(tlb, vma); } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { - mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); - __unmap_hugepage_range(vma, start, end, ref_page); - mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); + struct mm_struct *mm; + struct mmu_gather tlb; + + mm = vma->vm_mm; + + tlb_gather_mmu(&tlb, mm, 0); + __unmap_hugepage_range(&tlb, vma, start, end, ref_page); + tlb_finish_mmu(&tlb, start, end); } /* @@ -2440,9 +2452,8 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * from the time of fork. This would look like data corruption */ if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) - __unmap_hugepage_range(iter_vma, - address, address + huge_page_size(h), - page); + unmap_hugepage_range(iter_vma, address, + address + huge_page_size(h), page); } mutex_unlock(&mapping->i_mmap_mutex); diff --git a/mm/memory.c b/mm/memory.c index 91f69459d3e8..59e5bebc2e35 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1343,8 +1343,11 @@ static void unmap_single_vma(struct mmu_gather *tlb, * Since no pte has actually been setup, it is * safe to do nothing in this case. 
*/ - if (vma->vm_file) - unmap_hugepage_range(vma, start, end, NULL); + if (vma->vm_file) { + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); + __unmap_hugepage_range(tlb, vma, start, end, NULL); + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); + } } else unmap_page_range(tlb, vma, start, end, details); } -- cgit v1.2.3 From 8e125cd85517c9716695b0abfabc0a4a3fcb94f3 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 31 Jul 2012 16:43:16 -0700 Subject: vmscan: remove obsolete shrink_control comment 09f363c7 ("vmscan: fix shrinker callback bug in fs/super.c") fixed a shrinker callback which was returning -1 when nr_to_scan is zero, which caused excessive slab scanning. But 635697c6 ("vmscan: fix initial shrinker size handling") fixed the problem again, so we can freely return -1 even when nr_to_scan is zero. So let's revert 09f363c7 because the comment added in 09f363c7 made an unnecessary rule. Signed-off-by: Minchan Kim Cc: Al Viro Cc: Mikulas Patocka Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 2 +- include/linux/shrinker.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 4c5d82f56ec4..4bf714459a4b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -62,7 +62,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) return -1; if (!grab_super_passive(sb)) - return !sc->nr_to_scan ? 0 : -1; + return -1; if (sb->s_op && sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb); diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 07ceb97d53fa..ac6b8ee07825 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -20,7 +20,6 @@ struct shrink_control { * 'nr_to_scan' entries and attempt to free them up. It should return * the number of objects which remain in the cache. If it returns -1, it means * it cannot do any scanning at this time (eg. there is a risk of deadlock). 
- * The callback must not return -1 if nr_to_scan is zero. * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. -- cgit v1.2.3 From d56b4ddf7781ef8dd050542781cc7f55673af002 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:06 -0700 Subject: nfs: teach the NFS client how to treat PG_swapcache pages Replace all relevant occurrences of page->index and page->mapping in the NFS client with the new page_file_index() and page_file_mapping() functions. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/file.c | 6 +++--- fs/nfs/internal.h | 7 ++++--- fs/nfs/pagelist.c | 2 +- fs/nfs/read.c | 6 +++--- fs/nfs/write.c | 36 ++++++++++++++++++------------------ 5 files changed, 29 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 70d124a61b98..acd4e4cd2906 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -430,7 +430,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_cancel(page->mapping->host, page); + nfs_wb_page_cancel(page_file_mapping(page)->host, page); nfs_fscache_invalidate_page(page, page->mapping->host); } @@ -472,7 +472,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) */ static int nfs_launder_page(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", @@ -521,7 +521,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 
nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); lock_page(page); - mapping = page->mapping; + mapping = page_file_mapping(page); if (mapping != dentry->d_inode->i_mapping) goto out_unlock; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cfafd13b6fe9..4be14b3e0a18 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -546,13 +546,14 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) static inline unsigned int nfs_page_length(struct page *page) { - loff_t i_size = i_size_read(page->mapping->host); + loff_t i_size = i_size_read(page_file_mapping(page)->host); if (i_size > 0) { + pgoff_t page_index = page_file_index(page); pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (page->index < end_index) + if (page_index < end_index) return PAGE_CACHE_SIZE; - if (page->index == end_index) + if (page_index == end_index) return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; } return 0; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aed913c833f4..9ef8b3cf7fc7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -117,7 +117,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. 
*/ req->wb_page = page; - req->wb_index = page->index; + req->wb_index = page_file_index(page); page_cache_get(page); req->wb_offset = offset; req->wb_pgbase = offset; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6267b873bbcb..7cb020782687 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -522,11 +522,11 @@ static const struct rpc_call_ops nfs_read_common_ops = { int nfs_readpage(struct file *file, struct page *page) { struct nfs_open_context *ctx; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int error; dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_CACHE_SIZE, page->index); + page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); nfs_add_stats(inode, NFSIOS_READPAGES, 1); @@ -580,7 +580,7 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f312860c15d0..d0feca32b4fe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,7 +153,7 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) static struct nfs_page *nfs_page_find_request(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; spin_lock(&inode->i_lock); @@ -165,16 +165,16 @@ static struct nfs_page *nfs_page_find_request(struct page *page) /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - 
if (i_size > 0 && page->index < end_index) + if (i_size > 0 && page_file_index(page) < end_index) goto out; - end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + end = page_file_offset(page) + ((loff_t)offset+count); if (i_size >= end) goto out; i_size_write(inode, end); @@ -187,7 +187,7 @@ out: static void nfs_set_pageerror(struct page *page) { SetPageError(page); - nfs_zap_mapping(page->mapping->host, page->mapping); + nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } /* We can set the PG_uptodate flag if we see that a write request @@ -228,7 +228,7 @@ static int nfs_set_page_writeback(struct page *page) int ret = test_set_page_writeback(page); if (!ret) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > @@ -242,7 +242,7 @@ static int nfs_set_page_writeback(struct page *page) static void nfs_end_page_writeback(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); @@ -252,7 +252,7 @@ static void nfs_end_page_writeback(struct page *page) static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; int ret; @@ -313,13 +313,13 @@ out: static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - nfs_pageio_cond_complete(pgio, page->index); + nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == 
WB_SYNC_NONE); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); @@ -336,7 +336,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page->mapping->host)->write_pageio_init(&pgio, + NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, page->mapping->host, wb_priority(wbc), &nfs_async_write_completion_ops); @@ -471,7 +471,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); @@ -538,7 +538,7 @@ static void nfs_clear_page_commit(struct page *page) { dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); } static void @@ -789,7 +789,7 @@ out_err: static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; req = nfs_try_to_update_request(inode, page, offset, bytes); @@ -842,7 +842,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_page(page->mapping->host, page); + status = nfs_wb_page(page_file_mapping(page)->host, page); } while (status == 0); return status; } @@ -872,7 +872,7 @@ int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = page->mapping->host; + struct inode *inode = 
page_file_mapping(page)->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -880,7 +880,7 @@ int nfs_updatepage(struct file *file, struct page *page, dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) + offset)); + (long long)(page_file_offset(page) + offset)); /* If we're not using byte range locks, and we know the page * is up to date, it may be more efficient to extend the write @@ -1469,7 +1469,7 @@ void nfs_retry_commit(struct list_head *page_list, nfs_mark_request_commit(req, lseg, cinfo); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); } nfs_unlock_and_release_request(req); -- cgit v1.2.3 From 29418aa4bd487c82016733ef5c6a06d656ed3c7d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:10 -0700 Subject: nfs: disable data cache revalidation for swapfiles The VM does not like PG_private set on PG_swapcache pages. As suggested by Trond in http://lkml.org/lkml/2006/8/25/348, this patch disables NFS data cache revalidation on swap files, as it does not make sense to have other clients change the file while it is being used as swap. This avoids setting PG_private on swap pages, since there ought to be no further races with invalidate_inode_pages2() to deal with. Since we cannot set PG_private we cannot use page->private which is already used by PG_swapcache pages to store the nfs_page. Thus augment the new nfs_page_find_request logic. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. 
Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 4 ++++ fs/nfs/write.c | 49 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 35f7e4bc680e..1d57fe9f49a9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -882,6 +882,10 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; + /* swapfiles are not supposed to be shared. */ + if (IS_SWAPFILE(inode)) + goto out; + if (nfs_mapping_need_revalidate_inode(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d0feca32b4fe..974e9c2d31fd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -139,15 +139,28 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } -static struct nfs_page *nfs_page_find_request_locked(struct page *page) +static struct nfs_page * +nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; - if (PagePrivate(page)) { + if (PagePrivate(page)) req = (struct nfs_page *)page_private(page); - if (req != NULL) - kref_get(&req->wb_kref); + else if (unlikely(PageSwapCache(page))) { + struct nfs_page *freq, *t; + + /* Linearly search the commit list for the correct req */ + list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { + if (freq->wb_page == page) { + req = freq; + break; + } + } } + + if (req) + kref_get(&req->wb_kref); + return req; } @@ -157,7 +170,7 @@ static struct nfs_page *nfs_page_find_request(struct page *page) struct nfs_page *req = NULL; spin_lock(&inode->i_lock); - req = 
nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -258,7 +271,7 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) break; if (nfs_lock_request(req)) @@ -413,9 +426,15 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_lock(&inode->i_lock); if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; - set_bit(PG_MAPPED, &req->wb_flags); - SetPagePrivate(req->wb_page); - set_page_private(req->wb_page, (unsigned long)req); + /* + * Swap-space should not get truncated. Hence no need to plug the race + * with invalidate/truncate. + */ + if (likely(!PageSwapCache(req->wb_page))) { + set_bit(PG_MAPPED, &req->wb_flags); + SetPagePrivate(req->wb_page); + set_page_private(req->wb_page, (unsigned long)req); + } nfsi->npages++; kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -432,9 +451,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (likely(!PageSwapCache(req->wb_page))) { + set_page_private(req->wb_page, 0); + ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); + } nfsi->npages--; spin_unlock(&inode->i_lock); nfs_release_request(req); @@ -730,7 +751,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; @@ -1744,7 +1765,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) */ int nfs_wb_page(struct inode 
*inode, struct page *page) { - loff_t range_start = page_offset(page); + loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, -- cgit v1.2.3 From a564b8f0398636ba30b07c0eaebdef7ff7837249 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:12 -0700 Subject: nfs: enable swap on NFS Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol ->connect() method. PF_MEMALLOC should allow the allocation of struct socket and related objects and the early (re)setting of SOCK_MEMALLOC should allow us to receive the packets required for the TCP connection buildup. [jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases] [dfeng@redhat.com: Fix handling of multiple swap files] [a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/Kconfig | 8 +++++ fs/nfs/direct.c | 82 +++++++++++++++++++++++++++++---------------- fs/nfs/file.c | 22 ++++++++++-- include/linux/nfs_fs.h | 4 +-- include/linux/sunrpc/xprt.h | 3 ++ net/sunrpc/Kconfig | 5 +++ net/sunrpc/clnt.c | 9 +++++ net/sunrpc/sched.c | 7 ++-- net/sunrpc/xprtsock.c | 43 ++++++++++++++++++++++++ 9 files changed, 149 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 404c6a8ac394..6fd5f2cdcd1e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -86,6 +86,14 @@ config NFS_V4 If unsure, say Y. 
+config NFS_SWAP + bool "Provide swap over NFS support" + default n + depends on NFS_FS + select SUNRPC_SWAP + help + This option enables swapon to work on files located on NFS mounts. + config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_V4 && EXPERIMENTAL diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec70..bf9c8d0ec16a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * @nr_segs: size of iovec array * * The presence of this routine in the address space ops vector means - * the NFS client supports direct I/O. However, we shunt off direct - * read and write requests before the VFS gets them, so this method - * should never be called. + * the NFS client supports direct I/O. However, for most direct IO, we + * shunt off direct read and write requests before the VFS gets them, + * so this method is only ever called for swap. */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { +#ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, (long long) pos, nr_segs); return -EINVAL; +#else + VM_BUG_ON(iocb->ki_left != PAGE_SIZE); + VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + + if (rw == READ || rw == KERNEL_READ) + return nfs_file_direct_read(iocb, iov, nr_segs, pos, + rw == READ ? true : false); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, + rw == WRITE ? 
true : false); +#endif /* CONFIG_NFS_SWAP */ } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { */ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de GFP_KERNEL); if (!pagevec) break; - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, + if (uio) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, npages, 1, 0, pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (result < 0) - break; + up_read(&current->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 1, pagevec); + if (WARN_ON(result != 1)) + break; + } + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { @@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; @@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - 
unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); NFS_I(inode)->read_io += result; @@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode */ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d if (!pagevec) break; - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (result < 0) - break; + if (uio) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + npages, 0, 0, pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 0, pagevec); + if (WARN_ON(result != 1)) + break; + } if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; @@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < 
nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos); + result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, - size_t count) + size_t count, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -867,7 +893,7 @@ out: * cache. */ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, task_io_account_read(count); - retval = nfs_direct_read(iocb, iov, nr_segs, pos); + retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); if (retval > 0) iocb->ki_pos = pos + retval; @@ -923,7 +949,7 @@ out: * is no atomic O_APPEND write facility in the NFS protocol. 
*/ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); if (retval > 0) { struct inode *inode = mapping->host; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index acd4e4cd2906..50fb83a88b1b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } +#ifdef CONFIG_NFS_SWAP +static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + *span = sis->pages; + return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); +} + +static void nfs_swap_deactivate(struct file *file) +{ + xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); +} +#endif + const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = { .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, .error_remove_page = generic_error_remove_page, +#ifdef CONFIG_NFS_SWAP + .swap_activate = nfs_swap_activate, + .swap_deactivate = nfs_swap_deactivate, +#endif }; /* @@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); 
if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); dprintk("NFS: write(%s/%s, %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4b6043c20f77..35994f975a7f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -473,10 +473,10 @@ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, unsigned long); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - loff_t pos); + loff_t pos, bool uio); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - loff_t pos); + loff_t pos, bool uio); /* * linux/fs/nfs/dir.c diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 77d278defa70..cff40aa7db62 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -174,6 +174,8 @@ struct rpc_xprt { unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ resvport : 1; /* use a reserved port */ + unsigned int swapper; /* we're swapping over this + transport */ unsigned int bind_index; /* bind function index */ /* @@ -316,6 +318,7 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); +int xs_swapper(struct rpc_xprt *xprt, int enable); /* * Reserved bit positions in xprt->state diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 9fe8857d8d59..03d03e37a7d5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. 
+config SUNRPC_SWAP + bool + depends on SUNRPC + select NETVM + config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" depends on SUNRPC && CRYPTO diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b05df36692ff..fa48c60aef23 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (sk_memalloc_socks()) { + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + if (xprt->swapper) + task->tk_flags |= RPC_TASK_SWAPPER; + rcu_read_unlock(); + } /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 994cfea2bad6..83a4c43cee7f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -812,7 +812,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = RPC_IS_SWAPPER(task) ? 
GFP_ATOMIC : GFP_NOWAIT; + gfp_t gfp = GFP_NOWAIT; + + if (RPC_IS_SWAPPER(task)) + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) @@ -886,7 +889,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 62d0dac8f780..bd59d01f035b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1927,6 +1927,45 @@ out: xprt_wake_pending_tasks(xprt, status); } +#ifdef CONFIG_SUNRPC_SWAP +static void xs_set_memalloc(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (xprt->swapper) + sk_set_memalloc(transport->inet); +} + +/** + * xs_swapper - Tag this transport as being used for swap. + * @xprt: transport to tag + * @enable: enable/disable + * + */ +int xs_swapper(struct rpc_xprt *xprt, int enable) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + int err = 0; + + if (enable) { + xprt->swapper++; + xs_set_memalloc(xprt); + } else if (xprt->swapper) { + xprt->swapper--; + sk_clear_memalloc(transport->inet); + } + + return err; +} +EXPORT_SYMBOL_GPL(xs_swapper); +#else +static void xs_set_memalloc(struct rpc_xprt *xprt) +{ +} +#endif + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1951,6 +1990,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->sock = sock; transport->inet = sk; + xs_set_memalloc(xprt); + write_unlock_bh(&sk->sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -2075,6 +2116,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if 
(!xprt_bound(xprt)) goto out; + xs_set_memalloc(xprt); + /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; -- cgit v1.2.3 From 192e501b0438bb0e1574179773537f84c4752e25 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:16 -0700 Subject: nfs: prevent page allocator recursions with swap over NFS. GFP_NOFS is _more_ permissive than GFP_NOIO in that it will initiate IO, just not of any filesystem data. The problem is that previously NOFS was correct because that avoids recursion into the NFS code. With swap-over-NFS, it is no longer correct as swap IO can lead to this recursion. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/pagelist.c | 2 +- fs/nfs/write.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9ef8b3cf7fc7..7de1646c4e6b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -70,7 +70,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); if (p) INIT_LIST_HEAD(&p->wb_list); return p; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 974e9c2d31fd..211ba6566772 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -52,7 +52,7 @@ static mempool_t *nfs_commit_mempool; struct nfs_commit_data *nfs_commitdata_alloc(void) { - struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); if (p) 
{ memset(p, 0, sizeof(*p)); @@ -70,7 +70,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); struct nfs_write_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; -- cgit v1.2.3 From 068535f1fef4c90aee23eb7b9b9a71c5b72d7cd0 Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Wed, 1 Aug 2012 07:56:16 -0400 Subject: locks: remove unused lm_release_private In commit 3b6e2723f32d ("locks: prevent side-effects of locks_release_private before file_lock is initialized") we removed the last user of lm_release_private without removing the field itself. Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 2 -- fs/locks.c | 6 +----- include/linux/fs.h | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2db1900d7538..7f647e17830c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -359,7 +359,6 @@ prototypes: int (*lm_compare_owner)(struct file_lock *, struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, struct file_lock *, int); - void (*lm_release_private)(struct file_lock *); void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); @@ -368,7 +367,6 @@ locking rules: lm_compare_owner: yes no lm_notify: yes no lm_grant: no no -lm_release_private: maybe no lm_break: yes no lm_change yes no diff --git a/fs/locks.c b/fs/locks.c index cdcf219a7391..7e81bfc75164 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -200,11 +200,7 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } - if (fl->fl_lmops) { - if (fl->fl_lmops->lm_release_private) - 
fl->fl_lmops->lm_release_private(fl); - fl->fl_lmops = NULL; - } + fl->fl_lmops = NULL; } EXPORT_SYMBOL_GPL(locks_release_private); diff --git a/include/linux/fs.h b/include/linux/fs.h index d7eed5b98ae2..4ba5c8715523 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1162,7 +1162,6 @@ struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, struct file_lock *, int); - void (*lm_release_private)(struct file_lock *); void (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock **, int); }; -- cgit v1.2.3 From 53362a05ae683e12a20d9ffdf58a88094a0bed9d Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Thu, 2 Aug 2012 09:50:39 +0200 Subject: fs/block-dev.c:fix performance regression in O_DIRECT writes to md block devices For regular file, write operation used blk_plug function. But for block file, write operation did not use blk_plug. This patch is also for write-cache mode for block-device.
Signed-off-by: Jianpeng Ma Reviewed-by: NeilBrown Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e519195d45b..38e721b35d45 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); + blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } + blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); -- cgit v1.2.3 From 0e8d96dd2c99405c707f540b5922ec869b848979 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Mon, 20 Feb 2012 03:46:12 -0500 Subject: exofs: readpage_strip: Add a BUG_ON to check for PageLocked(page) readpage_strip can be called from several code paths all of which require that the page be locked before any operations are carried out. Since we export the exofs_readpage callback to the VFS, add a BUG_ON to check for PageLocked(page) to make sure that this understanding is never compromised. 
Signed-off-by: Kautuk Consul Signed-off-by: Boaz Harrosh --- fs/exofs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 5badb0c039de..9a5ed30b8a35 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -389,6 +389,8 @@ static int readpage_strip(void *data, struct page *page) size_t len; int ret; + BUG_ON(!PageLocked(page)); + /* FIXME: Just for debugging, will be removed */ if (PageUptodate(page)) EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, -- cgit v1.2.3 From 66153f6e0f89c75d18e490739b0149dfd2e53b69 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 4 Jun 2012 14:48:27 +0300 Subject: exofs: stop using s_dirt Exofs has the '->write_super()' handler and makes some use of the '->s_dirt' superblock flag, but it really needs neither of them because it never sets 's_dirt' to one which means the VFS never calls its '->write_super()' handler. Thus, remove both. Note, I am trying to remove both 's_dirt' and 'write_super()' from VFS altogether once all users are gone. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Boaz Harrosh --- fs/exofs/super.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'fs') diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 433783624d10..dde41a75c7c8 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -400,8 +400,6 @@ static int exofs_sync_fs(struct super_block *sb, int wait) ret = ore_write(ios); if (unlikely(ret)) EXOFS_ERR("%s: ore_write failed.\n", __func__); - else - sb->s_dirt = 0; unlock_super(sb); @@ -412,14 +410,6 @@ out: return ret; } -static void exofs_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - exofs_sync_fs(sb, 1); - else - sb->s_dirt = 0; -} - static void _exofs_print_device(const char *msg, const char *dev_path, struct osd_dev *od, u64 pid) { @@ -952,7 +942,6 @@ static const struct super_operations exofs_sops = { .write_inode = exofs_write_inode, .evict_inode = exofs_evict_inode, .put_super = exofs_put_super, - .write_super = exofs_write_super, .sync_fs = exofs_sync_fs, .statfs = exofs_statfs, }; -- cgit v1.2.3 From 4b74f6ea8417e48bb1fc65880a0574134a8b4745 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 20 Jul 2012 15:50:27 +0300 Subject: exofs: Fix __r4w_get_page when offset is beyond i_size It is very common for the end of the file to be unaligned on stripe size. But since we know it's beyond file's end then the XOR should be performed with all zeros. Old code used to just read zeros out of the OSD devices, which is a great waste. But what scares me more about this situation is that, we now have pages attached to the file's mapping that are beyond i_size. I don't like the kind of bugs this calls for. Fix both birds, by returning a global ZERO_PAGE, if offset is beyond i_size.
Signed-off-by: Boaz Harrosh --- fs/exofs/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 9a5ed30b8a35..3b2ee72aecbe 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -574,8 +574,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) if (!pcol->that_locked_page || (pcol->that_locked_page->index != index)) { - struct page *page = find_get_page(pcol->inode->i_mapping, index); + struct page *page; + loff_t i_size = i_size_read(pcol->inode); + if (offset >= i_size) { + *uptodate = true; + EXOFS_DBGMSG("offset >= i_size index=0x%lx\n", index); + return ZERO_PAGE(0); + } + + page = find_get_page(pcol->inode->i_mapping, index); if (!page) { page = find_or_create_page(pcol->inode->i_mapping, index, GFP_NOFS); @@ -604,12 +612,13 @@ static void __r4w_put_page(void *priv, struct page *page) { struct page_collect *pcol = priv; - if (pcol->that_locked_page != page) { + if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) { EXOFS_DBGMSG("index=0x%lx\n", page->index); page_cache_release(page); return; } - EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index); + EXOFS_DBGMSG("that_locked_page index=0x%lx\n", + ZERO_PAGE(0) == page ? -1 : page->index); } static const struct _ore_r4w_op _r4w_op = { -- cgit v1.2.3 From be388f3d9af4155db0dc9e7e59dd49db90271c1c Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 2 Aug 2012 14:59:57 +0300 Subject: exofs: Use proper max_IO calculations from ore exofs_max_io_pages should just use the ORE's calculated layout->max_io_length, And avoid unnecessary BUGs, calculations made here were also a layering violation. 
Signed-off-by: Boaz Harrosh --- fs/exofs/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 3b2ee72aecbe..1562c27a2fab 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -37,15 +37,12 @@ #define EXOFS_DBGMSG2(M...) do {} while (0) -enum {MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), }; - unsigned exofs_max_io_pages(struct ore_layout *layout, unsigned expected_pages) { - unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); + unsigned pages = min_t(unsigned, expected_pages, + layout->max_io_length / PAGE_SIZE); - /* TODO: easily support bio chaining */ - pages = min_t(unsigned, pages, layout->max_io_length / PAGE_SIZE); return pages; } @@ -101,7 +98,8 @@ static void _pcol_reset(struct page_collect *pcol) * it might not end here. don't be left with nothing */ if (!pcol->expected_pages) - pcol->expected_pages = MAX_PAGES_KMALLOC; + pcol->expected_pages = + exofs_max_io_pages(&pcol->sbi->layout, ~0); } static int pcol_try_alloc(struct page_collect *pcol) -- cgit v1.2.3 From 9e62bb4458ad2cf28bd701aa5fab380b846db326 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 1 Aug 2012 17:48:36 +0300 Subject: ore: Fix out-of-bounds access in _ios_obj() _ios_obj() is accessed by group_index not device_table index. The oc->comps array is only a group_full of devices at a time it is not like ore_comp_dev() which is indexed by a global device_table index. This did not BUG until now because exofs only uses a single COMP for all devices. But with other FSs like PanFS this is not true. 
This bug was only in the write_path, all other users were using it correctly [This is a bug since 3.2 Kernel] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/exofs/ore.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 24a49d47e935..1585db1aa365 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -837,11 +837,11 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) bio->bi_rw |= REQ_WRITE; } - osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, - bio, per_dev->length); + osd_req_write(or, _ios_obj(ios, cur_comp), + per_dev->offset, bio, per_dev->length); ORE_DBGMSG("write(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(_ios_obj(ios, dev)->id), + _LLU(_ios_obj(ios, cur_comp)->id), _LLU(per_dev->offset), _LLU(per_dev->length), dev); } else if (ios->kern_buff) { @@ -853,20 +853,20 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) (ios->si.unit_off + ios->length > ios->layout->stripe_unit)); - ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev), + ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp), per_dev->offset, ios->kern_buff, ios->length); if (unlikely(ret)) goto out; ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(_ios_obj(ios, dev)->id), + _LLU(_ios_obj(ios, cur_comp)->id), _LLU(per_dev->offset), _LLU(ios->length), per_dev->dev); } else { - osd_req_set_attributes(or, _ios_obj(ios, dev)); + osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", - _LLU(_ios_obj(ios, dev)->id), + _LLU(_ios_obj(ios, cur_comp)->id), ios->out_attr_len, dev); } -- cgit v1.2.3 From 5ef50c3bec20060bc114f62d6503c5d86d70bdd7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 31 Jul 2012 11:27:36 -0700 Subject: ceph: simplify+fix atomic_open The initial ->atomic_open op was carried over from the old intent code, which was incomplete and didn't really work. 
Replace it with a fresh method. In particular: * always attempt to do an atomic open+lookup, both for the create case and for lookups of existing files. * fix symlink handling by returning 1 to the VFS so that we can follow the link to its destination. This fixes a longstanding ceph bug (#2392). Signed-off-by: Sage Weil --- fs/ceph/dir.c | 38 ----------------------------------- fs/ceph/file.c | 62 ++++++++++++++++++++++++++++++++++----------------------- fs/ceph/super.h | 6 +++--- 3 files changed, 40 insertions(+), 66 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f391f1e75414..e5b77319c97b 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -633,44 +633,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } -int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened) -{ - int err; - struct dentry *res = NULL; - - if (!(flags & O_CREAT)) { - if (dentry->d_name.len > NAME_MAX) - return -ENAMETOOLONG; - - err = ceph_init_dentry(dentry); - if (err < 0) - return err; - - return ceph_lookup_open(dir, dentry, file, flags, mode, opened); - } - - if (d_unhashed(dentry)) { - res = ceph_lookup(dir, dentry, 0); - if (IS_ERR(res)) - return PTR_ERR(res); - - if (res) - dentry = res; - } - - /* We don't deal with positive dentries here */ - if (dentry->d_inode) - return finish_no_open(file, res); - - *opened |= FILE_CREATED; - err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); - dput(res); - - return err; -} - /* * If we do a create but get no trace back from the MDS, follow up with * a lookup (the VFS expects us to link up the provided dentry). 
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 1b81d6c31878..ecebbc09bfc7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -106,9 +107,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) } /* - * If the filp already has private_data, that means the file was - * already opened by intent during lookup, and we do nothing. - * * If we already have the requisite capabilities, we can satisfy * the open request locally (no need to request new caps from the * MDS). We do, however, need to inform the MDS (asynchronously) @@ -207,24 +205,29 @@ out: /* - * Do a lookup + open with a single request. - * - * If this succeeds, but some subsequent check in the vfs - * may_open() fails, the struct *file gets cleaned up (i.e. - * ceph_release gets called). So fear not! + * Do a lookup + open with a single request. If we get a non-existent + * file or symlink, return 1 so the VFS can retry. */ -int ceph_lookup_open(struct inode *dir, struct dentry *dentry, +int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct dentry *ret; + struct dentry *dn; int err; - dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", - dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); + dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n", + dir, dentry, dentry->d_name.len, dentry->d_name.name, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); + + if (dentry->d_name.len > NAME_MAX) + return -ENAMETOOLONG; + + err = ceph_init_dentry(dentry); + if (err < 0) + return err; /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); @@ -241,22 +244,31 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, (flags & (O_CREAT|O_TRUNC)) ? 
dir : NULL, req); err = ceph_handle_snapdir(req, dentry, err); - if (err) - goto out; - if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); - if (err) - goto out; - err = finish_open(file, req->r_dentry, ceph_open, opened); -out: - ret = ceph_finish_lookup(req, dentry, err); - ceph_mdsc_put_request(req); - dout("ceph_lookup_open result=%p\n", ret); - if (IS_ERR(ret)) - return PTR_ERR(ret); + if (d_unhashed(dentry)) { + dn = ceph_finish_lookup(req, dentry, err); + if (IS_ERR(dn)) + err = PTR_ERR(dn); + } else { + /* we were given a hashed negative dentry */ + dn = NULL; + } + if (err) + goto out_err; + if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) { + /* make vfs retry on splice, ENOENT, or symlink */ + dout("atomic_open finish_no_open on dn %p\n", dn); + err = finish_no_open(file, dn); + } else { + dout("atomic_open finish_open on dn %p\n", dn); + err = finish_open(file, dentry, ceph_open, opened); + } - dput(ret); +out_err: + ceph_mdsc_put_request(req); + dout("atomic_open result=%d\n", err); return err; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ebc95cc652be..66ebe720e40d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); -extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct file *od, unsigned flags, - umode_t mode, int *opened); +extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned flags, umode_t mode, + int *opened); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ -- cgit v1.2.3 From 3dd4765fce04c0b4af1e0bc4c0b10f906f95fabc Mon Sep 17 00:00:00 2001 From: Jeff 
Layton Date: Thu, 2 Aug 2012 14:30:56 -0400 Subject: nfs: tear down caches in nfs_init_writepagecache when allocation fails ...and ensure that we tear down the nfs_commit_data cache too when unloading the module. Cc: Bryan Schumaker Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5829d0ce7cfb..e3b55372726c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void) nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) - return -ENOMEM; + goto out_destroy_write_cache; nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", sizeof(struct nfs_commit_data), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_cdata_cachep == NULL) - return -ENOMEM; + goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) - return -ENOMEM; + goto out_destroy_commit_cache; /* * NFS congestion size, scale with available memory. 
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void) nfs_congestion_kb = 256*1024; return 0; + +out_destroy_commit_cache: + kmem_cache_destroy(nfs_cdata_cachep); +out_destroy_write_mempool: + mempool_destroy(nfs_wdata_mempool); +out_destroy_write_cache: + kmem_cache_destroy(nfs_wdata_cachep); + return -ENOMEM; } void nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); + kmem_cache_destroy(nfs_cdata_cachep); mempool_destroy(nfs_wdata_mempool); kmem_cache_destroy(nfs_wdata_cachep); } -- cgit v1.2.3 From 8554116e17eef055d9dd58a94b3427cb2ad1c317 Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:10 +0300 Subject: pnfs: defer release of pages in layoutget we have encountered a bug whereby reading a lot of files (copying fedora's /bin) from a pNFS mount and hitting Ctrl+C in the middle caused a general protection fault in xdr_shrink_bufhead. this function is called when decoding the response from LAYOUTGET. the decoding is done by a worker thread, and the caller of LAYOUTGET waits for the worker thread to complete. hitting Ctrl+C caused the synchronous wait to end and the next thing the caller does is to free the pages, so when the worker thread calls xdr_shrink_bufhead, the pages are gone. therefore, the cleanup of these pages has been moved to nfs4_layoutget_release. 
Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.c | 39 +------------------------------------ fs/nfs/pnfs.h | 2 +- 3 files changed, 58 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a99a8d948721..6a78d49da5c1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6223,11 +6223,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6239,9 +6286,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int 
nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6259,6 +6307,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76875bfcf19c..2e00feacd4be 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; struct pnfs_layout_segment *lseg = NULL; - struct page **pages = NULL; - int i; - u32 max_resp_sz, max_pages; dprintk("--> %s\n", __func__); @@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (lgp == NULL) return NULL; - /* allocate pages for xdr post processing */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - - pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); - if (!pages) - goto out_err_free; - - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_err_free; - } - lgp->args.minlength = PAGE_CACHE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; @@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->args.layout.pages = pages; - 
lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp); + nfs4_proc_layoutget(lgp, gfp_flags); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } - /* free xdr pages */ - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - return lseg; - -out_err_free: - /* free any allocated xdr pages, lgp as it's not used */ - if (pages) { - for (i = 0; i < max_pages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - kfree(lgp); - return NULL; } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2c6c80503ba4..5ea019e80b4c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -- cgit v1.2.3 From 21d1f58aedc5f7ac4bb0c4e3d78c74ea31ac050f Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:11 +0300 Subject: pnfs: nfs4_proc_layoutget returns void since the only user of nfs4_proc_layoutget is send_layoutget, which ignores its return value, there is no reason to return any value. 
Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/pnfs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6a78d49da5c1..f94f6b3928fc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6286,7 +6286,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); @@ -6310,7 +6310,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); - return -ENOMEM; + return; } lgp->args.layout.pglen = max_pages * PAGE_SIZE; @@ -6319,7 +6319,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return PTR_ERR(task); + return; status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; @@ -6327,7 +6327,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) status = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return status; + return; } static void diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5ea019e80b4c..745aa1b39e7c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); 
+extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -- cgit v1.2.3 From f6166384095b7ecf77752b5e9096e6d03d75f7ae Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 2 Aug 2012 15:36:09 +0300 Subject: NFS41: add pg_layout_private to nfs_pageio_descriptor To allow layout driver to pass private information around pg_init/pg_doio. Signed-off-by: Peng Tao Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ include/linux/nfs_page.h | 1 + include/linux/nfs_xdr.h | 1 + 3 files changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1a6732ed04a4..311a79681e2b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->dreq = desc->pg_dreq; + hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_error = 0; desc->pg_lseg = NULL; desc->pg_dreq = NULL; + desc->pg_layout_private = NULL; } EXPORT_SYMBOL_GPL(nfs_pageio_init); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 880805774f9f..92ce5783b707 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -69,6 +69,7 @@ struct nfs_pageio_descriptor { const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; + void *pg_layout_private; }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 00485e084394..ac7c8ae254f2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1248,6 +1248,7 @@ struct nfs_pgio_header { void 
(*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; struct nfs_direct_req *dreq; + void *layout_private; spinlock_t lock; /* fields protected by lock */ int pnfs_error; -- cgit v1.2.3 From 7de6e28417c65919cf2c1621841a650c4a3afbbd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 2 Aug 2012 15:38:23 +0300 Subject: pnfs-obj: Better IO pattern in case of unaligned offset Depending on layout and ARCH, ORE has some limits on max IO sizes which is communicated on (what else) ore_layout->max_io_length, which is always stripe aligned. This was considered as the pg_test boundary for splitting and starting a new IO. But in the case of a long IO where the start offset is not aligned what would happen is that both end of IO[N] and start of IO[N+1] would be unaligned, causing each IO boundary parity unit to be calculated and written twice. So what we do in this patch is split the very start of an unaligned IO, up to a stripe boundary, and then next IO's can continue fully aligned til the end. We might be sacrificing the case where the full unaligned IO would fit within a single max_io_length, but the sacrifice is well worth the elimination of double calculation and parity units IO. Actually the sacrificing is marginal and is almost unmeasurable. TODO: If we know the total expected linear segment that will be received, at pg_init, we could use that information in many places: 1. blocks-layout get_layout write segment size 2. Better mds-threshold 3. In above situation for a better clean split I will do this in future submission. 
Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 55 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index f50d3e8d6f22..ea6d111b03e9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, return false; return pgio->pg_count + req->wb_bytes <= - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + (unsigned long)pgio->pg_layout_private; +} + +void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + pnfs_generic_pg_init_read(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; +} + +static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, + unsigned long *stripe_end) +{ + u32 stripe_off; + unsigned stripe_size; + + if (layout->raid_algorithm == PNFS_OSD_RAID_0) + return true; + + stripe_size = layout->stripe_unit * + (layout->group_width - layout->parity); + + div_u64_rem(offset, stripe_size, &stripe_off); + if (!stripe_off) + return true; + + *stripe_end = stripe_size - stripe_off; + return false; +} + +void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + unsigned long stripe_end = 0; + + pnfs_generic_pg_init_write(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + if (req->wb_offset || + !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, + &OBJIO_LSEG(pgio->pg_lseg)->layout, + &stripe_end)) { + pgio->pg_layout_private = (void *)stripe_end; + } else { + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + } } static const struct nfs_pageio_ops objio_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = objio_init_read, 
.pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = objio_init_write, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From f0cd2dbb6cf387c11f87265462e370bb5469299e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:11:59 +0300 Subject: vfs: kill write_super and sync_supers Finally we can kill the 'sync_supers' kernel thread along with the '->write_super()' superblock operation because all the users are gone. Now every file-system is supposed to self-manage own superblock and its dirty state. The nice thing about killing this thread is that it improves power management. Indeed, 'sync_supers' is a source of monotonic system wake-ups - it woke up every 5 seconds no matter what - even if there were no dirty superblocks and even if there were no file-systems using this service (e.g., btrfs and journalled ext4 do not need it). So it was wasting power most of the time. And because the thread was in the core of the kernel, all systems had to have it. So I am quite happy to make it go away. Interestingly, this thread is a left-over from the pdflush kernel thread which was a self-forking kernel thread responsible for all the write-back in old Linux kernels. It was turned into per-block device BDI threads, and 'sync_supers' was a left-over. Thus, R.I.P, pdflush as well. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/super.c | 40 ---------------------------------- include/linux/backing-dev.h | 1 - include/linux/fs.h | 3 --- mm/backing-dev.c | 52 --------------------------------------------- mm/page-writeback.c | 1 - 5 files changed, 97 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index b05cf47463d0..0902cfa6a12e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -536,46 +536,6 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); -/** - * sync_supers - helper for periodic superblock writeback - * - * Call the write_super method if present on all dirty superblocks in - * the system. This is for the periodic writeback used by most older - * filesystems. For data integrity superblock writeback use - * sync_filesystems() instead. - * - * Note: check the dirty flag before waiting, so we don't - * hold up the sync while mounting a device. (The newly - * mounted device won't need syncing.) - */ -void sync_supers(void) -{ - struct super_block *sb, *p = NULL; - - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; - if (sb->s_op->write_super && sb->s_dirt) { - sb->s_count++; - spin_unlock(&sb_lock); - - down_read(&sb->s_umount); - if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN)) - sb->s_op->write_super(sb); - up_read(&sb->s_umount); - - spin_lock(&sb_lock); - if (p) - __put_super(p); - p = sb; - } - } - if (p) - __put_super(p); - spin_unlock(&sb_lock); -} - /** * iterate_supers - call function for all active superblocks * @f: function to call diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c97c6b9cd38e..2a9a9abc9126 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -124,7 +124,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, void bdi_start_background_writeback(struct backing_dev_info *bdi); int bdi_writeback_thread(void *data); int 
bdi_has_dirty_io(struct backing_dev_info *bdi); -void bdi_arm_supers_timer(void); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); diff --git a/include/linux/fs.h b/include/linux/fs.h index 38dba16c4176..aa110476a95b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1491,7 +1491,6 @@ struct sb_writers { struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned char s_dirt; unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ @@ -1861,7 +1860,6 @@ struct super_operations { int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); - void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_fs) (struct super_block *); int (*unfreeze_fs) (struct super_block *); @@ -2397,7 +2395,6 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); -extern void sync_supers(void); extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6b4718e2ee34..b41823cc05e6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -39,12 +39,6 @@ DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); LIST_HEAD(bdi_pending_list); -static struct task_struct *sync_supers_tsk; -static struct timer_list sync_supers_timer; - -static int bdi_sync_supers(void *); -static void sync_supers_timer_fn(unsigned long); - void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { @@ -250,12 +244,6 @@ static int __init default_bdi_init(void) { int err; - sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers"); - 
BUG_ON(IS_ERR(sync_supers_tsk)); - - setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0); - bdi_arm_supers_timer(); - err = bdi_init(&default_backing_dev_info); if (!err) bdi_register(&default_backing_dev_info, NULL, "default"); @@ -270,46 +258,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) return wb_has_dirty_io(&bdi->wb); } -/* - * kupdated() used to do this. We cannot do it from the bdi_forker_thread() - * or we risk deadlocking on ->s_umount. The longer term solution would be - * to implement sync_supers_bdi() or similar and simply do it from the - * bdi writeback thread individually. - */ -static int bdi_sync_supers(void *unused) -{ - set_user_nice(current, 0); - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - - /* - * Do this periodically, like kupdated() did before. - */ - sync_supers(); - } - - return 0; -} - -void bdi_arm_supers_timer(void) -{ - unsigned long next; - - if (!dirty_writeback_interval) - return; - - next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies; - mod_timer(&sync_supers_timer, round_jiffies_up(next)); -} - -static void sync_supers_timer_fn(unsigned long unused) -{ - wake_up_process(sync_supers_tsk); - bdi_arm_supers_timer(); -} - static void wakeup_timer_fn(unsigned long data) { struct backing_dev_info *bdi = (struct backing_dev_info *)data; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e5363f34e025..5ad5ce23c1e0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1532,7 +1532,6 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, buffer, length, ppos); - bdi_arm_supers_timer(); return 0; } -- cgit v1.2.3 From d3009c6cffd37e9bc8435a002fe862548c440d97 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:02 +0300 Subject: ext3: nuke write_super from comments The '->write_super' superblock method is gone, and this patch 
removes all the references to 'write_super' from ext3. Cc: Jan Kara Cc: Andrew Morton Cc: Andreas Dilger Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ext3/inode.c | 8 -------- fs/ext3/super.c | 11 ----------- 2 files changed, 19 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9a4a5c48b1c9..a07597307fd1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3459,14 +3459,6 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode, * inode out, but prune_icache isn't a user-visible syncing function. * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) * we start and wait on commits. - * - * Is this efficient/effective? Well, we're being nice to the system - * by cleaning up our inodes proactively so they can be reaped - * without I/O. But we are potentially leaving up to five seconds' - * worth of inodes floating about which prune_icache wants us to - * write out. One way to fix that would be to get prune_icache() - * to do a write_super() to free up some memory. It has the desired - * effect. */ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index ff9bcdc5b0d5..8c892e93d8e7 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -64,11 +64,6 @@ static int ext3_freeze(struct super_block *sb); /* * Wrappers for journal_start/end. - * - * The only special thing we need to do here is to make sure that all - * journal_end calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. 
*/ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) { @@ -90,12 +85,6 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) return journal_start(journal, nblocks); } -/* - * The only special thing we need to do here is to make sure that all - * journal_stop calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. - */ int __ext3_journal_stop(const char *where, handle_t *handle) { struct super_block *sb; -- cgit v1.2.3 From 7652bdfcb5888a389a7850ed19a4630e3a09cb9c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:03 +0300 Subject: ext4: nuke write_super from comments The '->write_super' superblock method is gone, and this patch removes all the references to 'write_super' from ext4. Cc: "Theodore Ts'o" Cc: Andreas Dilger Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ext4/inode.c | 8 -------- fs/ext4/super.c | 11 ----------- 2 files changed, 19 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6324f74e0342..bcb60d08a964 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4589,14 +4589,6 @@ static int ext4_expand_extra_isize(struct inode *inode, * inode out, but prune_icache isn't a user-visible syncing function. * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) * we start and wait on commits. - * - * Is this efficient/effective? Well, we're being nice to the system - * by cleaning up our inodes proactively so they can be reaped - * without I/O. But we are potentially leaving up to five seconds' - * worth of inodes floating about which prune_icache wants us to - * write out. One way to fix that would be to get prune_icache() - * to do a write_super() to free up some memory. It has the desired - * effect.
*/ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d76ec8277d3f..3e0851e4f468 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -326,11 +326,6 @@ static void ext4_put_nojournal(handle_t *handle) /* * Wrappers for jbd2_journal_start/end. - * - * The only special thing we need to do here is to make sure that all - * journal_end calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. */ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { @@ -356,12 +351,6 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) return jbd2_journal_start(journal, nblocks); } -/* - * The only special thing we need to do here is to make sure that all - * jbd2_journal_stop calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. - */ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) { struct super_block *sb; -- cgit v1.2.3 From f6463b0da6937e288b115d641ccd46c70fb3a4a8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:04 +0300 Subject: ext4: nuke pdflush from comments The pdflush thread is long gone, so this patch removes references to pdflush from ext4 comments. Cc: "Theodore Ts'o" Cc: Andreas Dilger Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bcb60d08a964..dff171c3a123 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1970,7 +1970,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); * This function can get called via... 
* - ext4_da_writepages after taking page lock (have journal handle) * - journal_submit_inode_data_buffers (no journal handle) - * - shrink_page_list via pdflush (no journal handle) + * - shrink_page_list via the kswapd/direct reclaim (no journal handle) * - grab_page_cache when doing write_begin (have journal handle) * * We don't do any block allocation in this function. If we have page with -- cgit v1.2.3 From 34eaadaf22b0dd453288c6b115e0c823a0fb74d5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:05 +0300 Subject: btrfs: nuke write_super from comments The '->write_super' superblock method is gone, and this patch removes all the references to 'write_super' from btrfs. Cc: Chris Mason Cc: linux-btrfs@vger.kernel.org Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/btrfs/super.c | 4 ---- fs/btrfs/volumes.c | 4 ---- 2 files changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8c6e61d6eed5..f2eb24c477a3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -100,10 +100,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info) fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; } -/* NOTE: - * We move write_super stuff at umount in order to avoid deadlock - * for umount hold all lock. 
- */ static void save_error_info(struct btrfs_fs_info *fs_info) { __save_error_info(fs_info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b8708f994e67..e86ae04abe6a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1744,10 +1744,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->fs_devices = root->fs_info->fs_devices; - /* - * we don't want write_supers to jump in here with our device - * half setup - */ mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); list_add(&device->dev_alloc_list, -- cgit v1.2.3 From b257031408945eb89980e14cb79d5fd854d8f25f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:06 +0300 Subject: btrfs: nuke pdflush from comments The pdflush thread is long gone, so this patch removes references to pdflush from btrfs comments. Cc: Chris Mason Cc: linux-btrfs@vger.kernel.org Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/btrfs/inode.c | 3 ++- fs/btrfs/ordered-data.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 83baec24946d..6e8f416773d4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow, * If this code finds it can't get good compression, it puts an * entry onto the work queue to write the uncompressed bytes. This * makes sure that both compressed inodes and uncompressed inodes - * are written in the same order that pdflush sent them down. + * are written in the same order that the flusher thread sent them + * down. 
*/ static noinline int compress_file_range(struct inode *inode, struct page *locked_page, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 643335a4fe3c..051c7fe551dd 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -596,7 +596,7 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * pages in the range can be dirty, clean or writeback. We * start IO on any dirty ones so the wait doesn't stall waiting - * for pdflush to find them + * for the flusher thread to find them */ if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) filemap_fdatawrite_range(inode->i_mapping, start, end); -- cgit v1.2.3 From 12810ad70858af10f5e00b3c178085c03baa457b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:07 +0300 Subject: jbd/jbd2: nuke write_super from comments The '->write_super' superblock method is gone, and this patch removes all the references to 'write_super' from various jbd and jbd2. Cc: Andrew Morton Cc: Jan Kara Cc: "Theodore Ts'o" Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/jbd/journal.c | 4 ++-- fs/jbd2/journal.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 425c2f2cf170..09357508ec9a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -534,8 +534,8 @@ int journal_start_commit(journal_t *journal, tid_t *ptid) ret = 1; } else if (journal->j_committing_transaction) { /* - * If ext3_write_super() recently started a commit, then we - * have to wait for completion of that transaction + * If commit has been started, then we have to wait for + * completion of that transaction. 
*/ if (ptid) *ptid = journal->j_committing_transaction->t_tid; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e9a3c4c85594..8625da27eccf 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -612,8 +612,8 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) ret = 1; } else if (journal->j_committing_transaction) { /* - * If ext3_write_super() recently started a commit, then we - * have to wait for completion of that transaction + * If commit has been started, then we have to wait for + * completion of that transaction. */ if (ptid) *ptid = journal->j_committing_transaction->t_tid; -- cgit v1.2.3 From 0d5c3eba2e1e5aa74e097f49bc90b58f607e101c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:08 +0300 Subject: vfs: nuke pdflush from comments The pdflush thread is long gone, so this patch removes references to pdflush from vfs comments. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/bio.c | 2 +- include/linux/writeback.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 73922abba832..5eaa70c9d96e 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1312,7 +1312,7 @@ EXPORT_SYMBOL(bio_copy_kern); * Note that this code is very hard to test under normal circumstances because * direct-io pins the pages with get_user_pages(). This makes * is_page_cache_freeable return false, and the VM will not clean the pages. - * But other code (eg, pdflush) could clean the pages if they are mapped + * But other code (eg, flusher threads) could clean the pages if they are mapped * pagecache. 
* * Simply disabling the call to bio_set_pages_dirty() is a good way to test the diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c66fe3332d83..50c3e8fa06a8 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -104,7 +104,6 @@ static inline void wait_on_inode(struct inode *inode) wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); } - /* * mm/page-writeback.c */ -- cgit v1.2.3 From 50640bcc0a0e5a66587fa051b327654c739c9c19 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:09 +0300 Subject: hfs: nuke write_super from comments The '->write_super' superblock method is gone, and this patch removes all the references to 'write_super' from hfs. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/hfs/mdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 5fd51a5833ff..b7ec224910c5 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -236,10 +236,10 @@ out: * hfs_mdb_commit() * * Description: - * This updates the MDB on disk (look also at hfs_write_super()). + * This updates the MDB on disk. * It does not check, if the superblock has been modified, or * if the filesystem has been mounted read-only. It is mainly - * called by hfs_write_super() and hfs_btree_extend(). + * called by hfs_sync_fs() and flush_mdb(). * Input Variable(s): * struct hfs_mdb *mdb: Pointer to the hfs MDB * int backup; -- cgit v1.2.3 From 166ac34b74faa757ca936fde790d2121a3f40f65 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:11 +0300 Subject: nilfs2: nuke write_super from comments The '->write_super' superblock method is gone, and this patch removes all the references to 'write_super' from nilfs2.
Cc: KONISHI Ryusuke Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/nilfs2/super.c | 4 ---- fs/nilfs2/the_nilfs.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6522cac6057c..6a10812711c1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -676,17 +676,13 @@ static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .destroy_inode = nilfs_destroy_inode, .dirty_inode = nilfs_dirty_inode, - /* .write_inode = nilfs_write_inode, */ - /* .drop_inode = nilfs_drop_inode, */ .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, - /* .write_super = nilfs_write_super, */ .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, .remount_fs = nilfs_remount, - /* .umount_begin */ .show_options = nilfs_show_options }; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 6eee4177807b..be1267a34cea 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -107,8 +107,6 @@ struct the_nilfs { * used for * - loading the latest checkpoint exclusively. * - allocating a new full segment. - * - protecting s_dirt in the super_block struct - * (see nilfs_write_super) and the following fields. */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; -- cgit v1.2.3 From e76e0ec9849bcd3290c19ba6f71c7c01e7988414 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:13 +0300 Subject: gfs2: nuke pdflush from comments The pdflush thread is long gone, so this patch removes references to pdflush from gfs comments. 
Cc: Steven Whitehouse Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/gfs2/meta_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3a56c8d94de0..22255d96b27e 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -52,7 +52,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb /* * If it's a fully non-blocking write attempt and we cannot * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from pdflush and kswapd + * potentially cause a busy-wait loop from flusher thread and kswapd * activity, but those code paths have their own higher-level * throttling. */ -- cgit v1.2.3 From 5c57f20b824a163bd7dfa42abc76582ad24a745a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:12:14 +0300 Subject: UBIFS: nuke pdflush from comments The pdflush thread is long gone, so this patch removes references to pdflush from UBIFS comments. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/ubifs/file.c | 10 +++++----- fs/ubifs/super.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 35389ca2d267..7bd6e72afd11 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -37,11 +37,11 @@ * * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we * implement. However, this is not true for 'ubifs_writepage()', which may be - * called with @i_mutex unlocked. For example, when pdflush is doing background - * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" - * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the - * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' - * we are only guaranteed that the page is locked. + * called with @i_mutex unlocked. 
For example, when flusher thread is doing + * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. + * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. + * in the "sys_write -> alloc_pages -> direct reclaim path". So, in + * 'ubifs_writepage()' we are only guaranteed that the page is locked. * * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1c766c39c038..c3fa6c5327a3 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -303,7 +303,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) mutex_lock(&ui->ui_mutex); /* * Due to races between write-back forced by budgeting - * (see 'sync_some_inodes()') and pdflush write-back, the inode may + * (see 'sync_some_inodes()') and background write-back, the inode may * have already been synchronized, do not do this again. This might * also happen if it was synchronized in an VFS operation, e.g. * 'ubifs_link()'. -- cgit v1.2.3 From fe7c80518e34d1786f4a940ce673a0bfcbe53298 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Aug 2012 08:39:23 +0400 Subject: missed mnt_drop_write() in do_dentry_open() This one ought to be __mnt_drop_write(), to match __mnt_want_write() in the beginning... Signed-off-by: Al Viro --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index f3d96e7e7b19..bc132e167d2d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -717,7 +717,7 @@ cleanup_all: * here, so just reset the state. 
*/ file_reset_write(f); - mnt_drop_write(f->f_path.mnt); + __mnt_drop_write(f->f_path.mnt); } } cleanup_file: -- cgit v1.2.3 From d796c52ef0b71a988364f6109aeb63d79c5b116b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Aug 2012 19:04:57 -0400 Subject: ext4: make sure the journal sb is written in ext4_clear_journal_err() After we set the EXT4_ERROR_FS bit in the file system superblock, it's not enough to call jbd2_journal_clear_err() to clear the error indication from the journal superblock --- we need to call jbd2_journal_update_sb_errno() as well. Otherwise, when the root file system is mounted read-only, the journal is replayed, and the error indicator is transferred to the superblock --- but the s_errno field in the jbd2 superblock is left set (since although we cleared it in memory, we never flushed it out to disk). This can end up confusing e2fsck. We should make e2fsck more robust in this case, but the kernel shouldn't be leaving things in this confused state, either. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 1 + fs/jbd2/journal.c | 3 ++- include/linux/jbd2.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d76ec8277d3f..ccc4bcad5616 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4430,6 +4430,7 @@ static void ext4_clear_journal_err(struct super_block *sb, ext4_commit_super(sb, 1); jbd2_journal_clear_err(journal); + jbd2_journal_update_sb_errno(journal); } } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e9a3c4c85594..bd23f2ebaa67 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1377,7 +1377,7 @@ static void jbd2_mark_journal_empty(journal_t *journal) * Update a journal's errno. Write updated superblock to disk waiting for IO * to complete.
*/ -static void jbd2_journal_update_sb_errno(journal_t *journal) +void jbd2_journal_update_sb_errno(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; @@ -1390,6 +1390,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) jbd2_write_superblock(journal, WRITE_SYNC); } +EXPORT_SYMBOL(jbd2_journal_update_sb_errno); /* * Read the superblock for a given journal, performing initial diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f334c7fab967..3efc43f3f162 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1125,6 +1125,7 @@ extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); +extern void jbd2_journal_update_sb_errno(journal_t *); extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, unsigned long, int); extern void __jbd2_journal_abort_hard (journal_t *); -- cgit v1.2.3 From 7e731bc9a12339f344cddf82166b82633d99dd86 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Aug 2012 23:28:16 -0400 Subject: ext4: avoid kmemcheck complaint from reading uninitialized memory Commit 03179fe923 introduced a kmemcheck complaint in ext4_da_get_block_prep() because we save and restore ei->i_da_metadata_calc_last_lblock even though it is left uninitialized in the case where i_da_metadata_calc_len is zero. This doesn't hurt anything, but silencing the kmemcheck complaint makes it easier for people to find real bugs. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=45631 (which is marked as a regression). 
Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ccc4bcad5616..56bcaec9149c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -959,6 +959,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; + ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; -- cgit v1.2.3 From fb6ccff667712c46b4501b920ea73a326e49626a Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 24 Jul 2012 12:10:11 -0700 Subject: fuse: verify all ioctl retry iov elements Commit 7572777eef78ebdee1ecb7c258c0ef94d35bad16 attempted to verify that the total iovec from the client doesn't overflow iov_length() but it only checked the first element. The iovec could still overflow by starting with a small element. The obvious fix is to check all the elements. The overflow case doesn't look dangerous to the kernel as the copy is limited by the length after the overflow. This fix restores the intention of returning an error instead of successfully copying less than the iovec represented. I found this by code inspection. I built it but don't have a test case. I'm cc:ing stable because the initial commit did as well. 
Signed-off-by: Zach Brown Signed-off-by: Miklos Szeredi CC: [2.6.37+] --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5800101e5ce1..2eed3acfb6a6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1703,7 +1703,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) size_t n; u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; - for (n = 0; n < count; n++) { + for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; -- cgit v1.2.3 From 47fbf7976e0b7d9dcdd799e2a1baba19064d9631 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Aug 2012 16:03:13 -0400 Subject: NFSv4.1: Remove a bogus BUG_ON() in nfs4_layoutreturn_done Ever since commit 0a57cdac3f (NFSv4.1 send layoutreturn to fence disconnected data server) we've been sending layoutreturn calls while there is potentially still outstanding I/O to the data servers. The reason we do this is to avoid races between replayed writes to the MDS and the original writes to the DS. When this happens, the BUG_ON() in nfs4_layoutreturn_done can be triggered because it assumes that we would never call layoutreturn without knowing that all I/O to the DS is finished. The fix is to remove the BUG_ON() now that the assumptions behind the test are obsolete. 
Reported-by: Boaz Harrosh Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>=3.5] --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f94f6b3928fc..c77d296bdaa6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6359,12 +6359,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); -- cgit v1.2.3 From 389d7b26d9e4f78b17366c23a3aa16b3c5cb3bde Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Thu, 9 Aug 2012 15:19:25 +0200 Subject: bio: Fix potential memory leak in bio_find_or_create_slab() Do not leak memory by updating pointer with potentially NULL realloc return value. Found by Linux Driver Verification project (linuxtesting.org). 
Signed-off-by: Alexey Khoroshilov Acked-by: Jeff Moyer Signed-off-by: Jens Axboe --- fs/bio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 73922abba832..fed1f799cb56 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) { unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL; - struct bio_slab *bslab; + struct bio_slab *bslab, *new_bio_slabs; unsigned int i, entry = -1; mutex_lock(&bio_slab_lock); @@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (bio_slab_nr == bio_slab_max && entry == -1) { bio_slab_max <<= 1; - bio_slabs = krealloc(bio_slabs, - bio_slab_max * sizeof(struct bio_slab), - GFP_KERNEL); - if (!bio_slabs) + new_bio_slabs = krealloc(bio_slabs, + bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!new_bio_slabs) goto out_unlock; + bio_slabs = new_bio_slabs; } if (entry == -1) entry = bio_slab_nr++; -- cgit v1.2.3 From 647d1e4c5235763b83fbfe74a09d148edc6ca152 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 9 Aug 2012 15:23:09 +0200 Subject: block: move down direct IO plugging Move unplugging for direct I/O from around ->direct_IO() down to do_blockdev_direct_IO(). This implicitly adds plugging for direct writes. 
CC: Li Shaohua Acked-by: Jeff Moyer Signed-off-by: Wu Fengguang Signed-off-by: Jens Axboe --- fs/direct-io.c | 5 +++++ mm/filemap.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 1faf4cb56f39..f86c720dba0e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, unsigned long user_addr; size_t bytes; struct buffer_head map_bh = { 0, }; + struct blk_plug plug; if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, PAGE_SIZE - user_addr / PAGE_SIZE); } + blk_start_plug(&plug); + for (seg = 0; seg < nr_segs; seg++) { user_addr = (unsigned long)iov[seg].iov_base; sdio.size += bytes = iov[seg].iov_len; @@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (sdio.bio) dio_bio_submit(dio, &sdio); + blk_finish_plug(&plug); + /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. diff --git a/mm/filemap.c b/mm/filemap.c index 2b0952974cb9..384344575c37 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1412,12 +1412,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, retval = filemap_write_and_wait_range(mapping, pos, pos + iov_length(iov, nr_segs) - 1); if (!retval) { - struct blk_plug plug; - - blk_start_plug(&plug); retval = mapping->a_ops->direct_IO(READ, iocb, iov, pos, nr_segs); - blk_finish_plug(&plug); } if (retval > 0) { *ppos = pos + retval; -- cgit v1.2.3 From e00da2067b78a9246f767012a3803224c40b1f9f Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Thu, 2 Aug 2012 17:16:20 -0600 Subject: Btrfs: remove mnt_want_write call in btrfs_mksubvol We got a recursive lock in mksubvol because the caller already held a lock. I think we got into this due to a merge error. 
Commit a874a63 removed the mnt_want_write call from btrfs_mksubvol and added a replacement call to mnt_want_write_file in btrfs_ioctl_snap_create_transid. Commit e7848683 however tried to move all calls to mnt_want_write above i_mutex. So somewhere while merging this, it got mixed up. The solution is to remove the mnt_want_write call completely from mksubvol. Reported-by: David Sterba Signed-off-by: Alexander Block Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bc2f6ffff3cf..7bb755677a22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -664,10 +664,6 @@ static noinline int btrfs_mksubvol(struct path *parent, struct dentry *dentry; int error; - error = mnt_want_write(parent->mnt); - if (error) - return error; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, parent->dentry, namelen); @@ -703,7 +699,6 @@ out_dput: dput(dentry); out_unlock: mutex_unlock(&dir->i_mutex); - mnt_drop_write(parent->mnt); return error; } -- cgit v1.2.3 From bb2b6d19ec8b593b66402e2895c4314955b19833 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 23 Jul 2012 16:39:29 +0000 Subject: udf: fix udf_setsize() for file data in ICB If the new size is larger than the old size and the old file data was stored in the ICB (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) and the new size still fits in the ICB, skip the call to udf_extend_file() as it does not handle this i_alloc_type value (it calls BUG()). 
Signed-off-by: Ian Abbott Signed-off-by: Jan Kara --- fs/udf/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index fafaad795cd6..aa233469b3c1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize) if (err) return err; down_write(&iinfo->i_data_sem); - } else + } else { iinfo->i_lenAlloc = newsize; + goto set_size; + } } err = udf_extend_file(inode, newsize); if (err) { up_write(&iinfo->i_data_sem); return err; } +set_size: truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); } else { -- cgit v1.2.3 From dc141a402b9dc03a4188cd978a4cf149c397172c Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Sat, 21 Jul 2012 16:35:17 +0530 Subject: UDF: During mount free lvid_bh before rescanning with different blocksize If s_lvid_bh is not freed and set to NULL before re-scanning partition with default block size, we might end up using wrong lvid in case s_lvid_bh is not updated in udf_load_logicalvolint during rescan. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: Jan Kara --- fs/udf/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index dcbf98722afc..9f55f7981b7d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2000,6 +2000,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!silent) pr_notice("Rescanning with blocksize %d\n", UDF_DEFAULT_BLOCKSIZE); + brelse(sbi->s_lvid_bh); + sbi->s_lvid_bh = NULL; uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; ret = udf_load_vrs(sb, &uopt, silent, &fileset); } -- cgit v1.2.3 From 6ea2eea1fa930b9308a06f77fce65c38931eeb13 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Wed, 18 Jul 2012 12:12:41 +0800 Subject: quota: Move down dqptr_sem read after initializing default warn[] type at __dquot_alloc_space(). 
sb->s_dqopt->dqptr_sem is used to serialize ops using pointers from inode to dquots. But for __dquot_alloc_space(), it could be safely moved down after the default warn[] array got initialized. Signed-off-by: Jie Liu Signed-off-by: Jan Kara --- fs/quota/dquot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 36a29b753c79..c495a3055e2a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) -- cgit v1.2.3 From 48d1788493f874e5d32dccb2911a7bc91c248b4b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 2 Aug 2012 21:36:04 -0400 Subject: reiserfs: fix deadlocks with quotas The BKL push-down for reiserfs made lock recursion a special case that needs to be handled explicitly. One of the cases that was unhandled is dropping the quota during inode eviction. Both reiserfs_evict_inode and reiserfs_write_dquot take the write lock, but when the journal lock is taken it only drops one of the references. The locking rules are that the journal lock be acquired before the write lock so leaving the reference open leads to an ABBA deadlock. This patch pushes the unlock up before clear_inode and avoids the recursive locking. Another ABBA situation can occur when the write lock is dropped while reading the bitmap buffer while in the quota code. When the lock is reacquired, it will deadlock against dquot->dq_lock and dqopt->dqio_mutex in the dquot_acquire path. It's safe to retain the lock across the read and should be cached under write load.
Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara --- fs/reiserfs/bitmap.c | 2 -- fs/reiserfs/inode.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 4c0c7d163d15..a98b7740a0fc 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, else if (bitmap == 0) block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - reiserfs_write_unlock(sb); bh = sb_bread(sb, block); - reiserfs_write_lock(sb); if (bh == NULL) reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " "reading failed", __func__, block); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6d4268fb6c1..855da58db145 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode) ; } out: + reiserfs_write_unlock_once(inode->i_sb, depth); clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ dquot_drop(inode); inode->i_blocks = 0; - reiserfs_write_unlock_once(inode->i_sb, depth); return; no_delete: -- cgit v1.2.3 From e68726ff72cf7ba5e7d789857fcd9a75ca573f03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: canonicalize create mode in build_open_flags() Userspace can pass weird create mode in open(2) that we canonicalize to "(mode & S_IALLUGO) | S_IFREG" in vfs_create(). The problem is that we use the uncanonicalized mode before calling vfs_create() with unforseen consequences. So do the canonicalization early in build_open_flags(). Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. 
Jones CC: stable@vger.kernel.org --- fs/open.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index bc132e167d2d..e1f2cdb91a4d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -852,9 +852,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; - if (!(flags & O_CREAT)) - mode = 0; - op->mode = mode; + if (flags & O_CREAT) + op->mode = (mode & S_IALLUGO) | S_IFREG; + else + op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY; -- cgit v1.2.3 From 62b259d8b3ea9d4a73108fc599e40c863ec25ae6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: atomic_open(): fix create mode usage Don't mask S_ISREG off the create mode before passing to ->atomic_open(). Other methods (->create, ->mknod) also get the complete file mode and filesystems expect it. Reported-by: Steve Reported-by: Richard W.M. Jones Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde8..5bac1bb6e585 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2414,7 +2414,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto out; } - mode = op->mode & S_IALLUGO; + mode = op->mode; if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); -- cgit v1.2.3 From 38227f78a5020b3100cbb0406c89807563b10dae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: pass right create mode to may_o_create() Pass the umask-ed create mode to may_o_create() instead of the original one. Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. 
Jones --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 5bac1bb6e585..26c28ec4f4af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2452,7 +2452,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (open_flag & O_CREAT) { - error = may_o_create(&nd->path, dentry, op->mode); + error = may_o_create(&nd->path, dentry, mode); if (error) { create_error = error; if (open_flag & O_EXCL) -- cgit v1.2.3 From af109bca94a8a223c4632a4ff769b3419fe7ed8c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: fuse: check create mode in atomic open Verify that the VFS is passing us a complete create mode with the S_IFREG to atomic open. Reported-by: Steve Reported-by: Richard W.M. Jones Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/fuse/dir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8964cf3999b2..324bc0850534 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_file *ff; + /* Userspace expects S_IFREG in create mode */ + BUG_ON((mode & S_IFMT) != S_IFREG); + forget = fuse_alloc_forget(); err = -ENOMEM; if (!forget) -- cgit v1.2.3 From 2e84f2641ea91a730642ead558a4ee3bd52310c9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Aug 2012 13:50:27 +0200 Subject: jbd: don't write superblock when unmounting an ro filesystem This sequence: results in an IO error when unmounting the RO filesystem. The bug was introduced by: commit 9754e39c7bc51328f145e933bfb0df47cd67b6e9 Author: Jan Kara Date: Sat Apr 7 12:33:03 2012 +0200 jbd: Split updating of journal superblock and marking journal empty which lost some of the magic in journal_update_superblock() which used to test for a journal with no outstanding transactions. 
This is a port of a jbd2 fix by Eric Sandeen. CC: # 3.4.x Signed-off-by: Jan Kara --- fs/jbd/journal.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 09357508ec9a..a2862339323b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); spin_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + spin_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); -- cgit v1.2.3 From 68766a2edcd5cd744262a70a2f67a320ac944760 Mon Sep 17 00:00:00 2001 From: Nikola Pajkovsky Date: Wed, 15 Aug 2012 00:38:08 +0200 Subject: udf: fix retun value on error path in udf_load_logicalvol In case we detect a problem and bail out, we fail to set "ret" to a nonzero value, and udf_load_logicalvol will mistakenly report success. 
Signed-off-by: Nikola Pajkovsky Signed-off-by: Jan Kara --- fs/udf/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 9f55f7981b7d..18fc038a438d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } @@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; goto out_bh; + } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { -- cgit v1.2.3 From 62b2ce964bb901f00a480104bd35a2e1f8d2cf58 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 15 Aug 2012 13:30:12 -0700 Subject: vfs: fix propagation of atomic_open create error on negative dentry If ->atomic_open() returns -ENOENT, we take care to return the create error (e.g., EACCES), if any. Do the same when ->atomic_open() returns 1 and provides a negative dentry. This fixes a regression where an unprivileged open O_CREAT fails with ENOENT instead of EACCES, introduced with the new atomic_open code. It is tested by the open/08.t test in the pjd posix test suite, and was observed on top of fuse (backed by ceph-fuse). 
Signed-off-by: Sage Weil Signed-off-by: Miklos Szeredi --- fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 26c28ec4f4af..db76b866a097 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2489,6 +2489,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } + if (create_error && dentry->d_inode == NULL) { + error = create_error; + goto out; + } goto looked_up; } -- cgit v1.2.3 From a45440f05e9ebc26f2a375df911823fdef5b5281 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 6 Aug 2012 09:37:47 +0800 Subject: autofs4 - fix get_next_positive_subdir() Following a report of a crash during an automount expire I found that the locking in fs/autofs4/expire.c:get_next_positive_subdir() was wrong. Not only is the locking wrong but the function is more complex than it needs to be. The function is meant to calculate (and dget) the next entry in the list of directories contained in the root of an autofs mount point (an autofs indirect mount to be precise). The main problem was that the d_lock of the owner of the list was not being taken when walking the list, which led to list corruption under load. The only other lock that needs to be taken is against the next dentry candidate so it can be checked for usability.
Signed-off-by: Ian Kent Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 1feb68ecef95..8c0e56d92938 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -94,25 +94,21 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, { struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); struct list_head *next; - struct dentry *p, *q; + struct dentry *q; spin_lock(&sbi->lookup_lock); + spin_lock(&root->d_lock); - if (prev == NULL) { - spin_lock(&root->d_lock); + if (prev) + next = prev->d_u.d_child.next; + else { prev = dget_dlock(root); next = prev->d_subdirs.next; - p = prev; - goto start; } - p = prev; - spin_lock(&p->d_lock); -again: - next = p->d_u.d_child.next; -start: +cont: if (next == &root->d_subdirs) { - spin_unlock(&p->d_lock); + spin_unlock(&root->d_lock); spin_unlock(&sbi->lookup_lock); dput(prev); return NULL; @@ -121,16 +117,15 @@ start: q = list_entry(next, struct dentry, d_u.d_child); spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); - /* Negative dentry - try next */ - if (!simple_positive(q)) { - spin_unlock(&p->d_lock); - lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_); - p = q; - goto again; + /* Already gone or negative dentry (under construction) - try next */ + if (q->d_count == 0 || !simple_positive(q)) { + spin_unlock(&q->d_lock); + next = q->d_u.d_child.next; + goto cont; } dget_dlock(q); spin_unlock(&q->d_lock); - spin_unlock(&p->d_lock); + spin_unlock(&root->d_lock); spin_unlock(&sbi->lookup_lock); dput(prev); -- cgit v1.2.3 From 1ae811ee27912a0521e4b92dc9a1850c0243a247 Mon Sep 17 00:00:00 2001 From: bjschuma@gmail.com Date: Wed, 8 Aug 2012 13:57:06 -0400 Subject: NFS: Fix a regression when loading the NFS v4 module Some systems have a modprobe.d/nfs.conf file that sets an nfs4 alias pointing to nfs.ko, rather than nfs4.ko. 
This can prevent the v4 module from loading on mount, since the kernel sees that something named "nfs4" has already been loaded. To work around this, I've renamed the modules to "nfsv2.ko" "nfsv3.ko" and "nfsv4.ko". I also had to move the nfs4_fs_type back to nfs.ko to ensure that `mount -t nfs4` still works. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 18 +++++++++--------- fs/nfs/client.c | 2 +- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/nfs4super.c | 15 --------------- fs/nfs/super.c | 37 ++++++++++++++++++++++++++++++++++++- 5 files changed, 49 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8bf3a3f6925a..b7db60897f91 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o -obj-$(CONFIG_NFS_V2) += nfs2.o -nfs2-y := nfs2super.o proc.o nfs2xdr.o +obj-$(CONFIG_NFS_V2) += nfsv2.o +nfsv2-y := nfs2super.o proc.o nfs2xdr.o -obj-$(CONFIG_NFS_V3) += nfs3.o -nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o -nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +obj-$(CONFIG_NFS_V3) += nfsv3.o +nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o -obj-$(CONFIG_NFS_V4) += nfs4.o -nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ +obj-$(CONFIG_NFS_V4) += nfsv4.o +nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs4getroot.o nfs4client.o -nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c 
index 9fc0d9dfc91b..99694442b93f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version) if (IS_ERR(nfs)) { mutex_lock(&nfs_version_mutex); - request_module("nfs%d", version); + request_module("nfsv%d", version); nfs = find_nfs_version(version); mutex_unlock(&nfs_version_mutex); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3b950dd81e81..da0618aeeadb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); +/* super.c */ +extern struct file_system_type nfs4_fs_type; + /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 12a31a9dbcdd..bd61221ad2c5 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -344,14 +336,8 @@ static int __init init_nfs_v4(void) if (err) goto out1; - err = register_filesystem(&nfs4_fs_type); - if (err < 0) - goto out2; - register_nfs_version(&nfs_v4); return 0; -out2: - nfs4_unregister_sysctl(); out1: nfs_idmap_quit(); out: @@ -361,7 +347,6 @@ out: static void __exit exit_nfs_v4(void) { unregister_nfs_version(&nfs_v4); - unregister_filesystem(&nfs4_fs_type); 
nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac6a3c55dce4..c4a15c55519c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops); static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); + +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; +EXPORT_SYMBOL_GPL(nfs4_fs_type); + +static int __init register_nfs4_fs(void) +{ + return register_filesystem(&nfs4_fs_type); +} + +static void unregister_nfs4_fs(void) +{ + unregister_filesystem(&nfs4_fs_type); +} +#else +static int __init register_nfs4_fs(void) +{ + return 0; +} + +static void unregister_nfs4_fs(void) +{ +} #endif static struct shrinker acl_shrinker = { @@ -337,12 +365,18 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; - ret = nfs_register_sysctl(); + ret = register_nfs4_fs(); if (ret < 0) goto error_1; + + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_2; register_shrinker(&acl_shrinker); return 0; +error_2: + unregister_nfs4_fs(); error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); nfs_unregister_sysctl(); + unregister_nfs4_fs(); unregister_filesystem(&nfs_fs_type); } -- cgit v1.2.3 From 425e776d93a7a5070b77d4f458a5bab0f924652c Mon Sep 17 00:00:00 2001 From: bjschuma@gmail.com Date: Wed, 8 Aug 2012 13:57:10 -0400 Subject: NFS: Alias the nfs module to nfs4 This allows distros to remove the line from their modprobe configuration. 
Signed-off-by: Bryan Schumaker Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c4a15c55519c..239aff7338eb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2680,4 +2680,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From 519d3959e30a98f8e135e7a16647c10af5ad63d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:30:10 -0400 Subject: NFSv4: Fix pointer arithmetic in decode_getacl Resetting the cursor xdr->p to a previous value is not a safe practice: if the xdr_stream has crossed out of the initial iovec, then a bunch of other fields would need to be reset too. Fix this issue by using xdr_enter_page() so that the buffer gets page aligned at the bitmap _before_ we decode it. Also fix the confusion of the ACL length with the page buffer length by not adding the base offset to the ACL length... 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 21 +++++++-------------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c77d296bdaa6..286ab7078413 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3819,7 +3819,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len - res.acl_data_offset; + acl_len = res.acl_len; if (acl_len > args.acl_len) nfs4_write_cached_acl(inode, NULL, 0, acl_len); else diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ca13483edd60..54d3f5a9faa6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5049,18 +5049,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, uint32_t attrlen, bitmap[3] = {0}; int status; - size_t page_len = xdr->buf->page_len; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + xdr_enter_page(xdr, xdr->buf->page_len); + bm_p = xdr->p; - res->acl_data_offset = be32_to_cpup(bm_p) + 2; - res->acl_data_offset <<= 2; - /* Check if the acl data starts beyond the allocated buffer */ - if (res->acl_data_offset > page_len) - return -ERANGE; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5074,23 +5070,20 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - xdr->p = bm_p; + res->acl_data_offset = (xdr->p - bm_p) << 2; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... 
*/ - attrlen += res->acl_data_offset; - if (attrlen > page_len) { + res->acl_len = attrlen; + if (attrlen + res->acl_data_offset > xdr->buf->page_len) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ - res->acl_len = attrlen; goto out; } - dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", - attrlen, page_len); + dprintk("NFS: acl reply: attrlen %u > page_len %u\n", + attrlen, xdr->buf->page_len); return -EINVAL; } - xdr_read_pages(xdr, attrlen); - res->acl_len = attrlen; } else status = -EOPNOTSUPP; -- cgit v1.2.3 From b291f1b1c86aa0c7bc3df2994e6a1a4e53f1fde0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 18:30:41 -0400 Subject: NFSv4: Fix the acl cache size calculation Currently, we do not take into account the size of the 16 byte struct nfs4_cached_acl header, when deciding whether or not we should cache the acl data. Consequently, we will end up allocating an 8k buffer in order to fit a maximum size 4k acl. This patch adjusts the calculation so that we limit the cache size to 4k for the acl header+data. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 286ab7078413..635274140b18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3737,9 +3737,10 @@ out: static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; + size_t buflen = sizeof(*acl) + acl_len; - if (pages && acl_len <= PAGE_SIZE) { - acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (pages && buflen <= PAGE_SIZE) { + acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; -- cgit v1.2.3 From cff298c721099c9ac4cea7196a37097ba2847946 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:14:17 -0400 Subject: NFSv4: Don't use private xdr_stream fields in decode_getacl Instead of using the private field xdr->p from struct xdr_stream, use the public xdr_stream_pos(). Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 54d3f5a9faa6..1bfbd67c556d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5045,10 +5045,10 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { unsigned int savep; - __be32 *bm_p; uint32_t attrlen, bitmap[3] = {0}; int status; + unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -5056,7 +5056,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, xdr_enter_page(xdr, xdr->buf->page_len); - bm_p = xdr->p; + /* Calculate the offset of the page data */ + pg_offset = xdr->buf->head[0].iov_len; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5070,18 +5071,18 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + 
bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - res->acl_data_offset = (xdr->p - bm_p) << 2; + res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ res->acl_len = attrlen; - if (attrlen + res->acl_data_offset > xdr->buf->page_len) { + if (attrlen > (xdr->nwords << 2)) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ goto out; } dprintk("NFS: acl reply: attrlen %u > page_len %u\n", - attrlen, xdr->buf->page_len); + attrlen, xdr->nwords << 2); return -EINVAL; } } else -- cgit v1.2.3 From c5066945b7ea346a11424dbeb7830b7d7d00c206 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:49 -0400 Subject: NFS: Clear key construction data if the idmap upcall fails idmap_pipe_downcall already clears this field if the upcall succeeds, but if it fails (rpc.idmapd isn't running) the field will still be set on the next call triggering a BUG_ON(). This patch tries to handle all possible ways that the upcall could fail and clear the idmap key data for each one. 
Signed-off-by: Bryan Schumaker Tested-by: William Dauchy Cc: stable@vger.kernel.org [>= 3.4] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b701358c39c3..6703c73307a5 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -61,6 +61,12 @@ struct idmap { struct mutex idmap_mutex; }; +struct idmap_legacy_upcalldata { + struct rpc_pipe_msg pipe_msg; + struct idmap_msg idmap_msg; + struct idmap *idmap; +}; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields * @fattr: fully initialised struct nfs_fattr @@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name, namelen, type, data, data_size, idmap); + idmap->idmap_key_cons = NULL; mutex_unlock(&idmap->idmap_mutex); } return ret; @@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = { static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); +static void idmap_release_pipe(struct inode *); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, + .release_pipe = idmap_release_pipe, .destroy_msg = idmap_pipe_destroy_msg, }; @@ -616,7 +625,8 @@ void nfs_idmap_quit(void) nfs_idmap_quit_keyring(); } -static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, +static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, + struct idmap_msg *im, struct rpc_pipe_msg *msg) { substring_t substr; @@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, const char *op, void *aux) { + struct idmap_legacy_upcalldata *data; struct rpc_pipe_msg 
*msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; @@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - goto out0; - - im = kmalloc(sizeof(*im), GFP_KERNEL); - if (!im) + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) goto out1; - ret = nfs_idmap_prepare_message(key->description, im, msg); + msg = &data->pipe_msg; + im = &data->idmap_msg; + data->idmap = idmap; + + ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) goto out2; @@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - goto out2; + goto out3; return ret; +out3: + idmap->idmap_key_cons = NULL; out2: - kfree(im); + kfree(data); out1: - kfree(msg); -out0: complete_request_key(cons, ret); return ret; } @@ -775,9 +786,26 @@ out_incomplete: static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { + struct idmap_legacy_upcalldata *data = container_of(msg, + struct idmap_legacy_upcalldata, + pipe_msg); + struct idmap *idmap = data->idmap; + struct key_construction *cons; + if (msg->errno) { + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + complete_request_key(cons, msg->errno); + } /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(msg->data); - kfree(msg); + kfree(data); +} + +static void +idmap_release_pipe(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; + idmap->idmap_key_cons = NULL; } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -- cgit v1.2.3 From 12dfd080556124088ed61a292184947711b46cbe Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:50 -0400 Subject: NFS: return -ENOKEY when the upcall 
fails to map the name This allows the normal error-paths to handle the error, rather than making a special call to complete_request_key() just for this instance. Signed-off-by: Bryan Schumaker Tested-by: William Dauchy Cc: stable@vger.kernel.org [>= 3.4] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 6703c73307a5..a850079467d8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -760,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(cons, -ENOKEY); - goto out_incomplete; + ret = -ENOKEY; + goto out; } namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); @@ -779,7 +778,6 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) out: complete_request_key(cons, ret); -out_incomplete: return ret; } -- cgit v1.2.3 From 7a4c5de27efa4c2ecca87af0a3deea63446367e2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 10 Aug 2012 13:57:52 -0400 Subject: ext4: don't call ext4_error while block group is locked While in ext4_validate_block_bitmap(), if a block allocation bitmap is found to be invalid, we call ext4_error() while the block group is still locked. This causes ext4_commit_super() to call a function which might sleep while in an atomic context. There's no need to keep the block group locked at this point, so hoist the ext4_error() call up to ext4_validate_block_bitmap() and release the block group spinlock before calling ext4_error(). 
The reported stack trace can be found at: http://article.gmane.org/gmane.comp.file-systems.ext4/33731 Reported-by: Dave Jones Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/balloc.c | 62 +++++++++++++++++++++++++++++++++----------------------- fs/ext4/bitmap.c | 1 - 2 files changed, 37 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d23b31ca9d7a..1b5089067d01 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, return desc; } -static int ext4_valid_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) +/* + * Return the block number which was discovered to be invalid, or 0 if + * the block bitmap is valid. + */ +static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) { ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; - ext4_fsblk_t bitmap_blk; + ext4_fsblk_t blk; ext4_fsblk_t group_first_block; if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { @@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb, * or it has to also read the block group where the bitmaps * are located to verify they are set. 
*/ - return 1; + return 0; } group_first_block = ext4_group_first_block_no(sb, block_group); /* check whether block bitmap block number is set */ - bitmap_blk = ext4_block_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_block_bitmap(sb, desc); + offset = blk - group_first_block; if (!ext4_test_bit(offset, bh->b_data)) /* bad block bitmap */ - goto err_out; + return blk; /* check whether the inode bitmap block number is set */ - bitmap_blk = ext4_inode_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_inode_bitmap(sb, desc); + offset = blk - group_first_block; if (!ext4_test_bit(offset, bh->b_data)) /* bad block bitmap */ - goto err_out; + return blk; /* check whether the inode table block number is set */ - bitmap_blk = ext4_inode_table(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_inode_table(sb, desc); + offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, offset + EXT4_SB(sb)->s_itb_per_group, offset); - if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) - /* good bitmap for inode tables */ - return 1; - -err_out: - ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", - block_group, bitmap_blk); + if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) + /* bad bitmap for inode tables */ + return blk; return 0; } @@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb, unsigned int block_group, struct buffer_head *bh) { + ext4_fsblk_t blk; + if (buffer_verified(bh)) return; ext4_lock_group(sb, block_group); - if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && - ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8)) - set_buffer_verified(bh); + blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); + if (unlikely(blk != 0)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: block %llu: invalid block bitmap", + block_group, blk); + return; + } + if 
(unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, + desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); + return; + } + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); } diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index f8716eab9995..5c2d1813ebe9 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, if (provided == calculated) return 1; - ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); return 0; } -- cgit v1.2.3 From 0548bbb85337e532ca2ed697c3e9b227ff2ed4b4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 16 Aug 2012 11:59:04 -0400 Subject: ext4: fix long mount times on very big file systems Commit 8aeb00ff85a: "ext4: fix overhead calculation used by ext4_statfs()" introduced an O(n**2) calculation which makes very large file systems take forever to mount. Fix this with an optimization for non-bigalloc file systems. (For bigalloc file systems the overhead needs to be set in the superblock.) 
Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 56bcaec9149c..598498904035 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3120,6 +3120,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + + sbi->s_itb_per_group + 2); + first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + (grp * EXT4_BLOCKS_PER_GROUP(sb)); last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; -- cgit v1.2.3 From 89a4e48f8479f8145eca9698f39fe188c982212f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 08:54:52 -0400 Subject: ext4: fix kernel BUG on large-scale rm -rf commands Commit 968dee7722: "ext4: fix hole punch failure when depth is greater than 0" introduced a regression in v3.5.1/v3.6-rc1 which caused kernel crashes when users ran "rm -rf" on a large directory hierarchy on ext4 filesystems on RAID devices: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 Process rm (pid: 18229, threadinfo ffff8801276bc000, task ffff880123631710) Call Trace: [] ? __ext4_handle_dirty_metadata+0x83/0x110 [] ext4_ext_truncate+0x193/0x1d0 [] ? ext4_mark_inode_dirty+0x7f/0x1f0 [] ext4_truncate+0xf5/0x100 [] ext4_evict_inode+0x461/0x490 [] evict+0xa2/0x1a0 [] iput+0x103/0x1f0 [] do_unlinkat+0x154/0x1c0 [] ? 
sys_newfstatat+0x2a/0x40 [] sys_unlinkat+0x1b/0x50 [] system_call_fastpath+0x16/0x1b Code: 8b 4d 20 0f b7 41 02 48 8d 04 40 48 8d 04 81 49 89 45 18 0f b7 49 02 48 83 c1 01 49 89 4d 00 e9 ae f8 ff ff 0f 1f 00 49 8b 45 28 <48> 8b 40 28 49 89 45 20 e9 85 f8 ff ff 0f 1f 80 00 00 00 RIP [] ext4_ext_remove_space+0xa34/0xdf0 This could be reproduced as follows: The problem in commit 968dee7722 was that it caused the variable 'i' to be left uninitialized if the truncate required more space than was available in the journal. This resulted in the function ext4_ext_truncate_extend_restart() returning -EAGAIN, which caused ext4_ext_remove_space() to restart the truncate operation after starting a new jbd2 handle. Reported-by: Maciej Żenczykowski Reported-by: Marti Raudsepp Tested-by: Fengguang Wu Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cd0c7ed06772..aabbb3f53683 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2662,6 +2662,7 @@ cont: } path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); + i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; -- cgit v1.2.3 From d807ff838f48e7778996e577e2a57a5796c32e84 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 17 Aug 2012 11:09:04 +0800 Subject: autofs4 - fix expire check In some cases when an autofs indirect mount is contained in a file system that is marked as shared (such as when systemd does the equivalent of "mount --make-rshared /" early in the boot), mounts stop expiring. When this happens the first expiry check on a mountpoint dentry in autofs_expire_indirect() sees a mountpoint dentry with a higher than minimal reference count. Consequently the dentry is considered busy and the actual expiry check is never done. 
This particular check was originally meant as an optimisation to detect a path walk in progress but with the addition of rcu-walk it can be ineffective anyway. Removing the test allows automounts to expire again since the actual expire check doesn't rely on the dentry reference count. Signed-off-by: Ian Kent Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8c0e56d92938..842d00048a65 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -399,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - /* Path walk currently on this dentry? */ - ino_count = atomic_read(&ino->count) + 2; - if (dentry->d_count > ino_count) - goto next; - /* Can we umount this guy */ if (autofs4_mount_busy(mnt, dentry)) goto next; -- cgit v1.2.3 From b7ca69289680cf631fb20b7d436467c4ec1153cd Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 3 Aug 2012 08:43:01 -0500 Subject: CIFS: Protect i_nlink from being negative that can cause warning messages. Pavel had initially suggested a smaller patch around drop_nlink, after a similar problem was discovered in NFS. Protecting additional places where nlink is touched was suggested by Jeff Layton and is included in this. 
Reviewed-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Steve French --- fs/cifs/inode.c | 24 ++++++++++++++++-------- fs/cifs/link.c | 2 ++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7354877fa3bd..cb79c7edecb0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -124,10 +124,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - unsigned long oldtime = cifs_i->time; cifs_revalidate_cache(inode, fattr); + spin_lock(&inode->i_lock); inode->i_atime = fattr->cf_atime; inode->i_mtime = fattr->cf_mtime; inode->i_ctime = fattr->cf_ctime; @@ -148,9 +148,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) else cifs_i->time = jiffies; - cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode, - oldtime, cifs_i->time); - cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; cifs_i->server_eof = fattr->cf_eof; @@ -158,7 +155,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) * Can't safely change the file size here if the client is writing to * it due to potential races. 
*/ - spin_lock(&inode->i_lock); if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { i_size_write(inode, fattr->cf_eof); @@ -859,12 +855,14 @@ struct inode *cifs_root_iget(struct super_block *sb) if (rc && tcon->ipc) { cFYI(1, "ipc connection - fake read inode"); + spin_lock(&inode->i_lock); inode->i_mode |= S_IFDIR; set_nlink(inode, 2); inode->i_op = &cifs_ipc_inode_ops; inode->i_fop = &simple_dir_operations; inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; + spin_unlock(&inode->i_lock); } else if (rc) { iget_failed(inode); inode = ERR_PTR(rc); @@ -1110,6 +1108,15 @@ undo_setattr: goto out_close; } +/* copied from fs/nfs/dir.c with small changes */ +static void +cifs_drop_nlink(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&inode->i_lock); +} /* * If dentry->d_inode is null (usually meaning the cached dentry @@ -1166,13 +1173,13 @@ retry_std_delete: psx_del_no_retry: if (!rc) { if (inode) - drop_nlink(inode); + cifs_drop_nlink(inode); } else if (rc == -ENOENT) { d_drop(dentry); } else if (rc == -ETXTBSY) { rc = cifs_rename_pending_delete(full_path, dentry, xid); if (rc == 0) - drop_nlink(inode); + cifs_drop_nlink(inode); } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { @@ -1241,9 +1248,10 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, * setting nlink not necessary except in cases where we failed to get it * from the server or was set bogus */ + spin_lock(&dentry->d_inode->i_lock); if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) set_nlink(dentry->d_inode, 2); - + spin_unlock(&dentry->d_inode->i_lock); mode &= ~current_umask(); /* must turn on setgid bit if parent dir has it */ if (inode->i_mode & S_ISGID) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 09e4b3ae4564..e6ce3b112875 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -433,7 +433,9 @@ cifs_hardlink(struct 
dentry *old_file, struct inode *inode, if (old_file->d_inode) { cifsInode = CIFS_I(old_file->d_inode); if (rc == 0) { + spin_lock(&old_file->d_inode->i_lock); inc_nlink(old_file->d_inode); + spin_unlock(&old_file->d_inode->i_lock); /* BB should we make this contingent on superblock flag NOATIME? */ /* old_file->d_inode->i_ctime = CURRENT_TIME;*/ /* parent dir timestamps will update from srv -- cgit v1.2.3 From 7411286088d5ba879e9ffcaaa296f657642ef2c4 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 27 Jul 2012 01:20:41 +0400 Subject: CIFS: Fix log messages in packet checking for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 16 +++++++++------- fs/cifs/smb2pdu.h | 10 ++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index a4ff5d547554..e4d3b9964167 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -52,7 +52,8 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid) cERROR(1, "Bad protocol string signature header %x", *(unsigned int *) hdr->ProtocolId); if (mid != hdr->MessageId) - cERROR(1, "Mids do not match"); + cERROR(1, "Mids do not match: %llu and %llu", mid, + hdr->MessageId); } cERROR(1, "Bad SMB detected. 
The Mid=%llu", hdr->MessageId); return 1; @@ -107,7 +108,7 @@ smb2_check_message(char *buf, unsigned int length) * ie Validate the wct via smb2_struct_sizes table above */ - if (length < 2 + sizeof(struct smb2_hdr)) { + if (length < sizeof(struct smb2_pdu)) { if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { pdu->StructureSize2 = 0; /* @@ -121,15 +122,15 @@ smb2_check_message(char *buf, unsigned int length) return 1; } if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { - cERROR(1, "SMB length greater than maximum, mid=%lld", mid); + cERROR(1, "SMB length greater than maximum, mid=%llu", mid); return 1; } if (check_smb2_hdr(hdr, mid)) return 1; - if (hdr->StructureSize != SMB2_HEADER_SIZE) { - cERROR(1, "Illegal structure size %d", + if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { + cERROR(1, "Illegal structure size %u", le16_to_cpu(hdr->StructureSize)); return 1; } @@ -161,8 +162,9 @@ smb2_check_message(char *buf, unsigned int length) if (4 + len != clc_len) { cFYI(1, "Calculated size %u length %u mismatch mid %llu", clc_len, 4 + len, mid); - if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */ - return 0; /* BB workaround Samba 3 bug SessSetup rsp */ + /* server can return one byte more */ + if (clc_len == 4 + len + 1) + return 0; return 1; } return 0; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f37a1b41b402..c5fbfac5d576 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -87,10 +87,6 @@ #define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) -#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64) - -#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) - /* * SMB2 Header Definition * @@ -99,6 +95,9 @@ * "PDU" : "Protocol Data Unit" (ie a network "frame") * */ + +#define SMB2_HEADER_STRUCTURE_SIZE __constant_le16_to_cpu(64) + struct smb2_hdr { __be32 smb2_buf_length; /* big endian on wire */ /* length is only two or three bytes - with @@ -140,6 +139,9 @@ struct smb2_pdu { * command code name for the struct. 
Note that structures must be packed. * */ + +#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) + struct smb2_err_rsp { struct smb2_hdr hdr; __le16 StructureSize; -- cgit v1.2.3 From 985e4ff016b5f3d95c12fe8073d1df89300dab3d Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 3 Aug 2012 09:42:45 -0500 Subject: cifs: print error code if smb signature verification fails While trying to debug a SMB signature related issue with Windows Servers figured out it might be easier to debug if we print the error code from cifs_verify_signature(). Also, fix indentation while at it. Signed-off-by: Suresh Jayaraman Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 11 ++++++++--- fs/cifs/transport.c | 9 ++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 074923ce593d..f0cf934ba877 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1576,9 +1576,14 @@ cifs_readv_callback(struct mid_q_entry *mid) /* result already set, check signature */ if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { - if (cifs_verify_signature(rdata->iov, rdata->nr_iov, - server, mid->sequence_number + 1)) - cERROR(1, "Unexpected SMB signature"); + int rc = 0; + + rc = cifs_verify_signature(rdata->iov, rdata->nr_iov, + server, + mid->sequence_number + 1); + if (rc) + cERROR(1, "SMB signature verification returned " + "error = %d", rc); } /* FIXME: should this be counted toward the initiating task? 
*/ task_io_account_read(rdata->bytes); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 83867ef348df..d9b639b95fa8 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -503,13 +503,16 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, /* convert the length into a more usable form */ if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { struct kvec iov; + int rc = 0; iov.iov_base = mid->resp_buf; iov.iov_len = len; /* FIXME: add code to kill session */ - if (cifs_verify_signature(&iov, 1, server, - mid->sequence_number + 1) != 0) - cERROR(1, "Unexpected SMB signature"); + rc = cifs_verify_signature(&iov, 1, server, + mid->sequence_number + 1); + if (rc) + cERROR(1, "SMB signature verification returned error = " + "%d", rc); } /* BB special case reconnect tid and uid here? */ -- cgit v1.2.3 From ea7b4887e7266b93fa0c203cc452a926a0fef4f0 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 17 Aug 2012 18:02:19 +0400 Subject: CIFS: Fix cifs_do_create error handling Commit d2c127197dfc0b2bae62a52e1e0d3e3ff493919e caused a regression in cifs_do_create error handling. Fix this by closing a file handle in the case of a get_inode_info(_unix) error. Also remove unnecessary checks for newinode being NULL. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/dir.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index cbe709ad6663..781025be48bc 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -356,19 +356,12 @@ cifs_create_get_file_info: cifs_create_set_dentry: if (rc != 0) { cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); + CIFSSMBClose(xid, tcon, *fileHandle); goto out; } d_drop(direntry); d_add(direntry, newinode); - /* ENOENT for create? How weird... 
*/ - rc = -ENOENT; - if (!newinode) { - CIFSSMBClose(xid, tcon, *fileHandle); - goto out; - } - rc = 0; - out: kfree(buf); kfree(full_path); -- cgit v1.2.3 From 7653f6ff4ebab2a094e65b60fb19ee66ed2f78e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Aug 2012 12:12:29 -0400 Subject: NFSv4: Ensure that nfs4_alloc_client cleans up on error. Any pointer that was allocated through nfs_alloc_client() needs to be freed via a call to nfs_free_client(). Reported-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cbcdfaf32505..24eb663f8ed5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error: - kfree(clp); + nfs_free_client(clp); return ERR_PTR(err); } -- cgit v1.2.3 From 086600430493e04b802bee6e5b3ce0458e4eb77f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Aug 2012 12:42:15 -0400 Subject: NFSv3: Ensure that do_proc_get_root() reports errors correctly If the rpc call to NFS3PROC_FSINFO fails, then we need to report that error so that the mount fails. Otherwise we can end up with a superblock with completely unusable values for block sizes, maxfilesize, etc. 
Reported-by: Yuanming Chen Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0952c791df36..d6b3b5f2d779 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); -- cgit v1.2.3 From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 19 Aug 2012 12:29:16 -0700 Subject: libceph: delay debugfs initialization until we learn global_id The debugfs directory includes the cluster fsid and our unique global_id. We need to delay the initialization of the debug entry until we have learned both the fsid and our global_id from the monitor or else the second client can't create its debugfs entry and will fail (and multiple client instances aren't properly reflected in debugfs). 
Reported by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Yehuda Sadeh --- fs/ceph/debugfs.c | 1 + net/ceph/ceph_common.c | 1 - net/ceph/debugfs.c | 4 ++++ net/ceph/mon_client.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 51 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fb962efdacee..6d59006bfa27 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); + BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 69e38db28e5f..a8020293f342 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -84,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) return -1; } } else { - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); memcpy(&client->fsid, fsid, sizeof(*fsid)); } return 0; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 54b531a01121..38b5dc1823d4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client) snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, client->monc.auth->global_id); + dout("ceph_debugfs_client_init %p %s\n", client, name); + + BUG_ON(client->debugfs_dir); client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); if (!client->debugfs_dir) goto out; @@ -234,6 +237,7 @@ out: void ceph_debugfs_client_cleanup(struct ceph_client *client) { + dout("ceph_debugfs_client_cleanup %p\n", client); debugfs_remove(client->debugfs_osdmap); debugfs_remove(client->debugfs_monmap); debugfs_remove(client->osdc.debugfs_file); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 105d533b55f3..900ea0f043fc 100644 --- a/net/ceph/mon_client.c +++ 
b/net/ceph/mon_client.c @@ -310,6 +310,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) } EXPORT_SYMBOL(ceph_monc_open_session); +/* + * We require the fsid and global_id in order to initialize our + * debugfs dir. + */ +static bool have_debugfs_info(struct ceph_mon_client *monc) +{ + dout("have_debugfs_info fsid %d globalid %lld\n", + (int)monc->client->have_fsid, monc->auth->global_id); + return monc->client->have_fsid && monc->auth->global_id > 0; +} + /* * The monitor responds with mount ack indicate mount success. The * included client ticket allows the client to talk to MDSs and OSDs. @@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_client *client = monc->client; struct ceph_monmap *monmap = NULL, *old = monc->monmap; void *p, *end; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); + dout("handle_monmap\n"); p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, if (!client->have_fsid) { client->have_fsid = true; + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); - /* - * do debugfs initialization without mutex to avoid - * creating a locking dependency - */ - ceph_debugfs_client_init(client); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } + goto out_unlocked; } out: @@ -865,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc, { int ret; int was_auth = 0; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); if (monc->auth->ops) was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; @@ 
-889,7 +915,22 @@ static void handle_auth_reply(struct ceph_mon_client *monc, __send_subscribe(monc); __resend_generic_request(monc); } + + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } } static int __validate_auth(struct ceph_mon_client *monc) -- cgit v1.2.3 From 0e665d5d1125f9f4ccff56a75e814f10f88861a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Aug 2012 15:28:00 +0100 Subject: vfs: missed source of ->f_pos races compat_sys_{read,write}v() need the same "pass a copy of file->f_pos" thing as sys_{read,write}{,v}(). Signed-off-by: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/compat.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 6161255fac45..1bdb350ea5d3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1155,11 +1155,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1221,11 +1224,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } -- cgit v1.2.3 From 
39307655a1effa8d913bba054c0e985bfaca808c Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Thu, 16 Aug 2012 17:01:21 -0400 Subject: nfsd4: fix security flavor of NFSv4.0 callback Commit d5497fc693a446ce9100fcf4117c3f795ddfd0d2 "nfsd4: move rq_flavor into svc_cred" forgot to remove cl_flavor from the client, leaving two places (cl_flavor and cl_cred.cr_flavor) for the flavor to be stored. After that patch, the latter was the one that was updated, but the former was the one that the callback used. Symptoms were a long delay on utime(). This is because the utime() generated a setattr which recalled a delegation, but the cb_recall was ignored by the client because it had the wrong security flavor. Cc: stable@vger.kernel.org Tested-by: Jamie Heilman Reported-by: Jamie Heilman Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 4 ++-- fs/nfsd/state.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cbaf4f8bb7b7..4c7bd35b1876 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog, args.protocol = XPRT_TRANSPORT_TCP; - args.authflavor = clp->cl_flavor; + args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { if (!conn->cb_xprt) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e6173147f982..22bd0a66c356 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -231,7 +231,6 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ - u32 cl_flavor; /* setclientid 
pseudoflavor */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ -- cgit v1.2.3 From 73e8712aa02d924844fbd5bd84a2445a1c3f68d7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 20 Aug 2012 14:10:00 +0300 Subject: UBIFS: remove stale commentary Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c3fa6c5327a3..71a197f0f93d 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c) * * This function mounts UBIFS file system. Returns zero in case of success and * a negative error code in case of failure. - * - * Note, the function does not de-allocate resources it it fails half way - * through, and the caller has to do this instead. */ static int mount_ubifs(struct ubifs_info *c) { -- cgit v1.2.3 From 11e3be0be2a1314e0861304857e7efcaed5d3e54 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 20 Aug 2012 15:16:24 +0300 Subject: UBIFS: fix crash on error path This patch fixes a regression introduced by "4994297 UBIFS: make ubifs_lpt_init clean-up in case of failure" which I've hit while running the 'integck -p' test. When remount the file-system from R/O mode to R/W mode and 'lpt_init_wr()' fails, we free _all_ LPT resources by calling 'ubifs_lpt_free(c, 0)', even those needed for R/O mode. This leads to subsequent crashes, e.g., if we try to unmount the file-system. 
Cc: stable@vger.kernel.org [v3.5+] Signed-off-by: Artem Bityutskiy --- fs/ubifs/lpt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ce33b2beb151..8640920766ed 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) return 0; out_err: - ubifs_lpt_free(c, 0); + if (wr) + ubifs_lpt_free(c, 1); + if (rd) + ubifs_lpt_free(c, 0); return err; } -- cgit v1.2.3 From c212f4020de7b5d35a71327d1483120a698d60a0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 21 Aug 2012 13:45:35 +0300 Subject: UBIFS: fix replay regression Commit "d51f17e UBIFS: simplify reply code a bit" introduces a bug with the following symptoms: UBIFS error (pid 1): replay_log_leb: first CS node at LEB 3:0 has wrong commit number 0 expected 1 The issue is that we start replaying the log from UBIFS_LOG_LNUM instead of c->lhead_lnum. This patch fixes that. Reported-by: Uwe Kleine-König Signed-off-by: Artem Bityutskiy --- fs/ubifs/replay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eba46d4a7619..94d78fc5d4e0 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c) c->replaying = 1; lnum = c->ltail_lnum = c->lhead_lnum; - lnum = UBIFS_LOG_LNUM; do { err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) @@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; lnum = ubifs_next_log_lnum(c, lnum); - } while (lnum != UBIFS_LOG_LNUM); + } while (lnum != c->ltail_lnum); err = replay_buds(c); if (err) -- cgit v1.2.3 From 6c5e50fa614fea5325a2973be06f7ec6f1055316 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Aug 2012 15:55:25 -0700 Subject: ceph: tolerate (and warn on) extraneous dentry from mds If the MDS gives us a dentry and we weren't prepared to handle it, 
WARN_ON_ONCE instead of crashing. Reported-by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/inode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9fff9f3b17e4..4b5762ef7c2b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, if (rinfo->head->is_dentry) { struct inode *dir = req->r_locked_dir; - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1, - &req->r_caps_reservation); - if (err < 0) - return err; + if (dir) { + err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + session, req->r_request_started, -1, + &req->r_caps_reservation); + if (err < 0) + return err; + } else { + WARN_ON_ONCE(1); + } } /* @@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, * will have trouble splicing in the virtual snapdir later */ if (rinfo->head->is_dentry && !req->r_aborted && + req->r_locked_dir && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { -- cgit v1.2.3 From 45f2e081f573526977abfa781a12728f83e9641f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Aug 2012 12:11:51 -0700 Subject: ceph: avoid divide by zero in __validate_layout() If "l->stripe_unit" is zero then the mod on the next line will cause a divide by zero bug. This comes from the copy_from_user() in ceph_ioctl_set_layout_policy(). Passing 0 is valid, though (it means "do not change") so avoid the % check in that case. 
Reported-by: Dan Carpenter Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8e3fb69fbe62..1396ceb46797 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc, /* validate striping parameters */ if ((l->object_size & ~PAGE_MASK) || (l->stripe_unit & ~PAGE_MASK) || - ((unsigned)l->object_size % (unsigned)l->stripe_unit)) + (l->stripe_unit != 0 && + ((unsigned)l->object_size % (unsigned)l->stripe_unit))) return -EINVAL; /* make sure it's a valid data pool */ -- cgit v1.2.3 From 65b455b123c7e2b835a0b7148f9bae584f95000e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 21 Aug 2012 21:50:58 +0300 Subject: UBIFS: fix complaints about too small debug buffer size When debugging is enabled, we use a temporary on-stack buffer for formatting the key strings like "(11368871, direntry, 0xcd0750)". The buffer size is 32 bytes and sometimes it is not enough to fit the key string - e.g., when inode numbers are high. This is not fatal, but the key strings are incomplete and UBIFS complains like this: UBIFS assert failed in dbg_snprintf_key at 137 (pid 1) This is a regression caused by "515315a UBIFS: fix key printing". Fix the issue by increasing the buffer to 48 bytes. Reported-by: Michael Hench Signed-off-by: Artem Bityutskiy Tested-by: Michael Hench Cc: stable@vger.kernel.org [v3.3+] --- fs/ubifs/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8b8cc4e945f4..760de723dadb 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -167,7 +167,7 @@ struct ubifs_global_debug_info { #define ubifs_dbg_msg(type, fmt, ...) \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) -#define DBG_KEY_BUF_LEN 32 +#define DBG_KEY_BUF_LEN 48 #define ubifs_dbg_msg_key(type, key, fmt, ...) 
do { \ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ -- cgit v1.2.3 From 98022748f6c7bce85b9f123fd4d1a621219dd8d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 22:42:36 -0400 Subject: eventpoll: use-after-possible-free in epoll_create1() As soon as we'd installed the file into descriptor table, it can get closed by another thread. Freeing ep in process... Signed-off-by: Al Viro --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c8b55670804..eedec84c1809 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) error = PTR_ERR(file); goto out_free_fd; } - fd_install(fd, file); ep->file = file; + fd_install(fd, file); return fd; out_free_fd: -- cgit v1.2.3 From 55852635a8e2803cbc22d0e143d727813f0fcdb5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Aug 2012 17:39:25 -0700 Subject: fs: fix fs/namei.c kernel-doc warnings Fix kernel-doc warnings in fs/namei.c: Warning(fs/namei.c:360): No description found for parameter 'inode' Warning(fs/namei.c:672): No description found for parameter 'nd' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index db76b866a097..dd1ed1b8e98e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on + * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. 
@@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; /** * may_follow_link - Check symlink following for unsafe situations * @link: The path of the symlink + * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is -- cgit v1.2.3 From 69f9025894c391fec2f7c7ea9150203418454915 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 22 Aug 2012 16:47:28 +0300 Subject: UBIFS: fix error messages spelling Corruptio -> corruption. Signed-off-by: Artem Bityutskiy --- fs/ubifs/recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c30d976b4be8..edeec499c048 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ - ubifs_err("corruptio %d", ret); + ubifs_err("corruption %d", ret); ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); -- cgit v1.2.3 From 676ce6d5ca3098339c028d44fe0427d1566a4d2d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 23 Aug 2012 12:17:36 +0200 Subject: block: replace __getblk_slow misfix by grow_dev_page fix Commit 91f68c89d8f3 ("block: fix infinite loop in __getblk_slow") is not good: a successful call to grow_buffers() cannot guarantee that the page won't be reclaimed before the immediate next call to __find_get_block(), which is why there was always a loop there. Yesterday I got "EXT4-fs error (device loop0): __ext4_get_inode_loc:3595: inode #19278: block 664: comm cc1: unable to read itable block" on console, which pointed to this commit. I've been trying to bisect for weeks, why kbuild-on-ext4-on-loop-on-tmpfs sometimes fails from a missing header file, under memory pressure on ppc G5. 
I've never seen this on x86, and I've never seen it on 3.5-rc7 itself, despite that commit being in there: bisection pointed to an irrelevant pinctrl merge, but hard to tell when failure takes between 18 minutes and 38 hours (but so far it's happened quicker on 3.6-rc2). (I've since found such __ext4_get_inode_loc errors in /var/log/messages from previous weeks: why the message never appeared on console until yesterday morning is a mystery for another day.) Revert 91f68c89d8f3, restoring __getblk_slow() to how it was (plus a checkpatch nitfix). Simplify the interface between grow_buffers() and grow_dev_page(), and avoid the infinite loop beyond end of device by instead checking init_page_buffers()'s end_block there (I presume that's more efficient than a repeated call to blkdev_max_block()), returning -ENXIO to __getblk_slow() in that case. And remove akpm's ten-year-old "__getblk() cannot fail ... weird" comment, but that is worrying: are all users of __getblk() really now prepared for a NULL bh beyond end of device, or will some oops?? Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org # 3.0 3.2 3.4 3.5 Signed-off-by: Jens Axboe --- fs/buffer.c | 66 ++++++++++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 9f6d2e41281d..58e2e7b77372 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. */ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { @@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. 
* - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. */ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; - +done: + ret = (block < end_block) ? 1 : -ENXIO; failed: unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -999,7 +1007,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. 
*/ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { - int ret; - struct buffer_head *bh; - /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } -retry: - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; + for (;;) { + struct buffer_head *bh; + int ret; - ret = grow_buffers(bdev, block, size); - if (ret == 0) { - free_more_memory(); - goto retry; - } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; + + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) + free_more_memory(); } - return NULL; } /* @@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? */ -- cgit v1.2.3 From 0b9e3f6d84ce619f697bb622d9165cccaa93d67c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 31 Jul 2012 14:55:51 +1000 Subject: xfs: fix uninitialised variable in xfs_rtbuf_get() Results in this assert failure in generic/090: XFS: Assertion failed: *nmap >= 1, file: fs/xfs/xfs_bmap.c, line: 4363 ..... 
Call Trace: [] xfs_bmapi_read+0x6b/0x370 [] xfs_rtbuf_get+0x42/0x130 [] xfs_rtget_summary+0x89/0x120 [] xfs_rtallocate_extent_size+0xce/0x340 [] xfs_rtallocate_extent+0x240/0x290 [] xfs_bmap_rtalloc+0x1ba/0x340 [] xfs_bmap_alloc+0x35/0x40 [] xfs_bmapi_allocate+0xf1/0x350 [] xfs_bmapi_write+0x66e/0xa60 [] xfs_iomap_write_direct+0x22a/0x3f0 [] __xfs_get_blocks+0x38b/0x5d0 [] xfs_get_blocks_direct+0x14/0x20 [] do_blockdev_direct_IO+0xf71/0x1eb0 [] __blockdev_direct_IO+0x55/0x60 [] xfs_vm_direct_IO+0x11a/0x1e0 [] generic_file_direct_write+0xd7/0x1b0 [] xfs_file_dio_aio_write+0x13c/0x320 [] xfs_file_aio_write+0x1c2/0x1d0 [] do_sync_write+0xa7/0xe0 [] vfs_write+0xa8/0x160 [] sys_pwrite64+0x92/0xb0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_rtalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 92d4331cd4f1..ca28a4ba4b54 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -857,7 +857,7 @@ xfs_rtbuf_get( xfs_buf_t *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; - int nmap; + int nmap = 1; int error; /* error value */ ip = issum ? mp->m_rsumip : mp->m_rbmip; -- cgit v1.2.3 From 761290309939743ddf97e2bd94c6da18c6436b79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Aug 2012 02:02:02 -0400 Subject: xfs: unlock the AGI buffer when looping in xfs_dialloc Also update some comments in the area to make the code easier to read. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 21e37b55f7e5..5aceb3f8ecd6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -962,23 +962,22 @@ xfs_dialloc( if (!pag->pagi_freecount && !okalloc) goto nextag; + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) goto out_error; - /* - * Once the AGI has been read in we have to recheck - * pagi_freecount with the AGI buffer lock held. - */ if (pag->pagi_freecount) { xfs_perag_put(pag); goto out_alloc; } - if (!okalloc) { - xfs_trans_brelse(tp, agbp); - goto nextag; - } + if (!okalloc) + goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); if (error) { @@ -1007,6 +1006,8 @@ xfs_dialloc( return 0; } +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) -- cgit v1.2.3 From a672e1be30d5bc848cd0067c55ed29b2015b7c17 Mon Sep 17 00:00:00 2001 From: Tomas Racek Date: Tue, 14 Aug 2012 10:35:04 +0200 Subject: xfs: check for possible overflow in xfs_ioc_trim If range.start or range.minlen is bigger than filesystem size, return invalid value error. This fixes possible overflow in BTOBB macro when passed value was nearly ULLONG_MAX. Signed-off-by: Tomas Racek Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_discard.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f9c3fe304a17..69cf4fcde03e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -179,12 +179,14 @@ xfs_ioc_trim( * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. 
*/ + if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + return -XFS_ERROR(EINVAL); + start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; minlen = BTOBB(max_t(u64, granularity, range.minlen)); - if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) - return -XFS_ERROR(EINVAL); if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; -- cgit v1.2.3 From aa2ffd06168e25689e0eb9662bf4595ba2bbac14 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 26 Jul 2012 03:40:35 -0600 Subject: Btrfs: fix a misplaced address operator in a condition This should obviously not be "if (&flag)" but "if (flag)". Signed-off-by: Stefan Behrens --- fs/btrfs/locking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index a44eff074805..2a1762c66041 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) { if (eb->lock_nested) { read_lock(&eb->lock); - if (&eb->lock_nested && current->pid == eb->lock_owner) { + if (eb->lock_nested && current->pid == eb->lock_owner) { read_unlock(&eb->lock); return; } -- cgit v1.2.3 From 5986802c2fcc754040bb7ed95f30bb16c4a843b7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:16:10 -0600 Subject: Btrfs: fix some error codes in btrfs_qgroup_inherit() These are returning zero when it should be returning a negative error code. 
Signed-off-by: Dan Carpenter --- fs/btrfs/qgroup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bc424ae5a81a..229ef8927e6b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1369,8 +1369,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); - if (!srcgroup) + if (!srcgroup) { + ret = -EINVAL; goto unlock; + } dstgroup->rfer = srcgroup->rfer - level_size; dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; srcgroup->excl = level_size; @@ -1379,8 +1381,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, qgroup_dirty(fs_info, srcgroup); } - if (!inherit) + if (!inherit) { + ret = -EINVAL; goto unlock; + } i_qgroups = (u64 *)(inherit + 1); for (i = 0; i < inherit->num_qgroups; ++i) { -- cgit v1.2.3 From 57a5a882031dba5cb7bc7ebc955b897498365fe2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:15:43 -0600 Subject: Btrfs: checking for NULL instead of IS_ERR add_qgroup_rb() never returns NULL, only error pointers. Signed-off-by: Dan Carpenter --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 229ef8927e6b..38b42e7bc91d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1364,8 +1364,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, spin_lock(&fs_info->qgroup_lock); dstgroup = add_qgroup_rb(fs_info, objectid); - if (!dstgroup) + if (IS_ERR(dstgroup)) { + ret = PTR_ERR(dstgroup); goto unlock; + } if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); -- cgit v1.2.3 From 55e591ffde38e0088b022129e035e18a8d04c7e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:15:15 -0600 Subject: Btrfs: unlock on error in btrfs_delalloc_reserve_metadata() We should release this mutex before returning the error code. 
Signed-off-by: Dan Carpenter --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4e1b153b7c47..45c69c4184c9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4571,8 +4571,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (root->fs_info->quota_enabled) { ret = btrfs_qgroup_reserve(root, num_bytes + nr_extents * root->leafsize); - if (ret) + if (ret) { + mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); return ret; + } } ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); -- cgit v1.2.3 From dadd1105ca9a1e506c678e8e410e9623efdda821 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:10:44 -0600 Subject: Btrfs: fix some endian bugs handling the root times "trans->transid" is cpu endian but we want to store the data as little endian. "item->ctime.nsec" is only 32 bits, not 64. Signed-off-by: Dan Carpenter --- fs/btrfs/ioctl.c | 2 +- fs/btrfs/root-tree.c | 4 ++-- fs/btrfs/transaction.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 43f0012016e3..a1fbca0a1003 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root, uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); + root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6bb465cca20f..10d8e4d88071 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, 
struct timespec ct = CURRENT_TIME; spin_lock(&root->root_times_lock); - item->ctransid = trans->transid; + item->ctransid = cpu_to_le64(trans->transid); item->ctime.sec = cpu_to_le64(ct.tv_sec); - item->ctime.nsec = cpu_to_le64(ct.tv_nsec); + item->ctime.nsec = cpu_to_le32(ct.tv_nsec); spin_unlock(&root->root_times_lock); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7ac7cdcc294e..7208ada41e0e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1061,7 +1061,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(new_root_item->parent_uuid, root->root_item.uuid, BTRFS_UUID_SIZE); new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); - new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); + new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); btrfs_set_root_otransid(new_root_item, trans->transid); memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); -- cgit v1.2.3 From eb838e73dc2121d2bae47d5678952cd7d48793b5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 31 Jul 2012 16:28:48 -0400 Subject: Btrfs: lock extents as we map them in DIO A deadlock in xfstests 113 was uncovered by commit d187663ef24cd3d033f0cbf2867e70b36a3a90b8 This is because we would not return EIOCBQUEUED for short AIO reads, instead we'd wait for the DIO to complete and then return the amount of data we transferred, which would allow our stuff to unlock the remaining amount. But with this change this no longer happens, so if we have a short AIO read (for example if we try to read past EOF), we could leave the section from EOF to the end of where we tried to read locked. Fixing this is tricky since there is no clear way to know exactly how much data DIO truly submitted for IO, so to make this less hard on ourselves and less cumbersome we need to lock the extents as we try to map them, and then we unlock any areas we didn't actually map.
This makes us completely safe from deadlocks and reliance on a particular behavior of the DIO code. This also lays the groundwork for allowing us to use the normal csum storage method for reads which means we can remove an allocation. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 256 +++++++++++++++++++++++++++---------------------------- 1 file changed, 127 insertions(+), 129 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dac1fc21d809..09182449cbdf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5773,18 +5773,109 @@ out: return ret; } +static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, + struct extent_state **cached_state, int writing) +{ + struct btrfs_ordered_extent *ordered; + int ret = 0; + + while (1) { + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, + 0, cached_state); + /* + * We're concerned with the entire range that we're going to be + * doing DIO to, so we need to make sure theres no ordered + * extents in this range. + */ + ordered = btrfs_lookup_ordered_range(inode, lockstart, + lockend - lockstart + 1); + + /* + * We need to make sure there are no buffered pages in this + * range either, we could have raced between the invalidate in + * generic_file_direct_write and locking the extent. The + * invalidate needs to happen so that reads after a write do not + * get stale data. + */ + if (!ordered && (!writing || + !test_range_bit(&BTRFS_I(inode)->io_tree, + lockstart, lockend, EXTENT_UPTODATE, 0, + *cached_state))) + break; + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, + cached_state, GFP_NOFS); + + if (ordered) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } else { + /* Screw you mmap */ + ret = filemap_write_and_wait_range(inode->i_mapping, + lockstart, + lockend); + if (ret) + break; + + /* + * If we found a page that couldn't be invalidated just + * fall back to buffered. 
+ */ + ret = invalidate_inode_pages2_range(inode->i_mapping, + lockstart >> PAGE_CACHE_SHIFT, + lockend >> PAGE_CACHE_SHIFT); + if (ret) + break; + } + + cond_resched(); + } + + return ret; +} + static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_state *cached_state = NULL; u64 start = iblock << inode->i_blkbits; + u64 lockstart, lockend; u64 len = bh_result->b_size; struct btrfs_trans_handle *trans; + int unlock_bits = EXTENT_LOCKED; + int ret; + + lockstart = start; + lockend = start + len - 1; + if (create) { + ret = btrfs_delalloc_reserve_space(inode, len); + if (ret) + return ret; + unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; + } + + /* + * If this errors out it's because we couldn't invalidate pagecache for + * this range and we need to fallback to buffered. + */ + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) + return -ENOTBLK; + + if (create) { + ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, EXTENT_DELALLOC, NULL, + &cached_state, GFP_NOFS); + if (ret) + goto unlock_err; + } em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - if (IS_ERR(em)) - return PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto unlock_err; + } /* * Ok for INLINE and COMPRESSED extents we need to fallback on buffered @@ -5803,17 +5894,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - return -ENOTBLK; + ret = -ENOTBLK; + goto unlock_err; } /* Just a good old fashioned hole, return */ if (!create && (em->block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { free_extent_map(em); - /* DIO will do one hole at a time, so just unlock a sector */ - unlock_extent(&BTRFS_I(inode)->io_tree, start, - start + 
root->sectorsize - 1); - return 0; + ret = 0; + goto unlock_err; } /* @@ -5826,8 +5916,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * */ if (!create) { - len = em->len - (start - em->start); - goto map; + len = min(len, em->len - (start - em->start)); + lockstart = start + len; + goto unlock; } if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || @@ -5859,7 +5950,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, btrfs_end_transaction(trans, root); if (ret) { free_extent_map(em); - return ret; + goto unlock_err; } goto unlock; } @@ -5872,14 +5963,12 @@ must_cow: */ len = bh_result->b_size; em = btrfs_new_extent_direct(inode, em, start, len); - if (IS_ERR(em)) - return PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto unlock_err; + } len = min(len, em->len - (start - em->start)); unlock: - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, - EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, - 0, NULL, GFP_NOFS); -map: bh_result->b_blocknr = (em->block_start + (start - em->start)) >> inode->i_blkbits; bh_result->b_size = len; @@ -5897,9 +5986,28 @@ map: i_size_write(inode, start + len); } + /* + * In the case of write we need to clear and unlock the entire range, + * in the case of read we need to unlock only the end area that we + * aren't using if there is any left over space. 
+ */ + if (lockstart < lockend) + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + unlock_bits, 1, 0, &cached_state, GFP_NOFS); + else + free_extent_state(cached_state); + free_extent_map(em); return 0; + +unlock_err: + if (create) + unlock_bits |= EXTENT_DO_ACCOUNTING; + + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + unlock_bits, 1, 0, &cached_state, GFP_NOFS); + return ret; } struct btrfs_dio_private { @@ -6340,132 +6448,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io out: return retval; } + static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct btrfs_ordered_extent *ordered; - struct extent_state *cached_state = NULL; - u64 lockstart, lockend; - ssize_t ret; - int writing = rw & WRITE; - int write_bits = 0; - size_t count = iov_length(iov, nr_segs); if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, - offset, nr_segs)) { + offset, nr_segs)) return 0; - } - - lockstart = offset; - lockend = offset + count - 1; - - if (writing) { - ret = btrfs_delalloc_reserve_space(inode, count); - if (ret) - goto out; - } - while (1) { - lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - 0, &cached_state); - /* - * We're concerned with the entire range that we're going to be - * doing DIO to, so we need to make sure theres no ordered - * extents in this range. - */ - ordered = btrfs_lookup_ordered_range(inode, lockstart, - lockend - lockstart + 1); - - /* - * We need to make sure there are no buffered pages in this - * range either, we could have raced between the invalidate in - * generic_file_direct_write and locking the extent. The - * invalidate needs to happen so that reads after a write do not - * get stale data. 
- */ - if (!ordered && (!writing || - !test_range_bit(&BTRFS_I(inode)->io_tree, - lockstart, lockend, EXTENT_UPTODATE, 0, - cached_state))) - break; - - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - &cached_state, GFP_NOFS); - - if (ordered) { - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } else { - /* Screw you mmap */ - ret = filemap_write_and_wait_range(file->f_mapping, - lockstart, - lockend); - if (ret) - goto out; - - /* - * If we found a page that couldn't be invalidated just - * fall back to buffered. - */ - ret = invalidate_inode_pages2_range(file->f_mapping, - lockstart >> PAGE_CACHE_SHIFT, - lockend >> PAGE_CACHE_SHIFT); - if (ret) { - if (ret == -EBUSY) - ret = 0; - goto out; - } - } - - cond_resched(); - } - - /* - * we don't use btrfs_set_extent_delalloc because we don't want - * the dirty or uptodate bits - */ - if (writing) { - write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; - ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - EXTENT_DELALLOC, NULL, &cached_state, - GFP_NOFS); - if (ret) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, - lockend, EXTENT_LOCKED | write_bits, - 1, 0, &cached_state, GFP_NOFS); - goto out; - } - } - - free_extent_state(cached_state); - cached_state = NULL; - - ret = __blockdev_direct_IO(rw, iocb, inode, + return __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, 0); - - if (ret < 0 && ret != -EIOCBQUEUED) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, - offset + iov_length(iov, nr_segs) - 1, - EXTENT_LOCKED | write_bits, 1, 0, - &cached_state, GFP_NOFS); - } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { - /* - * We're falling back to buffered, unlock the section we didn't - * do IO on. 
- */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, - offset + iov_length(iov, nr_segs) - 1, - EXTENT_LOCKED | write_bits, 1, 0, - &cached_state, GFP_NOFS); - } -out: - free_extent_state(cached_state); - return ret; } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, -- cgit v1.2.3 From 3627bf4503b504077332c13496cb1bd54713bcbb Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 1 Aug 2012 04:28:01 -0600 Subject: Btrfs: fix that error value is changed by mistake In iterate_inodes_from_logical() the error result from extent_from_logical() is patched by mistake. Typically ENOENT is patched to EINVAL because (-ENOENT & BTRFS_EXTENT_FLAG_TREE_BLOCK) evaluates to true. Signed-off-by: Stefan Behrens --- fs/btrfs/backref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a256f3b2a845..ff6475f409d6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, ret = extent_from_logical(fs_info, logical, path, &found_key); btrfs_release_path(path); - if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) - ret = -EINVAL; if (ret < 0) return ret; + if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) + return -EINVAL; extent_item_pos = logical - found_key.objectid; ret = iterate_extent_inodes(fs_info, found_key.objectid, -- cgit v1.2.3 From aa9ddcd4b5557102fa25695c11904f249b4dec49 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 2 Aug 2012 10:22:20 -0400 Subject: Btrfs: do not use missing devices when showing devname If you do the following mkfs.btrfs /dev/sdb /dev/sdc rmmod btrfs dd if=/dev/zero of=/dev/sdb bs=1M count=1 mount -o degraded /dev/sdc /mnt/btrfs-test the box will panic trying to deref the name for the missing dev since it is the lower numbered devid. So fix show_devname to not use missing devices. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 75ee2c7791f0..2e06f124f284 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1505,6 +1505,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) while (cur_devices) { head = &cur_devices->devices; list_for_each_entry(dev, head, dev_list) { + if (dev->missing) + continue; if (!first_dev || dev->devid < first_dev->devid) first_dev = dev; } -- cgit v1.2.3 From 99f5944b8477914406173b47b4f261356286730b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 2 Aug 2012 10:23:59 -0400 Subject: Btrfs: do not strdup non existent strings When we close devices we add back empty devices for some reason that escapes me. In the case of a missing dev we don't allocate an rcu_string for its name, so check to see if the device has a name and if it doesn't, don't bother strdup()'ing it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/volumes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b8708f994e67..3b394503bd4e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -569,9 +569,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) memcpy(new_device, device, sizeof(*new_device)); /* Safe because we are under uuid_mutex */ - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); + if (device->name) { + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(device->name && !name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); + } new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; -- cgit v1.2.3 From c329861da40623cd838b8c9ee31a850242fd88cf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 3 Aug 2012 16:49:19 -0400 Subject:
Btrfs: don't allocate a seperate csums array for direct reads We've been allocating a big array for csums instead of storing them in the io_tree like we do for buffered reads because previously we were locking the entire range, so we didn't have an extent state for each sector of the range. But now that we do the range locking as we map the buffers we can limit the mapping length to sectorsize and use the private part of the io_tree for our csums. This allows us to avoid an extra memory allocation for direct reads which could incur latency. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 4 ++-- fs/btrfs/inode.c | 45 ++++++++++++++++----------------------------- 3 files changed, 19 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index adb1cd7ceb9b..348196350bf0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3192,7 +3192,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 logical_offset, u32 *dst); + struct bio *bio, u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b45b9de0c21d..857d93cd01dc 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, } int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 offset, u32 *dst) + struct bio *bio, u64 offset) { - return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); + return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git
a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 09182449cbdf..2d65c52b0944 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5847,15 +5847,18 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, int unlock_bits = EXTENT_LOCKED; int ret; - lockstart = start; - lockend = start + len - 1; if (create) { ret = btrfs_delalloc_reserve_space(inode, len); if (ret) return ret; unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; + } else { + len = min_t(u64, len, root->sectorsize); } + lockstart = start; + lockend = start + len - 1; + /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. @@ -6015,7 +6018,6 @@ struct btrfs_dio_private { u64 logical_offset; u64 disk_bytenr; u64 bytes; - u32 *csums; void *private; /* number of bios pending for this dio */ @@ -6035,7 +6037,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; u64 start; - u32 *private = dip->csums; start = dip->logical_offset; do { @@ -6043,8 +6044,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct page *page = bvec->bv_page; char *kaddr; u32 csum = ~(u32)0; + u64 private = ~(u32)0; unsigned long flags; + if (get_state_private(&BTRFS_I(inode)->io_tree, + start, &private)) + goto failed; local_irq_save(flags); kaddr = kmap_atomic(page); csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, @@ -6054,18 +6059,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) local_irq_restore(flags); flush_dcache_page(bvec->bv_page); - if (csum != *private) { + if (csum != private) { +failed: printk(KERN_ERR "btrfs csum failed ino %llu off" " %llu csum %u private %u\n", (unsigned long long)btrfs_ino(inode), (unsigned long long)start, - csum, *private); + csum, (unsigned)private); err = -EIO; } } start += bvec->bv_len; - private++; bvec++; } while (bvec <= bvec_end); @@ -6073,7 +6078,6 @@ static void 
btrfs_endio_direct_read(struct bio *bio, int err) dip->logical_offset + dip->bytes - 1); bio->bi_private = dip->private; - kfree(dip->csums); kfree(dip); /* If we had a csum failure make sure to clear the uptodate flag */ @@ -6179,7 +6183,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, - u32 *csums, int async_submit) + int async_submit) { int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -6212,8 +6216,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, bio, - file_offset, csums); + ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); if (ret) goto err; } @@ -6239,10 +6242,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, u64 submit_len = 0; u64 map_length; int nr_pages = 0; - u32 *csums = dip->csums; int ret = 0; int async_submit = 0; - int write = rw & REQ_WRITE; map_length = orig_bio->bi_size; ret = btrfs_map_block(map_tree, READ, start_sector << 9, @@ -6278,16 +6279,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, atomic_inc(&dip->pending_bios); ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums, async_submit); + async_submit); if (ret) { bio_put(bio); atomic_dec(&dip->pending_bios); goto out_err; } - /* Write's use the ordered csums */ - if (!write && !skip_sum) - csums = csums + nr_pages; start_sector += submit_len >> 9; file_offset += submit_len; @@ -6317,7 +6315,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, submit: ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums, async_submit); + async_submit); if (!ret) return 0; @@ -6353,17 +6351,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 
ret = -ENOMEM; goto free_ordered; } - dip->csums = NULL; - - /* Write's use the ordered csum stuff, so we don't need dip->csums */ - if (!write && !skip_sum) { - dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); - if (!dip->csums) { - kfree(dip); - ret = -ENOMEM; - goto free_ordered; - } - } dip->private = bio->bi_private; dip->inode = inode; -- cgit v1.2.3 From 6209526531e70c080f79318ab8f50e26846c40a8 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sat, 4 Aug 2012 01:45:02 -0600 Subject: btrfs: fix second lock in btrfs_delete_delayed_items() Fix a real bug caught by coccinelle. fs/btrfs/delayed-inode.c:1013:1-11: second lock on line 1013 Signed-off-by: Fengguang Wu --- fs/btrfs/delayed-inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 335605c8ceab..00deed4ef3ed 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1028,9 +1028,10 @@ do_again: btrfs_release_delayed_item(prev); ret = 0; btrfs_release_path(path); - if (curr) + if (curr) { + mutex_unlock(&node->mutex); goto do_again; - else + } else goto delete_fail; } -- cgit v1.2.3 From 1fa11e265fa2562fb713171b6a58e72bb7afd276 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 6 Aug 2012 14:18:51 -0600 Subject: Btrfs: fix deadlock in wait_for_more_refs Commit a168650c introduced a waiting mechanism to prevent busy waiting in btrfs_run_delayed_refs. This can deadlock with btrfs_run_ordered_operations, where a tree_mod_seq is held while waiting for the io to complete, while the end_io calls btrfs_run_delayed_refs. This whole mechanism is unnecessary. If not enough runnable refs are available to satisfy count, just return as count is more like a guideline than a strict requirement. In case we have to run all refs, commit transaction makes sure that no other threads are working in the transaction anymore, so we just assert here that no refs are blocked. 
Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 ---- fs/btrfs/ctree.h | 1 - fs/btrfs/delayed-ref.c | 8 ------ fs/btrfs/disk-io.c | 2 -- fs/btrfs/extent-tree.c | 77 ++++++++++++++------------------------------------ 5 files changed, 21 insertions(+), 73 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9d7621f271ff..08e0b11ba0a1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -420,12 +420,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, } spin_unlock(&fs_info->tree_mod_seq_lock); - /* - * we removed the lowest blocker from the blocker list, so there may be - * more processible delayed refs. - */ - wake_up(&fs_info->tree_mod_seq_wait); - /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 348196350bf0..c38734a07a65 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1252,7 +1252,6 @@ struct btrfs_fs_info { atomic_t tree_mod_seq; struct list_head tree_mod_seq_list; struct seq_list tree_mod_seq_elem; - wait_queue_head_t tree_mod_seq_wait; /* this protects tree_mod_log */ rwlock_t tree_mod_log_lock; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index da7419ed01bb..7561431af50d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -662,9 +662,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); - if (!need_ref_seq(for_cow, ref_root) && - waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); if (need_ref_seq(for_cow, ref_root)) btrfs_qgroup_record_ref(trans, &ref->node, extent_op); @@ -713,9 +710,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, 
ref_root, owner, offset, action, for_cow); - if (!need_ref_seq(for_cow, ref_root) && - waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); if (need_ref_seq(for_cow, ref_root)) btrfs_qgroup_record_ref(trans, &ref->node, extent_op); @@ -744,8 +738,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, num_bytes, BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data); - if (waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 502b20c56e84..a7ad8fc8dc53 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2035,8 +2035,6 @@ int open_ctree(struct super_block *sb, fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; - init_waitqueue_head(&fs_info->tree_mod_seq_wait); - /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); spin_lock_init(&fs_info->reada_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 45c69c4184c9..d3df65f83b5c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2318,12 +2318,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; - /* - * we modified num_entries, but as we're currently running - * delayed refs, skip - * wake_up(&delayed_refs->seq_wait); - * here. 
- */ spin_unlock(&delayed_refs->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, @@ -2350,22 +2344,6 @@ next: return count; } -static void wait_for_more_refs(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - unsigned long num_refs, - struct list_head *first_seq) -{ - spin_unlock(&delayed_refs->lock); - pr_debug("waiting for more refs (num %ld, first %p)\n", - num_refs, first_seq); - wait_event(fs_info->tree_mod_seq_wait, - num_refs != delayed_refs->num_entries || - fs_info->tree_mod_seq_list.next != first_seq); - pr_debug("done waiting for more refs (num %ld, first %p)\n", - delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); - spin_lock(&delayed_refs->lock); -} - #ifdef SCRAMBLE_DELAYED_REFS /* * Normally delayed refs get processed in ascending bytenr order. This @@ -2460,13 +2438,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; struct list_head cluster; - struct list_head *first_seq = NULL; int ret; u64 delayed_start; int run_all = count == (unsigned long)-1; int run_most = 0; - unsigned long num_refs = 0; - int consider_waiting; + int loops; /* We'll clean this up in btrfs_cleanup_transaction */ if (trans->aborted) @@ -2484,7 +2460,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: - consider_waiting = 0; + loops = 0; spin_lock(&delayed_refs->lock); #ifdef SCRAMBLE_DELAYED_REFS @@ -2512,31 +2488,6 @@ again: if (ret) break; - if (delayed_start >= delayed_refs->run_delayed_start) { - if (consider_waiting == 0) { - /* - * btrfs_find_ref_cluster looped. let's do one - * more cycle. if we don't run any delayed ref - * during that cycle (because we can't because - * all of them are blocked) and if the number of - * refs doesn't change, we avoid busy waiting. 
- */ - consider_waiting = 1; - num_refs = delayed_refs->num_entries; - first_seq = root->fs_info->tree_mod_seq_list.next; - } else { - wait_for_more_refs(root->fs_info, delayed_refs, - num_refs, first_seq); - /* - * after waiting, things have changed. we - * dropped the lock and someone else might have - * run some refs, built new clusters and so on. - * therefore, we restart staleness detection. - */ - consider_waiting = 0; - } - } - ret = run_clustered_refs(trans, root, &cluster); if (ret < 0) { spin_unlock(&delayed_refs->lock); @@ -2549,9 +2500,26 @@ again: if (count == 0) break; - if (ret || delayed_refs->run_delayed_start == 0) { + if (delayed_start >= delayed_refs->run_delayed_start) { + if (loops == 0) { + /* + * btrfs_find_ref_cluster looped. let's do one + * more cycle. if we don't run any delayed ref + * during that cycle (because we can't because + * all of them are blocked), bail out. + */ + loops = 1; + } else { + /* + * no runnable refs left, stop trying + */ + BUG_ON(run_all); + break; + } + } + if (ret) { /* refs were run, let's reset staleness detection */ - consider_waiting = 0; + loops = 0; } } @@ -5296,9 +5264,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; - smp_mb(); - if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) - wake_up(&root->fs_info->tree_mod_seq_wait); /* * we don't take a ref on the node because we're removing it from the -- cgit v1.2.3 From 66657b318e0e443ada229fccd40c8be86cfebdbf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Aug 2012 15:36:24 -0400 Subject: Btrfs: barrier before waitqueue_active We need a barrier before calling waitqueue_active otherwise we will miss wakeups. So in places that do atomic_dec(); then atomic_read() use atomic_dec_return() which imply a memory barrier (see memory-barriers.txt) and then add an explicit memory barrier everywhere else that need them.
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/compression.c | 1 + fs/btrfs/delayed-inode.c | 7 +++---- fs/btrfs/disk-io.c | 7 ++++--- fs/btrfs/inode.c | 4 +--- fs/btrfs/volumes.c | 3 +-- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 86eff48dab78..43d1c5a3a030 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace) btrfs_compress_op[idx]->free_workspace(workspace); atomic_dec(alloc_workspace); wake: + smp_mb(); if (waitqueue_active(workspace_wait)) wake_up(workspace_wait); } diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 00deed4ef3ed..07d5eeb1e6f1 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) rb_erase(&delayed_item->rb_node, root); delayed_item->delayed_node->count--; - atomic_dec(&delayed_root->items); - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && + if (atomic_dec_return(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND && waitqueue_active(&delayed_root->wait)) wake_up(&delayed_root->wait); } @@ -1056,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) delayed_node->count--; delayed_root = delayed_node->root->fs_info->delayed_root; - atomic_dec(&delayed_root->items); - if (atomic_read(&delayed_root->items) < + if (atomic_dec_return(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && waitqueue_active(&delayed_root->wait)) wake_up(&delayed_root->wait); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a7ad8fc8dc53..dd86a5d88428 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -754,9 +754,7 @@ static void run_one_async_done(struct btrfs_work *work) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; - atomic_dec(&fs_info->nr_async_submits); 
- - if (atomic_read(&fs_info->nr_async_submits) < limit && + if (atomic_dec_return(&fs_info->nr_async_submits) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); @@ -3783,14 +3781,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) /* FIXME: cleanup wait for commit */ t->in_commit = 1; t->blocked = 1; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); t->blocked = 0; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); t->commit_done = 1; + smp_mb(); if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2d65c52b0944..97baf00b40d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1007,9 +1007,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; - atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); - - if (atomic_read(&root->fs_info->async_delalloc_pages) < + if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) < 5 * 1024 * 1024 && waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3b394503bd4e..0b1e69d380dd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -227,9 +227,8 @@ loop_lock: cur = pending; pending = pending->bi_next; cur->bi_next = NULL; - atomic_dec(&fs_info->nr_async_bios); - if (atomic_read(&fs_info->nr_async_bios) < limit && + if (atomic_dec_return(&fs_info->nr_async_bios) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); -- cgit v1.2.3 From 6fc823b10f333313deb0b5d9069cbfd3a3f99f3a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 6 Aug 2012 13:46:38 -0600 Subject: Btrfs: increase the size of the free space 
cache Arne was complaining about the space cache having mismatching generation numbers when debugging a deadlock. This is because we can run out of space in our preallocated range for our space cache if you have a pretty fragmented amount of space in your pinned space. So just increase the amount of space we preallocate for space cache so we can be sure to have enough space. This will only really affect data ranges since they're the only chunks that end up larger than 256MB. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d3df65f83b5c..1bb408f737fb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2975,17 +2975,16 @@ again: } spin_unlock(&block_group->lock); - num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); + /* + * Try to preallocate enough space based on how big the block group is. + * Keep in mind this has to include any pinned space which could end up + * taking up quite a bit since it's not folded into the other space + * cache. + */ + num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); if (!num_pages) num_pages = 1; - /* - * Just to make absolutely sure we have enough space, we're going to - * preallocate 12 pages worth of space for each block group. In - * practice we ought to use at most 8, but we need extra space so we can - * add our header and have a terminator between the extents and the - * bitmaps. - */ num_pages *= 16; num_pages *= PAGE_CACHE_SIZE; -- cgit v1.2.3 From b12a3b1ea209d9dec02731fba58c3dbe7d31cfd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2012 15:34:49 -0400 Subject: Btrfs: don't run __tree_mod_log_free_eb on leaves When we split a leaf, we may end up inserting a new root on top of that leaf. The reflog code was incorrectly assuming the old root was always a node.
This makes sure we skip over leaves. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 08e0b11ba0a1..6d183f60d63a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -625,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) u32 nritems; int ret; + if (btrfs_header_level(eb) == 0) + return; + nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { ret = tree_mod_log_insert_key_locked(fs_info, eb, i, -- cgit v1.2.3 From 22cd2e7de7b0bd68fb668d23e1564707ca689510 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Thu, 9 Aug 2012 00:16:53 -0600 Subject: Btrfs: fix race in run_clustered_refs With commit commit d1270cd91f308c9d22b2804720c36ccd32dbc35e Author: Arne Jansen Date: Tue Sep 13 15:16:43 2011 +0200 Btrfs: put back delayed refs that are too new I added a window where the delayed_ref's head->ref_mod code can diverge from the sum of the remaining refs, because we release the head->mutex in the middle. This leads to btrfs_lookup_extent_info returning wrong numbers. This patch fixes this by adjusting the head's ref_mod with each delayed ref we run. 
Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1bb408f737fb..f16411d3c252 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2318,6 +2318,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; + if (locked_ref) { + /* + * when we play the delayed ref, also correct the + * ref_mod on head + */ + switch (ref->action) { + case BTRFS_ADD_DELAYED_REF: + case BTRFS_ADD_DELAYED_EXTENT: + locked_ref->node.ref_mod -= ref->ref_mod; + break; + case BTRFS_DROP_DELAYED_REF: + locked_ref->node.ref_mod += ref->ref_mod; + break; + default: + WARN_ON(1); + } + } spin_unlock(&delayed_refs->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, -- cgit v1.2.3 From c0f62dedd04ae0f3b8a18079db5a015af24e416f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 8 Aug 2012 21:39:36 -0600 Subject: Btrfs: fix wrong mtime and ctime when creating snapshots When we created a new snapshot, the mtime and ctime of its parent directory were not updated. Fix it. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7208ada41e0e..3ee8d58e97ad 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1026,6 +1026,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); + parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, parent_root, parent_inode); if (ret) goto abort_trans_dput; -- cgit v1.2.3 From 5a24e84c55f57cc49bd1cab531b6ef28b6b7bdaa Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 8 Aug 2012 10:12:59 -0600 Subject: Btrfs: fix enospc problems when deleting a subvol Subvol delete is a special kind of awful where we use the global reserve to cover the ENOSPC requirements. The problem is once we're done removing everything we do a btrfs_update_inode(), which by default will try to do the delayed update stuff which will use its own reserve. There will be no space in this reserve and we'll return ENOSPC. So instead use btrfs_update_inode_fallback() which will just fall back to updating the inode item in the case of enospc. This is fine because the global reserve covers the space requirements for this. With this patch I can now delete a subvol on a problem image Dave Sterba sent me.
Thanks, Reported-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 97baf00b40d1..0808f483dafa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3171,7 +3171,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, dir); + ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, root, ret); out: -- cgit v1.2.3 From ae1e206b806ccc490dadff59af8a7a2477b32884 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Aug 2012 16:00:32 -0400 Subject: Btrfs: allow delayed refs to be merged Daniel Blueman reported a bug with fio+balance on a ramdisk setup. Basically what happens is the balance relocates a tree block which will drop the implicit refs for all of its children and adds a full backref. Once the block is relocated we have to add the implicit refs back, so when we cow the block again we add the implicit refs for its children back. The problem comes when the original drop ref doesn't get run before we add the implicit refs back. The delayed ref stuff will specifically prefer ADD operations over DROP to keep us from freeing up an extent that will have references to it, so we try to add the implicit ref before it is actually removed and we panic. This worked fine before because the add would have just canceled the drop out and we would have been fine. But the backref walking work needs to be able to freeze the delayed ref stuff in time so we have this ever increasing sequence number that gets attached to all new delayed ref updates which makes us not merge refs and we run into this issue. So to fix this we need to merge delayed refs. 
So every time we run a clustered ref we need to try and merge all of its delayed refs. The backref walking stuff locks the delayed ref head before processing, so if we have it locked we are safe to merge any refs inside of the sequence number. If there is no sequence number we can merge all refs. Doing this not only fixes our bug but also keeps the delayed ref code from adding and removing useless refs, and batches together multiple refs into one search instead of one search per delayed ref, which will really help our commit times. I ran this with Daniel's test and 276 and I haven't seen any problems. Thanks, Reported-by: Daniel J Blueman Signed-off-by: Josef Bacik --- fs/btrfs/delayed-ref.c | 155 ++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/delayed-ref.h | 4 ++ fs/btrfs/extent-tree.c | 10 ++++ 3 files changed, 142 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 7561431af50d..ae9411773397 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -38,17 +38,14 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, struct btrfs_delayed_tree_ref *ref1) { - if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { - if (ref1->root < ref2->root) - return -1; - if (ref1->root > ref2->root) - return 1; - } else { - if (ref1->parent < ref2->parent) - return -1; - if (ref1->parent > ref2->parent) - return 1; - } + if (ref1->root < ref2->root) + return -1; + if (ref1->root > ref2->root) + return 1; + if (ref1->parent < ref2->parent) + return -1; + if (ref1->parent > ref2->parent) + return 1; return 0; } @@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, * type of the delayed backrefs and content of delayed backrefs.
*/ static int comp_entry(struct btrfs_delayed_ref_node *ref2, - struct btrfs_delayed_ref_node *ref1) + struct btrfs_delayed_ref_node *ref1, + bool compare_seq) { if (ref1->bytenr < ref2->bytenr) return -1; @@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, if (ref1->type > ref2->type) return 1; /* merging of sequenced refs is not allowed */ - if (ref1->seq < ref2->seq) - return -1; - if (ref1->seq > ref2->seq) - return 1; + if (compare_seq) { + if (ref1->seq < ref2->seq) + return -1; + if (ref1->seq > ref2->seq) + return 1; + } if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), @@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, rb_node); - cmp = comp_entry(entry, ins); + cmp = comp_entry(entry, ins, 1); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) @@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } +static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *ref) +{ + rb_erase(&ref->rb_node, &delayed_refs->root); + ref->in_tree = 0; + btrfs_put_delayed_ref(ref); + delayed_refs->num_entries--; + if (trans->delayed_ref_updates) + trans->delayed_ref_updates--; +} + +static int merge_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *ref, u64 seq) +{ + struct rb_node *node; + int merged = 0; + int mod = 0; + int done = 0; + + node = rb_prev(&ref->rb_node); + while (node) { + struct btrfs_delayed_ref_node *next; + + next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_prev(node); + if (next->bytenr != ref->bytenr) + break; + if (seq && next->seq >= seq) + break; + if (comp_entry(ref, next, 0)) + continue; + + if 
(ref->action == next->action) { + mod = next->ref_mod; + } else { + if (ref->ref_mod < next->ref_mod) { + struct btrfs_delayed_ref_node *tmp; + + tmp = ref; + ref = next; + next = tmp; + done = 1; + } + mod = -next->ref_mod; + } + + merged++; + drop_delayed_ref(trans, delayed_refs, next); + ref->ref_mod += mod; + if (ref->ref_mod == 0) { + drop_delayed_ref(trans, delayed_refs, ref); + break; + } else { + /* + * You can't have multiples of the same ref on a tree + * block. + */ + WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY); + } + + if (done) + break; + node = rb_prev(&ref->rb_node); + } + + return merged; +} + +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + struct rb_node *node; + u64 seq = 0; + + spin_lock(&fs_info->tree_mod_seq_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); + seq = elem->seq; + } + spin_unlock(&fs_info->tree_mod_seq_lock); + + node = rb_prev(&head->node.rb_node); + while (node) { + struct btrfs_delayed_ref_node *ref; + + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + if (ref->bytenr != head->node.bytenr) + break; + + /* We can't merge refs that are outside of our seq count */ + if (seq && ref->seq >= seq) + break; + if (merge_ref(trans, delayed_refs, ref, seq)) + node = rb_prev(&head->node.rb_node); + else + node = rb_prev(node); + } +} + int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq) @@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans, * every changing the extent allocation tree. 
*/ existing->ref_mod--; - if (existing->ref_mod == 0) { - rb_erase(&existing->rb_node, - &delayed_refs->root); - existing->in_tree = 0; - btrfs_put_delayed_ref(existing); - delayed_refs->num_entries--; - if (trans->delayed_ref_updates) - trans->delayed_ref_updates--; - } else { + if (existing->ref_mod == 0) + drop_delayed_ref(trans, delayed_refs, existing); + else WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); - } } else { WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 0d7c90c366b6..ab5300595847 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op); +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head * btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f16411d3c252..ba58024d40d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2251,6 +2251,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } } + /* + * We need to try and merge add/drops of the same ref since we + * can run into issues with relocate dropping the implicit ref + * and then it being added back again before the drop can + * finish. If we merged anything we need to re-loop so we can + * get a good ref. 
+ */ + btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, + locked_ref); + /* * locked_ref is the head node, so we have to go one * node back for any delayed ref updates -- cgit v1.2.3 From 68ce9682a4bb95d6be5529cb57214bf2a1b7d20e Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 1 Aug 2012 05:45:52 -0600 Subject: Btrfs: remove superblock writing after fatal error With commit acce952b0, btrfs was changed to flag the filesystem with BTRFS_SUPER_FLAG_ERROR and switch to read-only mode after a fatal error happened, such as write I/O errors on all mirrors. In such situations, on unmount, the superblock is written in btrfs_error_commit_super(). This is done with the intention to be able to evaluate the error flag on the next mount. A warning is printed in this case during the next mount and the log tree is ignored. The issue is that it is possible that the superblock points to a root that was not written (due to write I/O errors). The result is that the filesystem cannot be mounted. btrfsck also does not start and all the other btrfs-progs tools fail to start as well. However, mount -o recovery is working well and does the right things to recover the filesystem (i.e., don't use the log root, clear the free space cache and use the next mountable root that is stored in the root backup array). This patch removes the writing of the superblock when BTRFS_SUPER_FLAG_ERROR is set, and removes the handling of the error flag in the mount function.
These lines can be used to reproduce the issue (using /dev/sdm): SCRATCH_DEV=/dev/sdm SCRATCH_MNT=/mnt echo 0 25165824 linear $SCRATCH_DEV 0 | dmsetup create foo ls -alLF /dev/mapper/foo mkfs.btrfs /dev/mapper/foo mount /dev/mapper/foo $SCRATCH_MNT echo bar > $SCRATCH_MNT/foo sync echo 0 25165824 error | dmsetup reload foo dmsetup resume foo ls -alF $SCRATCH_MNT touch $SCRATCH_MNT/1 ls -alF $SCRATCH_MNT sleep 35 echo 0 25165824 linear $SCRATCH_DEV 0 | dmsetup reload foo dmsetup resume foo sleep 1 umount $SCRATCH_MNT btrfsck /dev/mapper/foo dmsetup remove foo Signed-off-by: Stefan Behrens Signed-off-by: Jan Schmidt --- fs/btrfs/disk-io.c | 36 ++++-------------------------------- fs/btrfs/disk-io.h | 2 +- 2 files changed, 5 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dd86a5d88428..3c4c4397f470 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2527,8 +2527,7 @@ retry_root_backup: goto fail_trans_kthread; /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0 && - !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { + if (btrfs_super_log_root(disk_super) != 0) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { @@ -3188,30 +3187,14 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(fs_info); - /* - * Here come 2 situations when btrfs is broken to flip readonly: - * - * 1. when btrfs flips readonly somewhere else before - * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, - * and btrfs will skip to write sb directly to keep - * ERROR state on disk. - * - * 2. when btrfs flips readonly just in btrfs_commit_super, - * and in such case, btrfs cannot write sb via btrfs_commit_super, - * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, - * btrfs will cleanup all FS resources first and write sb then. 
- */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - ret = btrfs_error_commit_super(root); - if (ret) - printk(KERN_ERR "btrfs: commit super ret %d\n", ret); - } + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + btrfs_error_commit_super(root); btrfs_put_block_group_cache(fs_info); @@ -3433,18 +3416,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, if (read_only) return 0; - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - printk(KERN_WARNING "warning: mount fs with errors, " - "running btrfsck is recommended\n"); - } - return 0; } -int btrfs_error_commit_super(struct btrfs_root *root) +void btrfs_error_commit_super(struct btrfs_root *root) { - int ret; - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -3454,10 +3430,6 @@ int btrfs_error_commit_super(struct btrfs_root *root) /* cleanup FS via transaction */ btrfs_cleanup_transaction(root); - - ret = write_ctree_super(NULL, root, 0); - - return ret; } static void btrfs_destroy_ordered_operations(struct btrfs_root *root) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95e147eea239..c5b00a735fef 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); -int btrfs_error_commit_super(struct btrfs_root *root); +void btrfs_error_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, -- cgit v1.2.3 From 5ee0844d6427e7338e0aba748f62b62d07ea2ed0 Mon Sep 17 00:00:00 2001 From: Stefan 
Behrens Date: Mon, 27 Aug 2012 08:30:03 -0600 Subject: Btrfs: revert checksum error statistic which can cause a BUG() Commit 442a4f6308e694e0fa6025708bd5e4e424bbf51c added btrfs device statistic counters for detected IO and checksum errors to Linux 3.5. The statistic part that counts checksum errors in end_bio_extent_readpage() can cause a BUG() in a subfunction: "kernel BUG at fs/btrfs/volumes.c:3762!" That part is reverted with the current patch. However, the counting of checksum errors in the scrub context remains active, and the counting of detected IO errors (read, write or flush errors) in all contexts remains active. Cc: stable # 3.5 Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 17 ++--------------- fs/btrfs/volumes.c | 22 ---------------------- fs/btrfs/volumes.h | 2 -- 3 files changed, 2 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e7c9ed6505b..49085f2336d2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2329,23 +2329,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); - if (ret) { - /* no IO indicated but software detected errors - * in the block, either checksum errors or - * issues with the contents */ - struct btrfs_root *root = - BTRFS_I(page->mapping->host)->root; - struct btrfs_device *device; - + if (ret) uptodate = 0; - device = btrfs_find_device_for_logical( - root, start, mirror); - if (device) - btrfs_dev_stat_inc_and_print(device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - } else { + else clean_io_failure(start, page); - } } if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0b1e69d380dd..3f4e70e171ed 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4610,28 +4610,6 @@ int 
btrfs_read_sys_array(struct btrfs_root *root) return ret; } -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num) -{ - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - int ret; - u64 map_length = 0; - struct btrfs_bio *bbio = NULL; - struct btrfs_device *device; - - BUG_ON(mirror_num == 0); - ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, - mirror_num); - if (ret) { - BUG_ON(bbio != NULL); - return NULL; - } - BUG_ON(mirror_num != bbio->mirror_num); - device = bbio->stripes[mirror_num - 1].dev; - kfree(bbio); - return device; -} - int btrfs_read_chunk_tree(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5479325987b3..53c06af92e8d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num); void btrfs_dev_stat_print_on_error(struct btrfs_device *device); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_root *root, -- cgit v1.2.3 From bd7de2c9a449e26a5493d918618eb20ae60d56bd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Aug 2012 12:53:03 -0600 Subject: Btrfs: fix deadlock with freeze and sync V2 We can deadlock with freeze right now because we unconditionally start a transaction in our ->sync_fs() call. To fix this just check and see if we have a running transaction to commit. This saves us from the deadlock because at this point we'll have the umount sem for the sb so we're safe from freezes coming in after we've done our check. With this patch the freeze xfstests no longer deadlocks. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e06f124f284..073c2368f459 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -813,7 +813,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - int ret; trace_btrfs_sync_fs(wait); @@ -824,11 +823,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_wait_ordered_extents(root, 0, 0); - trans = btrfs_start_transaction(root, 0); + spin_lock(&fs_info->trans_lock); + if (!fs_info->running_transaction) { + spin_unlock(&fs_info->trans_lock); + return 0; + } + spin_unlock(&fs_info->trans_lock); + + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - return ret; + return btrfs_commit_transaction(trans, root); } static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) -- cgit v1.2.3 From 24c03fa5cf3d02c327cf9f2fc39f72664b1bd7e1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 22 Aug 2012 20:10:38 -0600 Subject: Btrfs: fix a dio write regression This bug was introduced by commit 3b8bde746f6f9bd36a9f05f5f3b6e334318176a9 (Btrfs: lock extents as we map them in DIO). In dio write, we should unlock the section which we didn't do IO on in case that we fall back to buffered write. But we need to not only unlock the section but also clean up reserved space for the section. This bug was found while running xfstests 133; with this patch, 133 no longer complains.
Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0808f483dafa..38cda78de5e4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5992,11 +5992,27 @@ unlock: * in the case of read we need to unlock only the end area that we * aren't using if there is any left over space. */ - if (lockstart < lockend) - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - unlock_bits, 1, 0, &cached_state, GFP_NOFS); - else + if (lockstart < lockend) { + if (create && len < lockend - lockstart) { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockstart + len - 1, unlock_bits, 1, 0, + &cached_state, GFP_NOFS); + /* + * Beside unlock, we also need to cleanup reserved space + * for the left range by attaching EXTENT_DO_ACCOUNTING. + */ + clear_extent_bit(&BTRFS_I(inode)->io_tree, + lockstart + len, lockend, + unlock_bits | EXTENT_DO_ACCOUNTING, + 1, 0, NULL, GFP_NOFS); + } else { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, unlock_bits, 1, 0, + &cached_state, GFP_NOFS); + } + } else { free_extent_state(cached_state); + } free_extent_map(em); -- cgit v1.2.3 From d280e5be940931c84bb2e9831ead9d02bc785484 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 21 Aug 2012 21:13:25 -0600 Subject: Btrfs: fix ordered extent leak when failing to start a transaction We cannot just return an error before freeing the ordered extent and releasing reserved space when we fail to start a transaction.
Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 38cda78de5e4..6ba80b902877 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1882,8 +1882,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans = btrfs_join_transaction_nolock(root); else trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) /* -ENOMEM or corruption */ -- cgit v1.2.3 From 256dd1bb3750ac5ad49b40887c1691788dc44b33 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 10 Aug 2012 08:58:21 -0600 Subject: Btrfs: fix that repair code is spuriously executed for transid failures If verify_parent_transid() fails for all mirrors, the current code calls repair_io_failure() anyway which means: - that the disk block is rewritten without repairing anything and - that a kernel log message is printed which misleadingly claims that a read error was corrected. This is an example: parent transid verify failed on 615015833600 wanted 110423 found 110424 parent transid verify failed on 615015833600 wanted 110423 found 110424 btrfs read error corrected: ino 1 off 615015833600 (dev /dev/...) It is wrong to ignore the results from verify_parent_transid() and to call repair_eb_io_failure() when the verification of the transids failed. This commit fixes the issue. 
Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3c4c4397f470..29c69e60d3b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); - if (!ret && !verify_parent_transid(io_tree, eb, + if (!ret) { + if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) - break; + break; + else + ret = -EIO; + } /* * This buffer's crc is fine, but its contents are corrupted, so -- cgit v1.2.3