aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/auth_x.c15
-rw-r--r--fs/ceph/caps.c32
-rw-r--r--fs/ceph/debugfs.c4
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/ceph/inode.c5
-rw-r--r--fs/ceph/locks.c14
-rw-r--r--fs/ceph/mds_client.c101
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/osd_client.c2
-rw-r--r--fs/ceph/snap.c89
-rw-r--r--fs/ceph/super.h11
-rw-r--r--fs/ceph/xattr.c1
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/asn1.c6
-rw-r--r--fs/cifs/cifs_unicode.h18
-rw-r--r--fs/cifs/cifs_uniupr.h16
-rw-r--r--fs/cifs/cifsencrypt.c475
-rw-r--r--fs/cifs/cifsglob.h25
-rw-r--r--fs/cifs/cifspdu.h7
-rw-r--r--fs/cifs/cifsproto.h12
-rw-r--r--fs/cifs/cifssmb.c13
-rw-r--r--fs/cifs/connect.c17
-rw-r--r--fs/cifs/dir.c157
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/ntlmssp.h13
-rw-r--r--fs/cifs/sess.c132
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/nfsd/nfs4state.c26
-rw-r--r--fs/nfsd/state.h14
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c42
-rw-r--r--fs/xfs/xfs_fsops.c31
-rw-r--r--fs/xfs/xfs_fsops.h2
-rw-r--r--fs/xfs/xfs_ialloc.c16
-rw-r--r--fs/xfs/xfs_inode.c49
-rw-r--r--fs/xfs/xfs_log.c7
-rw-r--r--fs/xfs/xfs_log_cil.c263
-rw-r--r--fs/xfs/xfs_log_priv.h13
-rw-r--r--fs/xfs/xfs_trans.c5
-rw-r--r--fs/xfs/xfs_trans_priv.h3
44 files changed, 1089 insertions, 613 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5598a0d02295..4cfce1ee31fa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
/* dirty the head */
spin_lock(&inode->i_lock);
- if (ci->i_wrbuffer_ref_head == 0)
+ if (ci->i_head_snapc == NULL)
ci->i_head_snapc = ceph_get_snap_context(snapc);
++ci->i_wrbuffer_ref_head;
if (ci->i_wrbuffer_ref == 0)
@@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page)
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(!PageUptodate(page));
-
- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
- }
+ account_page_dirtied(page, page->mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
@@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
break;
}
}
- if (!snapc && ci->i_head_snapc) {
+ if (!snapc && ci->i_wrbuffer_ref_head) {
snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 582e0b2caf8a..a2d002cbdec2 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
th = get_ticket_handler(ac, service);
- if (!th) {
+ if (IS_ERR(th)) {
*pneed |= service;
continue;
}
@@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th =
get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
ceph_x_validate_tickets(ac, &need);
dout("build_request want %x have %x need %x\n",
@@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
return -ERANGE;
head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
- BUG_ON(!th);
ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
if (ret)
return ret;
@@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
case CEPHX_GET_PRINCIPAL_SESSION_KEY:
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
- BUG_ON(!th);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
buf + sizeof(*head), end);
break;
@@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
void *end = p + sizeof(au->reply_buf);
th = get_ticket_handler(ac, au->service);
- if (!th)
- return -EIO; /* hrm! */
+ if (IS_ERR(th))
+ return PTR_ERR(th);
ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
if (ret < 0)
return ret;
@@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
- if (th && !IS_ERR(th))
+ if (!IS_ERR(th))
remove_ticket_handler(ac, th);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bf182b03973..a2069b6680ae 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
gid_t gid;
struct ceph_mds_session *session;
u64 xattr_version = 0;
+ struct ceph_buffer *xattr_blob = NULL;
int delayed = 0;
u64 flush_tid = 0;
int i;
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
for (i = 0; i < CEPH_CAP_BITS; i++)
if (flushing & (1 << i))
ci->i_cap_flush_tid[i] = flush_tid;
+
+ follows = ci->i_head_snapc->seq;
+ } else {
+ follows = 0;
}
keep = cap->implemented;
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
mtime = inode->i_mtime;
atime = inode->i_atime;
time_warp_seq = ci->i_time_warp_seq;
- follows = ci->i_snap_realm->cached_context->seq;
uid = inode->i_uid;
gid = inode->i_gid;
mode = inode->i_mode;
- if (dropping & CEPH_CAP_XATTR_EXCL) {
+ if (flushing & CEPH_CAP_XATTR_EXCL) {
__ceph_build_xattrs_blob(ci);
- xattr_version = ci->i_xattrs.version + 1;
+ xattr_blob = ci->i_xattrs.blob;
+ xattr_version = ci->i_xattrs.version;
}
spin_unlock(&inode->i_lock);
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
- uid, gid, mode,
- xattr_version,
- (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL,
+ uid, gid, mode, xattr_version, xattr_blob,
follows);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
@@ -1282,7 +1285,7 @@ retry:
&capsnap->mtime, &capsnap->atime,
capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
- 0, NULL,
+ capsnap->xattr_version, capsnap->xattr_blob,
capsnap->follows);
next_follows = capsnap->follows + 1;
@@ -1332,7 +1335,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ceph_cap_string(was | mask));
ci->i_dirty_caps |= mask;
if (was == 0) {
- dout(" inode %p now dirty\n", &ci->vfs_inode);
+ if (!ci->i_head_snapc)
+ ci->i_head_snapc = ceph_get_snap_context(
+ ci->i_snap_realm->cached_context);
+ dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
+ ci->i_head_snapc);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -2190,7 +2197,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
if (ci->i_head_snapc == snapc) {
ci->i_wrbuffer_ref_head -= nr;
- if (!ci->i_wrbuffer_ref_head) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
+ BUG_ON(!ci->i_head_snapc);
ceph_put_snap_context(ci->i_head_snapc);
ci->i_head_snapc = NULL;
}
@@ -2483,6 +2492,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
dout(" inode %p now clean\n", inode);
BUG_ON(!list_empty(&ci->i_dirty_item));
drop = 1;
+ if (ci->i_wrbuffer_ref_head == 0) {
+ BUG_ON(!ci->i_head_snapc);
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
} else {
BUG_ON(list_empty(&ci->i_dirty_item));
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 360c4f22718d..6fd8b20a8611 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p)
} else if (req->r_dentry) {
path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
&pathbase, 0);
+ if (IS_ERR(path))
+ path = NULL;
spin_lock(&req->r_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)",
ceph_ino(req->r_dentry->d_parent->d_inode),
@@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p)
if (req->r_old_dentry) {
path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
&pathbase, 0);
+ if (IS_ERR(path))
+ path = NULL;
spin_lock(&req->r_old_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)",
ceph_ino(req->r_old_dentry->d_parent->d_inode),
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 67bbb41d5526..6e4f43ff23ec 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry)
else
dentry->d_op = &ceph_snap_dentry_ops;
- di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS);
+ di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
return -ENOMEM; /* oh well */
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5d893d31e399..e7cca414da03 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode,
if (ci->i_files == 0 && ci->i_subdirs == 0 &&
ceph_snap(inode) == CEPH_NOSNAP &&
(le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+ (issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
ci->i_ceph_flags |= CEPH_I_COMPLETE;
@@ -1229,11 +1230,11 @@ retry_lookup:
in = dn->d_inode;
} else {
in = ceph_get_inode(parent->d_sb, vino);
- if (in == NULL) {
+ if (IS_ERR(in)) {
dout("new_inode badness\n");
d_delete(dn);
dput(dn);
- err = -ENOMEM;
+ err = PTR_ERR(in);
goto out;
}
dn = splice_dentry(dn, in, NULL);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae85af06454f..ff4e753aae92 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ (u64)fl->fl_pid,
+ (u64)(unsigned long)fl->fl_nspid,
lock_cmd, fl->fl_start,
length, wait);
if (!err) {
@@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
/* undo! This should only happen if the kernel detects
* local deadlock. */
ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ (u64)fl->fl_pid,
+ (u64)(unsigned long)fl->fl_nspid,
CEPH_LOCK_UNLOCK, fl->fl_start,
length, 0);
dout("got %d on posix_lock_file, undid lock", err);
@@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
- file, (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ file, (u64)fl->fl_pid,
+ (u64)(unsigned long)fl->fl_nspid,
lock_cmd, fl->fl_start,
length, wait);
if (!err) {
@@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
ceph_lock_message(CEPH_LOCK_FLOCK,
CEPH_MDS_OP_SETFILELOCK,
file, (u64)fl->fl_pid,
- (u64)fl->fl_nspid,
+ (u64)(unsigned long)fl->fl_nspid,
CEPH_LOCK_UNLOCK, fl->fl_start,
length, 0);
dout("got %d on flock_lock_file_wait, undid lock", err);
@@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock,
cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
cephlock->client = cpu_to_le64(0);
cephlock->pid = cpu_to_le64(lock->fl_pid);
- cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid);
+ cephlock->pid_namespace =
+ cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
switch (lock->fl_type) {
case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a75ddbf9fe37..f091b1351786 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
+struct dentry *get_nonsnap_parent(struct dentry *dentry)
+{
+ while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ dentry = dentry->d_parent;
+ return dentry;
+}
+
static int __choose_mds(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
@@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (req->r_inode) {
inode = req->r_inode;
} else if (req->r_dentry) {
- if (req->r_dentry->d_inode) {
+ struct inode *dir = req->r_dentry->d_parent->d_inode;
+
+ if (dir->i_sb != mdsc->client->sb) {
+ /* not this fs! */
+ inode = req->r_dentry->d_inode;
+ } else if (ceph_snap(dir) != CEPH_NOSNAP) {
+ /* direct snapped/virtual snapdir requests
+ * based on parent dir inode */
+ struct dentry *dn =
+ get_nonsnap_parent(req->r_dentry->d_parent);
+ inode = dn->d_inode;
+ dout("__choose_mds using nonsnap parent %p\n", inode);
+ } else if (req->r_dentry->d_inode) {
+ /* dentry target */
inode = req->r_dentry->d_inode;
} else {
- inode = req->r_dentry->d_parent->d_inode;
+ /* dir + name */
+ inode = dir;
hash = req->r_dentry->d_name.hash;
is_hash = true;
}
}
+
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
(int)hash, mode);
if (!inode)
@@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session,
pr_info("mds%d reconnect denied\n", session->s_mds);
remove_session_caps(session);
wake = 1; /* for good measure */
- complete_all(&mdsc->session_close_waiters);
+ wake_up_all(&mdsc->session_close_wq);
kick_requests(mdsc, mds);
break;
@@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- BUG_ON(err);
+ goto out_dput;
}
} else {
path = NULL;
@@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
err = ceph_pagelist_encode_string(pagelist, path, pathlen);
if (err)
- goto out;
+ goto out_free;
spin_lock(&inode->i_lock);
cap->seq = 0; /* reset cap seq */
@@ -2354,8 +2376,9 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
unlock_kernel();
}
-out:
+out_free:
kfree(path);
+out_dput:
dput(dentry);
return err;
}
@@ -2876,7 +2899,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
return -ENOMEM;
init_completion(&mdsc->safe_umount_waiters);
- init_completion(&mdsc->session_close_waiters);
+ init_waitqueue_head(&mdsc->session_close_wq);
INIT_LIST_HEAD(&mdsc->waiting_for_map);
mdsc->sessions = NULL;
mdsc->max_sessions = 0;
@@ -3021,6 +3044,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
}
+/*
+ * true if all sessions are closed, or we force unmount
+ */
+bool done_closing_sessions(struct ceph_mds_client *mdsc)
+{
+ int i, n = 0;
+
+ if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
+ return true;
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++)
+ if (mdsc->sessions[i])
+ n++;
+ mutex_unlock(&mdsc->mutex);
+ return n == 0;
+}
/*
* called after sb is ro.
@@ -3029,45 +3069,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
struct ceph_mds_session *session;
int i;
- int n;
struct ceph_client *client = mdsc->client;
- unsigned long started, timeout = client->mount_args->mount_timeout * HZ;
+ unsigned long timeout = client->mount_args->mount_timeout * HZ;
dout("close_sessions\n");
- mutex_lock(&mdsc->mutex);
-
/* close sessions */
- started = jiffies;
- while (time_before(jiffies, started + timeout)) {
- dout("closing sessions\n");
- n = 0;
- for (i = 0; i < mdsc->max_sessions; i++) {
- session = __ceph_lookup_mds_session(mdsc, i);
- if (!session)
- continue;
- mutex_unlock(&mdsc->mutex);
- mutex_lock(&session->s_mutex);
- __close_session(mdsc, session);
- mutex_unlock(&session->s_mutex);
- ceph_put_mds_session(session);
- mutex_lock(&mdsc->mutex);
- n++;
- }
- if (n == 0)
- break;
-
- if (client->mount_state == CEPH_MOUNT_SHUTDOWN)
- break;
-
- dout("waiting for sessions to close\n");
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
mutex_unlock(&mdsc->mutex);
- wait_for_completion_timeout(&mdsc->session_close_waiters,
- timeout);
+ mutex_lock(&session->s_mutex);
+ __close_session(mdsc, session);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
mutex_lock(&mdsc->mutex);
}
+ mutex_unlock(&mdsc->mutex);
+
+ dout("waiting for sessions to close\n");
+ wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
+ timeout);
/* tear down remaining sessions */
+ mutex_lock(&mdsc->mutex);
for (i = 0; i < mdsc->max_sessions; i++) {
if (mdsc->sessions[i]) {
session = get_session(mdsc->sessions[i]);
@@ -3080,9 +3107,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
mutex_lock(&mdsc->mutex);
}
}
-
WARN_ON(!list_empty(&mdsc->cap_delay_list));
-
mutex_unlock(&mdsc->mutex);
ceph_cleanup_empty_realms(mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ab7e89f5e344..c98267ce6d2a 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -234,7 +234,8 @@ struct ceph_mds_client {
struct mutex mutex; /* all nested structures */
struct ceph_mdsmap *mdsmap;
- struct completion safe_umount_waiters, session_close_waiters;
+ struct completion safe_umount_waiters;
+ wait_queue_head_t session_close_wq;
struct list_head waiting_for_map;
struct ceph_mds_session **sessions; /* NULL for mds if no session */
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index bed6391e52c7..dfced1dacbcd 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc,
reqhead->reassert_version = req->r_reassert_version;
req->r_stamp = jiffies;
- list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
+ list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
ceph_msg_get(req->r_request); /* send consumes a ref */
ceph_con_send(&req->r_osd->o_con, req->r_request);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index c0b26b6badba..4868b9dcac5a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -435,7 +435,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_cap_snap *capsnap;
- int used;
+ int used, dirty;
capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
if (!capsnap) {
@@ -445,6 +445,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
if (__ceph_have_pending_cap_snap(ci)) {
/* there is no point in queuing multiple "pending" cap_snaps,
as no new writes are allowed to start when pending, so any
@@ -452,11 +453,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
- } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
+ } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) ||
+ (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
+ CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
+ dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
+ capsnap, snapc);
igrab(inode);
-
+
atomic_set(&capsnap->nref, 1);
capsnap->ci = ci;
INIT_LIST_HEAD(&capsnap->ci_item);
@@ -464,15 +469,21 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->follows = snapc->seq - 1;
capsnap->issued = __ceph_caps_issued(ci, NULL);
- capsnap->dirty = __ceph_caps_dirty(ci);
+ capsnap->dirty = dirty;
capsnap->mode = inode->i_mode;
capsnap->uid = inode->i_uid;
capsnap->gid = inode->i_gid;
- /* fixme? */
- capsnap->xattr_blob = NULL;
- capsnap->xattr_len = 0;
+ if (dirty & CEPH_CAP_XATTR_EXCL) {
+ __ceph_build_xattrs_blob(ci);
+ capsnap->xattr_blob =
+ ceph_buffer_get(ci->i_xattrs.blob);
+ capsnap->xattr_version = ci->i_xattrs.version;
+ } else {
+ capsnap->xattr_blob = NULL;
+ capsnap->xattr_version = 0;
+ }
/* dirty page count moved from _head to this cap_snap;
all subsequent writes page dirties occur _after_ this
@@ -480,7 +491,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
ci->i_wrbuffer_ref_head = 0;
capsnap->context = snapc;
- ci->i_head_snapc = NULL;
+ ci->i_head_snapc =
+ ceph_get_snap_context(ci->i_snap_realm->cached_context);
+ dout(" new snapc is %p\n", ci->i_head_snapc);
list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
if (used & CEPH_CAP_FILE_WR) {
@@ -539,6 +552,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
return 1; /* caller may want to ceph_flush_snaps */
}
+/*
+ * Queue cap_snaps for snap writeback for this realm and its children.
+ * Called under snap_rwsem, so realm topology won't change.
+ */
+static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
+{
+ struct ceph_inode_info *ci;
+ struct inode *lastinode = NULL;
+ struct ceph_snap_realm *child;
+
+ dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);
+
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_for_each_entry(ci, &realm->inodes_with_caps,
+ i_snap_realm_item) {
+ struct inode *inode = igrab(&ci->vfs_inode);
+ if (!inode)
+ continue;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ if (lastinode)
+ iput(lastinode);
+ lastinode = inode;
+ ceph_queue_cap_snap(ci);
+ spin_lock(&realm->inodes_with_caps_lock);
+ }
+ spin_unlock(&realm->inodes_with_caps_lock);
+ if (lastinode)
+ iput(lastinode);
+
+ dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
+ list_for_each_entry(child, &realm->children, child_item)
+ queue_realm_cap_snaps(child);
+
+ dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
+}
/*
* Parse and apply a snapblob "snap trace" from the MDS. This specifies
@@ -589,29 +637,8 @@ more:
*
* ...unless it's a snap deletion!
*/
- if (!deletion) {
- struct ceph_inode_info *ci;
- struct inode *lastinode = NULL;
-
- spin_lock(&realm->inodes_with_caps_lock);
- list_for_each_entry(ci, &realm->inodes_with_caps,
- i_snap_realm_item) {
- struct inode *inode = igrab(&ci->vfs_inode);
- if (!inode)
- continue;
- spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
- lastinode = inode;
- ceph_queue_cap_snap(ci);
- spin_lock(&realm->inodes_with_caps_lock);
- }
- spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
- dout("update_snap_trace cap_snaps queued\n");
- }
-
+ if (!deletion)
+ queue_realm_cap_snaps(realm);
} else {
dout("update_snap_trace %llx %p seq %lld unchanged\n",
realm->ino, realm, realm->seq);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2482d696f0de..c33897ae5725 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -216,8 +216,7 @@ struct ceph_cap_snap {
uid_t uid;
gid_t gid;
- void *xattr_blob;
- int xattr_len;
+ struct ceph_buffer *xattr_blob;
u64 xattr_version;
u64 size;
@@ -229,8 +228,11 @@ struct ceph_cap_snap {
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
{
- if (atomic_dec_and_test(&capsnap->nref))
+ if (atomic_dec_and_test(&capsnap->nref)) {
+ if (capsnap->xattr_blob)
+ ceph_buffer_put(capsnap->xattr_blob);
kfree(capsnap);
+ }
}
/*
@@ -342,7 +344,8 @@ struct ceph_inode_info {
unsigned i_cap_exporting_issued;
struct ceph_cap_reservation i_cap_migration_resv;
struct list_head i_cap_snaps; /* snapped state pending flush to mds */
- struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */
+ struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
+ dirty|flushing caps */
unsigned i_snap_caps; /* cap bits for snapped files */
int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 097a2654c00f..9578af610b73 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
ci->i_xattrs.prealloc_blob = NULL;
ci->i_xattrs.dirty = false;
+ ci->i_xattrs.version++;
}
}
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb2..0da1debd499d 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,8 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
+ select CRYPTO_MD5
+ select CRYPTO_ARC4
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index cfd1ce34e0bc..21f0fbd86989 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
if (compare_oid(oid, oidlen, MSKRB5_OID,
MSKRB5_OID_LEN))
server->sec_mskerberos = true;
- else if (compare_oid(oid, oidlen, KRB5U2U_OID,
+ if (compare_oid(oid, oidlen, KRB5U2U_OID,
KRB5U2U_OID_LEN))
server->sec_kerberosu2u = true;
- else if (compare_oid(oid, oidlen, KRB5_OID,
+ if (compare_oid(oid, oidlen, KRB5_OID,
KRB5_OID_LEN))
server->sec_kerberos = true;
- else if (compare_oid(oid, oidlen, NTLMSSP_OID,
+ if (compare_oid(oid, oidlen, NTLMSSP_OID,
NTLMSSP_OID_LEN))
server->sec_ntlmssp = true;
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 650638275a6f..7fe6b52df507 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -30,6 +30,8 @@
* This is a compressed table of upper and lower case conversion.
*
*/
+#ifndef _CIFS_UNICODE_H
+#define _CIFS_UNICODE_H
#include <asm/byteorder.h>
#include <linux/types.h>
@@ -67,8 +69,8 @@ extern const struct UniCaseRange CifsUniUpperRange[];
#endif /* UNIUPR_NOUPPER */
#ifndef UNIUPR_NOLOWER
-extern signed char UniLowerTable[512];
-extern struct UniCaseRange UniLowerRange[];
+extern signed char CifsUniLowerTable[512];
+extern const struct UniCaseRange CifsUniLowerRange[];
#endif /* UNIUPR_NOLOWER */
#ifdef __KERNEL__
@@ -337,15 +339,15 @@ UniStrupr(register wchar_t *upin)
* UniTolower: Convert a unicode character to lower case
*/
static inline wchar_t
-UniTolower(wchar_t uc)
+UniTolower(register wchar_t uc)
{
- register struct UniCaseRange *rp;
+ register const struct UniCaseRange *rp;
- if (uc < sizeof(UniLowerTable)) {
+ if (uc < sizeof(CifsUniLowerTable)) {
/* Latin characters */
- return uc + UniLowerTable[uc]; /* Use base tables */
+ return uc + CifsUniLowerTable[uc]; /* Use base tables */
} else {
- rp = UniLowerRange; /* Use range tables */
+ rp = CifsUniLowerRange; /* Use range tables */
while (rp->start) {
if (uc < rp->start) /* Before start of range */
return uc; /* Uppercase = input */
@@ -374,3 +376,5 @@ UniStrlwr(register wchar_t *upin)
}
#endif
+
+#endif /* _CIFS_UNICODE_H */
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h
index 18a9d978e519..0ac7c5a8633a 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/cifs/cifs_uniupr.h
@@ -140,7 +140,7 @@ const struct UniCaseRange CifsUniUpperRange[] = {
/*
* Latin lower case
*/
-static signed char CifsUniLowerTable[512] = {
+signed char CifsUniLowerTable[512] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
@@ -242,12 +242,12 @@ static signed char UniCaseRangeLff20[27] = {
/*
* Lower Case Range
*/
-static const struct UniCaseRange CifsUniLowerRange[] = {
- 0x0380, 0x03ab, UniCaseRangeL0380,
- 0x0400, 0x042f, UniCaseRangeL0400,
- 0x0490, 0x04cb, UniCaseRangeL0490,
- 0x1e00, 0x1ff7, UniCaseRangeL1e00,
- 0xff20, 0xff3a, UniCaseRangeLff20,
- 0, 0, 0
+const struct UniCaseRange CifsUniLowerRange[] = {
+ {0x0380, 0x03ab, UniCaseRangeL0380},
+ {0x0400, 0x042f, UniCaseRangeL0400},
+ {0x0490, 0x04cb, UniCaseRangeL0490},
+ {0x1e00, 0x1ff7, UniCaseRangeL1e00},
+ {0xff20, 0xff3a, UniCaseRangeLff20},
+ {0}
};
#endif
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 847628dfdc44..709f2296bdb4 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -27,6 +27,7 @@
#include "md5.h"
#include "cifs_unicode.h"
#include "cifsproto.h"
+#include "ntlmssp.h"
#include <linux/ctype.h>
#include <linux/random.h>
@@ -42,21 +43,43 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
unsigned char *p24);
static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
- const struct mac_key *key, char *signature)
+ struct TCP_Server_Info *server, char *signature)
{
- struct MD5Context context;
+ int rc;
- if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
+ if (cifs_pdu == NULL || server == NULL || signature == NULL)
return -EINVAL;
- cifs_MD5_init(&context);
- cifs_MD5_update(&context, (char *)&key->data, key->len);
- cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+ if (!server->ntlmssp.sdescmd5) {
+ cERROR(1,
+ "cifs_calculate_signature: can't generate signature\n");
+ return -1;
+ }
- cifs_MD5_final(signature, &context);
- return 0;
+ rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "cifs_calculate_signature: oould not init md5\n");
+ return rc;
+ }
+
+ if (server->secType == RawNTLMSSP)
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ server->session_key.data.ntlmv2.key,
+ CIFS_NTLMV2_SESSKEY_SIZE);
+ else
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ (char *)&server->session_key.data,
+ server->session_key.len);
+
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+
+ rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
+
+ return rc;
}
+
int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -78,8 +101,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
server->sequence_number++;
spin_unlock(&GlobalMid_Lock);
- rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key,
- smb_signature);
+ rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -89,21 +111,39 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
}
static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
- const struct mac_key *key, char *signature)
+ struct TCP_Server_Info *server, char *signature)
{
- struct MD5Context context;
int i;
+ int rc;
- if ((iov == NULL) || (signature == NULL) || (key == NULL))
+ if (iov == NULL || server == NULL || signature == NULL)
return -EINVAL;
- cifs_MD5_init(&context);
- cifs_MD5_update(&context, (char *)&key->data, key->len);
+ if (!server->ntlmssp.sdescmd5) {
+ cERROR(1, "cifs_calc_signature2: can't generate signature\n");
+ return -1;
+ }
+
+ rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "cifs_calc_signature2: oould not init md5\n");
+ return rc;
+ }
+
+ if (server->secType == RawNTLMSSP)
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ server->session_key.data.ntlmv2.key,
+ CIFS_NTLMV2_SESSKEY_SIZE);
+ else
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ (char *)&server->session_key.data,
+ server->session_key.len);
+
for (i = 0; i < n_vec; i++) {
if (iov[i].iov_len == 0)
continue;
if (iov[i].iov_base == NULL) {
- cERROR(1, "null iovec entry");
+ cERROR(1, "cifs_calc_signature2: null iovec entry");
return -EIO;
}
/* The first entry includes a length field (which does not get
@@ -111,18 +151,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
if (i == 0) {
if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
break; /* nothing to sign or corrupt header */
- cifs_MD5_update(&context, iov[0].iov_base+4,
- iov[0].iov_len-4);
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ iov[i].iov_base + 4, iov[i].iov_len - 4);
} else
- cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
+ crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+ iov[i].iov_base, iov[i].iov_len);
}
- cifs_MD5_final(signature, &context);
+ rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
- return 0;
+ return rc;
}
-
int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -145,8 +185,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
server->sequence_number++;
spin_unlock(&GlobalMid_Lock);
- rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key,
- smb_signature);
+ rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -156,14 +195,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
}
int cifs_verify_signature(struct smb_hdr *cifs_pdu,
- const struct mac_key *mac_key,
+ struct TCP_Server_Info *server,
__u32 expected_sequence_number)
{
- unsigned int rc;
+ int rc;
char server_response_sig[8];
char what_we_think_sig_should_be[20];
- if ((cifs_pdu == NULL) || (mac_key == NULL))
+ if (cifs_pdu == NULL || server == NULL)
return -EINVAL;
if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -192,7 +231,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
cpu_to_le32(expected_sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
- rc = cifs_calculate_signature(cifs_pdu, mac_key,
+ rc = cifs_calculate_signature(cifs_pdu, server,
what_we_think_sig_should_be);
if (rc)
@@ -209,7 +248,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
}
/* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
+int cifs_calculate_session_key(struct session_key *key, const char *rn,
const char *password)
{
char temp_key[16];
@@ -223,63 +262,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
return 0;
}
-int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
- const struct nls_table *nls_info)
-{
- char temp_hash[16];
- struct HMACMD5Context ctx;
- char *ucase_buf;
- __le16 *unicode_buf;
- unsigned int i, user_name_len, dom_name_len;
-
- if (ses == NULL)
- return -EINVAL;
-
- E_md4hash(ses->password, temp_hash);
-
- hmac_md5_init_limK_to_64(temp_hash, 16, &ctx);
- user_name_len = strlen(ses->userName);
- if (user_name_len > MAX_USERNAME_SIZE)
- return -EINVAL;
- if (ses->domainName == NULL)
- return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
- dom_name_len = strlen(ses->domainName);
- if (dom_name_len > MAX_USERNAME_SIZE)
- return -EINVAL;
-
- ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL);
- if (ucase_buf == NULL)
- return -ENOMEM;
- unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL);
- if (unicode_buf == NULL) {
- kfree(ucase_buf);
- return -ENOMEM;
- }
-
- for (i = 0; i < user_name_len; i++)
- ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]];
- ucase_buf[i] = 0;
- user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf,
- MAX_USERNAME_SIZE*2, nls_info);
- unicode_buf[user_name_len] = 0;
- user_name_len++;
-
- for (i = 0; i < dom_name_len; i++)
- ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]];
- ucase_buf[i] = 0;
- dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf,
- MAX_USERNAME_SIZE*2, nls_info);
-
- unicode_buf[user_name_len + dom_name_len] = 0;
- hmac_md5_update((const unsigned char *) unicode_buf,
- (user_name_len+dom_name_len)*2, &ctx);
-
- hmac_md5_final(ses->server->ntlmv2_hash, &ctx);
- kfree(ucase_buf);
- kfree(unicode_buf);
- return 0;
-}
-
#ifdef CONFIG_CIFS_WEAK_PW_HASH
void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
char *lnm_session_key)
@@ -324,38 +306,52 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
{
int rc = 0;
int len;
- char nt_hash[16];
- struct HMACMD5Context *pctxt;
+ char nt_hash[CIFS_NTHASH_SIZE];
wchar_t *user;
wchar_t *domain;
+ wchar_t *server;
- pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
-
- if (pctxt == NULL)
- return -ENOMEM;
+ if (!ses->server->ntlmssp.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash);
- /* convert Domainname to unicode and uppercase */
- hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
+ crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash,
+ CIFS_NTHASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+ return rc;
+ }
/* convert ses->userName to unicode and uppercase */
len = strlen(ses->userName);
user = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (user == NULL)
+ if (user == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
+ rc = -ENOMEM;
goto calc_exit_2;
+ }
len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
UniStrupr(user);
- hmac_md5_update((char *)user, 2*len, pctxt);
+
+ crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
+ (char *)user, 2 * len);
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
len = strlen(ses->domainName);
domain = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (domain == NULL)
+ if (domain == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
+ rc = -ENOMEM;
goto calc_exit_1;
+ }
len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
nls_cp);
/* the following line was removed since it didn't work well
@@ -363,65 +359,292 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
Maybe converting the domain name earlier makes sense */
/* UniStrupr(domain); */
- hmac_md5_update((char *)domain, 2*len, pctxt);
+ crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
+ (char *)domain, 2 * len);
kfree(domain);
+ } else if (ses->serverName) {
+ len = strlen(ses->serverName);
+
+ server = kmalloc(2 + (len * 2), GFP_KERNEL);
+ if (server == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
+ rc = -ENOMEM;
+ goto calc_exit_1;
+ }
+ len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
+ nls_cp);
+ /* the following line was removed since it didn't work well
+ with lower cased domain name that passed as an option.
+ Maybe converting the domain name earlier makes sense */
+ /* UniStrupr(domain); */
+
+ crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
+ (char *)server, 2 * len);
+
+ kfree(server);
}
+
+ rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
+ ses->server->ntlmv2_hash);
+
calc_exit_1:
kfree(user);
calc_exit_2:
/* BB FIXME what about bytes 24 through 40 of the signing key?
compare with the NTLM example */
- hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
- kfree(pctxt);
return rc;
}
-void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
- const struct nls_table *nls_cp)
+static int
+find_domain_name(struct cifsSesInfo *ses)
+{
+ int rc = 0;
+ unsigned int attrsize;
+ unsigned int type;
+ unsigned char *blobptr;
+ struct ntlmssp2_name *attrptr;
+
+ if (ses->server->tiblob) {
+ blobptr = ses->server->tiblob;
+ attrptr = (struct ntlmssp2_name *) blobptr;
+
+ while ((type = attrptr->type) != 0) {
+ blobptr += 2; /* advance attr type */
+ attrsize = attrptr->length;
+ blobptr += 2; /* advance attr size */
+ if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
+ if (!ses->domainName) {
+ ses->domainName =
+ kmalloc(attrptr->length + 1,
+ GFP_KERNEL);
+ if (!ses->domainName)
+ return -ENOMEM;
+ cifs_from_ucs2(ses->domainName,
+ (__le16 *)blobptr,
+ attrptr->length,
+ attrptr->length,
+ load_nls_default(), false);
+ }
+ }
+ blobptr += attrsize; /* advance attr value */
+ attrptr = (struct ntlmssp2_name *) blobptr;
+ }
+ } else {
+ ses->server->tilen = 2 * sizeof(struct ntlmssp2_name);
+ ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL);
+ if (!ses->server->tiblob) {
+ ses->server->tilen = 0;
+ cERROR(1, "Challenge target info allocation failure");
+ return -ENOMEM;
+ }
+ memset(ses->server->tiblob, 0x0, ses->server->tilen);
+ attrptr = (struct ntlmssp2_name *) ses->server->tiblob;
+ attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
+ }
+
+ return rc;
+}
+
+static int
+CalcNTLMv2_response(const struct TCP_Server_Info *server,
+ char *v2_session_response)
{
int rc;
+
+ if (!server->ntlmssp.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
+
+ crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
+ CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+ return rc;
+ }
+
+ memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
+ server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
+ crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
+ v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
+ sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
+
+ if (server->tilen)
+ crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
+ server->tiblob, server->tilen);
+
+ rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash,
+ v2_session_response);
+
+ return rc;
+}
+
+int
+setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
+ const struct nls_table *nls_cp)
+{
+ int rc = 0;
struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
- struct HMACMD5Context context;
buf->blob_signature = cpu_to_le32(0x00000101);
buf->reserved = 0;
buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
buf->reserved2 = 0;
- buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
- buf->names[0].length = 0;
- buf->names[1].type = 0;
- buf->names[1].length = 0;
+
+ if (!ses->domainName) {
+ rc = find_domain_name(ses);
+ if (rc) {
+ cERROR(1, "could not get domain/server name rc %d", rc);
+ return rc;
+ }
+ }
/* calculate buf->ntlmv2_hash */
rc = calc_ntlmv2_hash(ses, nls_cp);
- if (rc)
+ if (rc) {
cERROR(1, "could not get v2 hash rc %d", rc);
- CalcNTLMv2_response(ses, resp_buf);
+ return rc;
+ }
+ rc = CalcNTLMv2_response(ses->server, resp_buf);
+ if (rc) {
+ cERROR(1, "could not get v2 hash rc %d", rc);
+ return rc;
+ }
- /* now calculate the MAC key for NTLMv2 */
- hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
- hmac_md5_update(resp_buf, 16, &context);
- hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context);
+ if (!ses->server->ntlmssp.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
- memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf,
- sizeof(struct ntlmv2_resp));
- ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp);
+ crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
+ ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n");
+ return rc;
+ }
+
+ crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
+ resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
+ ses->server->session_key.data.ntlmv2.key);
+
+ memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
+ sizeof(struct ntlmv2_resp));
+ ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp);
+
+ return rc;
}
-void CalcNTLMv2_response(const struct cifsSesInfo *ses,
- char *v2_session_response)
+int
+calc_seckey(struct TCP_Server_Info *server)
{
- struct HMACMD5Context context;
- /* rest of v2 struct already generated */
- memcpy(v2_session_response + 8, ses->server->cryptKey, 8);
- hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
+ int rc;
+ unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE];
+ struct crypto_blkcipher *tfm_arc4;
+ struct scatterlist sgin, sgout;
+ struct blkcipher_desc desc;
+
+ get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
+
+ tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)",
+ 0, CRYPTO_ALG_ASYNC);
+ if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+ cERROR(1, "could not allocate " "master crypto API arc4\n");
+ return 1;
+ }
+
+ desc.tfm = tfm_arc4;
+
+ crypto_blkcipher_setkey(tfm_arc4,
+ server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
+ sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
+ sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
+ rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
- hmac_md5_update(v2_session_response+8,
- sizeof(struct ntlmv2_resp) - 8, &context);
+ if (!rc)
+ memcpy(server->session_key.data.ntlmv2.key,
+ sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
+
+ crypto_free_blkcipher(tfm_arc4);
+
+ return 0;
+}
- hmac_md5_final(v2_session_response, &context);
-/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
+void
+cifs_crypto_shash_release(struct TCP_Server_Info *server)
+{
+ if (server->ntlmssp.md5)
+ crypto_free_shash(server->ntlmssp.md5);
+
+ if (server->ntlmssp.hmacmd5)
+ crypto_free_shash(server->ntlmssp.hmacmd5);
+
+ kfree(server->ntlmssp.sdeschmacmd5);
+
+ kfree(server->ntlmssp.sdescmd5);
+}
+
+int
+cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+{
+ int rc;
+ unsigned int size;
+
+ server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
+ if (!server->ntlmssp.hmacmd5 ||
+ IS_ERR(server->ntlmssp.hmacmd5)) {
+ cERROR(1, "could not allocate crypto hmacmd5\n");
+ return 1;
+ }
+
+ server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
+ if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
+ cERROR(1, "could not allocate crypto md5\n");
+ rc = 1;
+ goto cifs_crypto_shash_allocate_ret1;
+ }
+
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->ntlmssp.hmacmd5);
+ server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
+ if (!server->ntlmssp.sdeschmacmd5) {
+ cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
+ rc = -ENOMEM;
+ goto cifs_crypto_shash_allocate_ret2;
+ }
+ server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5;
+ server->ntlmssp.sdeschmacmd5->shash.flags = 0x0;
+
+
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->ntlmssp.md5);
+ server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL);
+ if (!server->ntlmssp.sdescmd5) {
+ cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
+ rc = -ENOMEM;
+ goto cifs_crypto_shash_allocate_ret3;
+ }
+ server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5;
+ server->ntlmssp.sdescmd5->shash.flags = 0x0;
+
+ return 0;
+
+cifs_crypto_shash_allocate_ret3:
+ kfree(server->ntlmssp.sdeschmacmd5);
+
+cifs_crypto_shash_allocate_ret2:
+ crypto_free_shash(server->ntlmssp.md5);
+
+cifs_crypto_shash_allocate_ret1:
+ crypto_free_shash(server->ntlmssp.hmacmd5);
+
+ return rc;
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0cdfb8c32ac6..c9d0cfc086eb 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
#include <linux/workqueue.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
+
/*
* The sizes of various internal tables and strings
*/
@@ -97,7 +100,7 @@ enum protocolEnum {
/* Netbios frames protocol not supported at this time */
};
-struct mac_key {
+struct session_key {
unsigned int len;
union {
char ntlm[CIFS_SESS_KEY_SIZE + 16];
@@ -120,6 +123,21 @@ struct cifs_cred {
struct cifs_ace *aces;
};
+struct sdesc {
+ struct shash_desc shash;
+ char ctx[];
+};
+
+struct ntlmssp_auth {
+ __u32 client_flags;
+ __u32 server_flags;
+ unsigned char ciphertext[CIFS_CPHTXT_SIZE];
+ struct crypto_shash *hmacmd5;
+ struct crypto_shash *md5;
+ struct sdesc *sdeschmacmd5;
+ struct sdesc *sdescmd5;
+};
+
/*
*****************************************************************
* Except the CIFS PDUs themselves all the
@@ -182,11 +200,14 @@ struct TCP_Server_Info {
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* needed for CIFS PDU signature */
- struct mac_key mac_signing_key;
+ struct session_key session_key;
char ntlmv2_hash[16];
unsigned long lstrp; /* when we got last response from this server */
u16 dialect; /* dialect index that server chose */
/* extended security flavors that server supports */
+ unsigned int tilen; /* length of the target info blob */
+ unsigned char *tiblob; /* target info blob in challenge response */
+ struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool sec_kerberosu2u; /* supports U2U Kerberos */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 14d036d8db11..320e0fd0ba7b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -134,6 +134,12 @@
* Size of the session key (crypto key encrypted with the password
*/
#define CIFS_SESS_KEY_SIZE (24)
+#define CIFS_CLIENT_CHALLENGE_SIZE (8)
+#define CIFS_SERVER_CHALLENGE_SIZE (8)
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+#define CIFS_NTLMV2_SESSKEY_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
/*
* Maximum user name length
@@ -663,7 +669,6 @@ struct ntlmv2_resp {
__le64 time;
__u64 client_chal; /* random */
__u32 reserved2;
- struct ntlmssp2_name names[2];
/* array of name entries could follow ending in minimum 4 byte struct */
} __attribute__((packed));
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1f5450814087..1378d9133844 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -361,15 +361,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
__u32 *);
extern int cifs_verify_signature(struct smb_hdr *,
- const struct mac_key *mac_key,
+ struct TCP_Server_Info *server,
__u32 expected_sequence_number);
-extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
+extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
const char *pass);
-extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
- const struct nls_table *);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
-extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
+extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
const struct nls_table *);
+extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
+extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
+extern int calc_seckey(struct TCP_Server_Info *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern void calc_lanman_hash(const char *password, const char *cryptkey,
bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c65c3419dd37..4bda920d1f75 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -604,11 +604,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
else
rc = -EINVAL;
- if (server->sec_kerberos || server->sec_mskerberos)
- server->secType = Kerberos;
- else if (server->sec_ntlmssp)
- server->secType = RawNTLMSSP;
- else
+ if (server->secType == Kerberos) {
+ if (!server->sec_kerberos &&
+ !server->sec_mskerberos)
+ rc = -EOPNOTSUPP;
+ } else if (server->secType == RawNTLMSSP) {
+ if (!server->sec_ntlmssp)
+ rc = -EOPNOTSUPP;
+ } else
rc = -EOPNOTSUPP;
}
} else
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 95c2ea67edfb..ec0ea4a43bdb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1673,7 +1673,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
MAX_USERNAME_SIZE))
continue;
if (strlen(vol->username) != 0 &&
- strncmp(ses->password, vol->password,
+ ses->password != NULL &&
+ strncmp(ses->password,
+ vol->password ? vol->password : "",
MAX_PASSWORD_SIZE))
continue;
}
@@ -1706,6 +1708,7 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
CIFSSMBLogoff(xid, ses);
_FreeXid(xid);
}
+ cifs_crypto_shash_release(server);
sesInfoFree(ses);
cifs_put_tcp_session(server);
}
@@ -1785,13 +1788,23 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
ses->linux_uid = volume_info->linux_uid;
ses->overrideSecFlg = volume_info->secFlg;
+ rc = cifs_crypto_shash_allocate(server);
+ if (rc) {
+ cERROR(1, "could not setup hash structures rc %d", rc);
+ goto get_ses_fail;
+ }
+ server->tilen = 0;
+ server->tiblob = NULL;
+
mutex_lock(&ses->session_mutex);
rc = cifs_negotiate_protocol(xid, ses);
if (!rc)
rc = cifs_setup_session(xid, ses, volume_info->local_nls);
mutex_unlock(&ses->session_mutex);
- if (rc)
+ if (rc) {
+ cifs_crypto_shash_release(ses->server);
goto get_ses_fail;
+ }
/* success, put it on the list */
write_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 578d88c5b46e..f9ed0751cc12 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -305,8 +305,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
full_path = build_path_from_dentry(direntry);
if (full_path == NULL) {
rc = -ENOMEM;
- FreeXid(xid);
- return rc;
+ goto cifs_create_out;
}
if (oplockEnabled)
@@ -365,9 +364,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
if (buf == NULL) {
- kfree(full_path);
- FreeXid(xid);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto cifs_create_out;
}
/*
@@ -496,6 +494,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
struct cifsTconInfo *pTcon;
char *full_path = NULL;
struct inode *newinode = NULL;
+ int oplock = 0;
+ u16 fileHandle;
+ FILE_ALL_INFO *buf = NULL;
+ unsigned int bytes_written;
+ struct win_dev *pdev;
if (!old_valid_dev(device_number))
return -EINVAL;
@@ -506,9 +509,12 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
pTcon = cifs_sb->tcon;
full_path = build_path_from_dentry(direntry);
- if (full_path == NULL)
+ if (full_path == NULL) {
rc = -ENOMEM;
- else if (pTcon->unix_ext) {
+ goto mknod_out;
+ }
+
+ if (pTcon->unix_ext) {
struct cifs_unix_set_info_args args = {
.mode = mode & ~current_umask(),
.ctime = NO_CHANGE_64,
@@ -527,87 +533,78 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc)
+ goto mknod_out;
- if (!rc) {
- rc = cifs_get_inode_info_unix(&newinode, full_path,
+ rc = cifs_get_inode_info_unix(&newinode, full_path,
inode->i_sb, xid);
- if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
- else
- direntry->d_op = &cifs_dentry_ops;
- if (rc == 0)
- d_instantiate(direntry, newinode);
- }
- } else {
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
- int oplock = 0;
- u16 fileHandle;
- FILE_ALL_INFO *buf;
+ if (pTcon->nocase)
+ direntry->d_op = &cifs_ci_dentry_ops;
+ else
+ direntry->d_op = &cifs_dentry_ops;
- cFYI(1, "sfu compat create special file");
+ if (rc == 0)
+ d_instantiate(direntry, newinode);
+ goto mknod_out;
+ }
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (buf == NULL) {
- kfree(full_path);
- rc = -ENOMEM;
- FreeXid(xid);
- return rc;
- }
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
+ goto mknod_out;
- rc = CIFSSMBOpen(xid, pTcon, full_path,
- FILE_CREATE, /* fail if exists */
- GENERIC_WRITE /* BB would
- WRITE_OWNER | WRITE_DAC be better? */,
- /* Create a file and set the
- file attribute to SYSTEM */
- CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
- &fileHandle, &oplock, buf,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
-
- /* BB FIXME - add handling for backlevel servers
- which need legacy open and check for all
- calls to SMBOpen for fallback to SMBLeagcyOpen */
- if (!rc) {
- /* BB Do not bother to decode buf since no
- local inode yet to put timestamps in,
- but we can reuse it safely */
- unsigned int bytes_written;
- struct win_dev *pdev;
- pdev = (struct win_dev *)buf;
- if (S_ISCHR(mode)) {
- memcpy(pdev->type, "IntxCHR", 8);
- pdev->major =
- cpu_to_le64(MAJOR(device_number));
- pdev->minor =
- cpu_to_le64(MINOR(device_number));
- rc = CIFSSMBWrite(xid, pTcon,
- fileHandle,
- sizeof(struct win_dev),
- 0, &bytes_written, (char *)pdev,
- NULL, 0);
- } else if (S_ISBLK(mode)) {
- memcpy(pdev->type, "IntxBLK", 8);
- pdev->major =
- cpu_to_le64(MAJOR(device_number));
- pdev->minor =
- cpu_to_le64(MINOR(device_number));
- rc = CIFSSMBWrite(xid, pTcon,
- fileHandle,
- sizeof(struct win_dev),
- 0, &bytes_written, (char *)pdev,
- NULL, 0);
- } /* else if(S_ISFIFO */
- CIFSSMBClose(xid, pTcon, fileHandle);
- d_drop(direntry);
- }
- kfree(buf);
- /* add code here to set EAs */
- }
+
+ cFYI(1, "sfu compat create special file");
+
+ buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+ if (buf == NULL) {
+ kfree(full_path);
+ rc = -ENOMEM;
+ FreeXid(xid);
+ return rc;
}
+ /* FIXME: would WRITE_OWNER | WRITE_DAC be better? */
+ rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE,
+ GENERIC_WRITE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
+ &fileHandle, &oplock, buf, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc)
+ goto mknod_out;
+
+ /* BB Do not bother to decode buf since no local inode yet to put
+ * timestamps in, but we can reuse it safely */
+
+ pdev = (struct win_dev *)buf;
+ if (S_ISCHR(mode)) {
+ memcpy(pdev->type, "IntxCHR", 8);
+ pdev->major =
+ cpu_to_le64(MAJOR(device_number));
+ pdev->minor =
+ cpu_to_le64(MINOR(device_number));
+ rc = CIFSSMBWrite(xid, pTcon,
+ fileHandle,
+ sizeof(struct win_dev),
+ 0, &bytes_written, (char *)pdev,
+ NULL, 0);
+ } else if (S_ISBLK(mode)) {
+ memcpy(pdev->type, "IntxBLK", 8);
+ pdev->major =
+ cpu_to_le64(MAJOR(device_number));
+ pdev->minor =
+ cpu_to_le64(MINOR(device_number));
+ rc = CIFSSMBWrite(xid, pTcon,
+ fileHandle,
+ sizeof(struct win_dev),
+ 0, &bytes_written, (char *)pdev,
+ NULL, 0);
+ } /* else if (S_ISFIFO) */
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ d_drop(direntry);
+
+ /* FIXME: add code here to set EAs */
+
+mknod_out:
kfree(full_path);
+ kfree(buf);
FreeXid(xid);
return rc;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index db11fdef0e92..de748c652d11 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -242,8 +242,7 @@ int cifs_open(struct inode *inode, struct file *file)
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
rc = -ENOMEM;
- FreeXid(xid);
- return rc;
+ goto out;
}
cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4bc47e5b5f29..86a164f08a74 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -834,7 +834,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
xid, NULL);
if (!inode)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(rc);
#ifdef CONFIG_CIFS_FSCACHE
/* populate tcon->resource_id */
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 49c9a4e75319..1db0f0746a5b 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -61,6 +61,19 @@
#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
#define NTLMSSP_NEGOTIATE_56 0x80000000
+/* Define AV Pair Field IDs */
+#define NTLMSSP_AV_EOL 0
+#define NTLMSSP_AV_NB_COMPUTER_NAME 1
+#define NTLMSSP_AV_NB_DOMAIN_NAME 2
+#define NTLMSSP_AV_DNS_COMPUTER_NAME 3
+#define NTLMSSP_AV_DNS_DOMAIN_NAME 4
+#define NTLMSSP_AV_DNS_TREE_NAME 5
+#define NTLMSSP_AV_FLAGS 6
+#define NTLMSSP_AV_TIMESTAMP 7
+#define NTLMSSP_AV_RESTRICTION 8
+#define NTLMSSP_AV_TARGET_NAME 9
+#define NTLMSSP_AV_CHANNEL_BINDINGS 10
+
/* Although typedefs are not commonly used for structure definitions */
/* in the Linux kernel, in this particular case they are useful */
/* to more closely match the standards document for NTLMSSP from */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 0a57cb7db5dd..795095f4eac6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -383,6 +383,9 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
struct cifsSesInfo *ses)
{
+ unsigned int tioffset; /* challeng message target info area */
+ unsigned int tilen; /* challeng message target info area length */
+
CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
@@ -405,6 +408,20 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
/* BB spec says that if AvId field of MsvAvTimestamp is populated then
we must set the MIC field of the AUTHENTICATE_MESSAGE */
+ ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags);
+
+ tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
+ tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
+ ses->server->tilen = tilen;
+ if (tilen) {
+ ses->server->tiblob = kmalloc(tilen, GFP_KERNEL);
+ if (!ses->server->tiblob) {
+ cERROR(1, "Challenge target info allocation failure");
+ return -ENOMEM;
+ }
+ memcpy(ses->server->tiblob, bcc_ptr + tioffset, tilen);
+ }
+
return 0;
}
@@ -425,12 +442,13 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
/* BB is NTLMV2 session security format easier to use here? */
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
+ NTLMSSP_NEGOTIATE_NTLM;
if (ses->server->secMode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
- flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
- flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+ (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+ flags |= NTLMSSP_NEGOTIATE_SIGN |
+ NTLMSSP_NEGOTIATE_KEY_XCH |
+ NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+ }
sec_blob->NegotiateFlags |= cpu_to_le32(flags);
@@ -451,10 +469,12 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
struct cifsSesInfo *ses,
const struct nls_table *nls_cp, bool first)
{
+ int rc;
+ unsigned int size;
AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
__u32 flags;
unsigned char *tmp;
- char ntlm_session_key[CIFS_SESS_KEY_SIZE];
+ struct ntlmv2_resp ntlmv2_response = {};
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -477,19 +497,25 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.Length = 0;
sec_blob->LmChallengeResponse.MaximumLength = 0;
- /* calculate session key, BB what about adding similar ntlmv2 path? */
- SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
- if (first)
- cifs_calculate_mac_key(&ses->server->mac_signing_key,
- ntlm_session_key, ses->password);
-
- memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
- sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
- sec_blob->NtChallengeResponse.MaximumLength =
- cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
+ if (rc) {
+ cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc);
+ goto setup_ntlmv2_ret;
+ }
+ size = sizeof(struct ntlmv2_resp);
+ memcpy(tmp, (char *)&ntlmv2_response, size);
+ tmp += size;
+ if (ses->server->tilen > 0) {
+ memcpy(tmp, ses->server->tiblob, ses->server->tilen);
+ tmp += ses->server->tilen;
+ } else
+ ses->server->tilen = 0;
- tmp += CIFS_SESS_KEY_SIZE;
+ sec_blob->NtChallengeResponse.Length = cpu_to_le16(size +
+ ses->server->tilen);
+ sec_blob->NtChallengeResponse.MaximumLength =
+ cpu_to_le16(size + ses->server->tilen);
if (ses->domainName == NULL) {
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -501,7 +527,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
MAX_USERNAME_SIZE, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- len += 2; /* trailing null */
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
sec_blob->DomainName.Length = cpu_to_le16(len);
sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
@@ -518,7 +543,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
MAX_USERNAME_SIZE, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- len += 2; /* trailing null */
sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
sec_blob->UserName.Length = cpu_to_le16(len);
sec_blob->UserName.MaximumLength = cpu_to_le16(len);
@@ -530,9 +554,26 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
- sec_blob->SessionKey.Length = 0;
- sec_blob->SessionKey.MaximumLength = 0;
+ if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+ !calc_seckey(ses->server)) {
+ memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.MaximumLength =
+ cpu_to_le16(CIFS_CPHTXT_SIZE);
+ tmp += CIFS_CPHTXT_SIZE;
+ } else {
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = 0;
+ sec_blob->SessionKey.MaximumLength = 0;
+ }
+
+ ses->server->sequence_number = 0;
+
+setup_ntlmv2_ret:
+ if (ses->server->tilen > 0)
+ kfree(ses->server->tiblob);
+
return tmp - pbuffer;
}
@@ -546,15 +587,14 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
return;
}
-static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
+static int setup_ntlmssp_auth_req(char *ntlmsspblob,
struct cifsSesInfo *ses,
const struct nls_table *nls, bool first_time)
{
int bloblen;
- bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
+ bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls,
first_time);
- pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
return bloblen;
}
@@ -690,7 +730,7 @@ ssetup_ntlmssp_authenticate:
if (first_time) /* should this be moved into common code
with similar ntlmv2 path? */
- cifs_calculate_mac_key(&ses->server->mac_signing_key,
+ cifs_calculate_session_key(&ses->server->session_key,
ntlm_session_key, ses->password);
/* copy session key */
@@ -729,12 +769,21 @@ ssetup_ntlmssp_authenticate:
cpu_to_le16(sizeof(struct ntlmv2_resp));
/* calculate session key */
- setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+ rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+ if (rc) {
+ kfree(v2_sess_key);
+ goto ssetup_exit;
+ }
/* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
kfree(v2_sess_key);
+ if (ses->server->tilen > 0) {
+ memcpy(bcc_ptr, ses->server->tiblob,
+ ses->server->tilen);
+ bcc_ptr += ses->server->tilen;
+ }
if (ses->capabilities & CAP_UNICODE) {
if (iov[0].iov_len % 2) {
*bcc_ptr = 0;
@@ -765,15 +814,15 @@ ssetup_ntlmssp_authenticate:
}
/* bail out if key is too long */
if (msg->sesskey_len >
- sizeof(ses->server->mac_signing_key.data.krb5)) {
+ sizeof(ses->server->session_key.data.krb5)) {
cERROR(1, "Kerberos signing key too long (%u bytes)",
msg->sesskey_len);
rc = -EOVERFLOW;
goto ssetup_exit;
}
if (first_time) {
- ses->server->mac_signing_key.len = msg->sesskey_len;
- memcpy(ses->server->mac_signing_key.data.krb5,
+ ses->server->session_key.len = msg->sesskey_len;
+ memcpy(ses->server->session_key.data.krb5,
msg->data, msg->sesskey_len);
}
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -815,12 +864,28 @@ ssetup_ntlmssp_authenticate:
if (phase == NtLmNegotiate) {
setup_ntlmssp_neg_req(pSMB, ses);
iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
+ iov[1].iov_base = &pSMB->req.SecurityBlob[0];
} else if (phase == NtLmAuthenticate) {
int blob_len;
- blob_len = setup_ntlmssp_auth_req(pSMB, ses,
- nls_cp,
- first_time);
+ char *ntlmsspblob;
+
+ ntlmsspblob = kmalloc(5 *
+ sizeof(struct _AUTHENTICATE_MESSAGE),
+ GFP_KERNEL);
+ if (!ntlmsspblob) {
+ cERROR(1, "Can't allocate NTLMSSP");
+ rc = -ENOMEM;
+ goto ssetup_exit;
+ }
+
+ blob_len = setup_ntlmssp_auth_req(ntlmsspblob,
+ ses,
+ nls_cp,
+ first_time);
iov[1].iov_len = blob_len;
+ iov[1].iov_base = ntlmsspblob;
+ pSMB->req.SecurityBlobLength =
+ cpu_to_le16(blob_len);
/* Make sure that we tell the server that we
are using the uid that it just gave us back
on the response (challenge) */
@@ -830,7 +895,6 @@ ssetup_ntlmssp_authenticate:
rc = -ENOSYS;
goto ssetup_exit;
}
- iov[1].iov_base = &pSMB->req.SecurityBlob[0];
/* unicode strings must be word aligned */
if ((iov[0].iov_len + iov[1].iov_len) % 2) {
*bcc_ptr = 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 82f78c4d6978..e0588cdf4cc5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(midQ->resp_buf,
- &ses->server->mac_signing_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- &ses->server->mac_signing_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- &ses->server->mac_signing_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e7357104cfd..3dfef0623968 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2450,14 +2450,13 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
{
- u32 op_share_access, new_access;
+ u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK;
+ bool new_access;
__be32 status;
- set_access(&new_access, stp->st_access_bmap);
- new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK;
-
+ new_access = !test_bit(op_share_access, &stp->st_access_bmap);
if (new_access) {
- status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access);
+ status = nfs4_get_vfs_file(rqstp, fp, cur_fh, op_share_access);
if (status)
return status;
}
@@ -2470,7 +2469,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
return status;
}
/* remember the open */
- op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK;
__set_bit(op_share_access, &stp->st_access_bmap);
__set_bit(open->op_share_deny, &stp->st_deny_bmap);
@@ -2983,7 +2981,6 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
*filpp = find_readable_file(stp->st_file);
else
*filpp = find_writeable_file(stp->st_file);
- BUG_ON(!*filpp); /* assured by check_openmode */
}
}
status = nfs_ok;
@@ -3561,7 +3558,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_stateowner *open_sop = NULL;
struct nfs4_stateowner *lock_sop = NULL;
struct nfs4_stateid *lock_stp;
- struct file *filp;
+ struct nfs4_file *fp;
+ struct file *filp = NULL;
struct file_lock file_lock;
struct file_lock conflock;
__be32 status = 0;
@@ -3591,7 +3589,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* lock stateid.
*/
struct nfs4_stateid *open_stp = NULL;
- struct nfs4_file *fp;
status = nfserr_stale_clientid;
if (!nfsd4_has_session(cstate) &&
@@ -3634,6 +3631,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
lock_sop = lock->lk_replay_owner;
+ fp = lock_stp->st_file;
}
/* lock->lk_replay_owner and lock_stp have been created or found */
@@ -3648,13 +3646,19 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (lock->lk_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
- filp = find_readable_file(lock_stp->st_file);
+ if (find_readable_file(lock_stp->st_file)) {
+ nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ);
+ filp = find_readable_file(lock_stp->st_file);
+ }
file_lock.fl_type = F_RDLCK;
cmd = F_SETLK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
- filp = find_writeable_file(lock_stp->st_file);
+ if (find_writeable_file(lock_stp->st_file)) {
+ nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE);
+ filp = find_writeable_file(lock_stp->st_file);
+ }
file_lock.fl_type = F_WRLCK;
cmd = F_SETLK;
break;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 7731a75971dd..322518c88e4b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -363,23 +363,23 @@ struct nfs4_file {
* at all? */
static inline struct file *find_writeable_file(struct nfs4_file *f)
{
- if (f->fi_fds[O_RDWR])
- return f->fi_fds[O_RDWR];
- return f->fi_fds[O_WRONLY];
+ if (f->fi_fds[O_WRONLY])
+ return f->fi_fds[O_WRONLY];
+ return f->fi_fds[O_RDWR];
}
static inline struct file *find_readable_file(struct nfs4_file *f)
{
- if (f->fi_fds[O_RDWR])
- return f->fi_fds[O_RDWR];
- return f->fi_fds[O_RDONLY];
+ if (f->fi_fds[O_RDONLY])
+ return f->fi_fds[O_RDONLY];
+ return f->fi_fds[O_RDWR];
}
static inline struct file *find_any_file(struct nfs4_file *f)
{
if (f->fi_fds[O_RDWR])
return f->fi_fds[O_RDWR];
- else if (f->fi_fds[O_RDWR])
+ else if (f->fi_fds[O_WRONLY])
return f->fi_fds[O_WRONLY];
else
return f->fi_fds[O_RDONLY];
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 96360a83cb91..661a6cf8e826 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2033,15 +2033,17 @@ out:
__be32
nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
{
- struct path path = {
- .mnt = fhp->fh_export->ex_path.mnt,
- .dentry = fhp->fh_dentry,
- };
__be32 err;
err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
- if (!err && vfs_statfs(&path, stat))
- err = nfserr_io;
+ if (!err) {
+ struct path path = {
+ .mnt = fhp->fh_export->ex_path.mnt,
+ .dentry = fhp->fh_dentry,
+ };
+ if (vfs_statfs(&path, stat))
+ err = nfserr_io;
+ }
return err;
}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 15412fe15c3a..b552f816de15 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -852,8 +852,8 @@ xfs_convert_page(
SetPageUptodate(page);
if (count) {
- wbc->nr_to_write--;
- if (wbc->nr_to_write <= 0)
+ if (--wbc->nr_to_write <= 0 &&
+ wbc->sync_mode == WB_SYNC_NONE)
done = 1;
}
xfs_start_page_writeback(page, !page_dirty, count);
@@ -1068,7 +1068,7 @@ xfs_vm_writepage(
* by themselves.
*/
if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
- goto out_fail;
+ goto redirty;
/*
* We need a transaction if there are delalloc or unwritten buffers
@@ -1080,7 +1080,7 @@ xfs_vm_writepage(
*/
xfs_count_page_state(page, &delalloc, &unwritten);
if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
- goto out_fail;
+ goto redirty;
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
@@ -1245,12 +1245,15 @@ error:
if (iohead)
xfs_cancel_ioend(iohead);
+ if (err == -EAGAIN)
+ goto redirty;
+
xfs_aops_discard_page(page);
ClearPageUptodate(page);
unlock_page(page);
return err;
-out_fail:
+redirty:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 15c35b62ff14..a4e07974955b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1226,6 +1226,7 @@ xfs_fs_statfs(
struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
xfs_extlen_t lsize;
+ __int64_t ffree;
statp->f_type = XFS_SB_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
@@ -1249,7 +1250,11 @@ xfs_fs_statfs(
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
mp->m_maxicount);
- statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+
+ /* make sure statp->f_ffree does not underflow */
+ ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+ statp->f_ffree = max_t(__int64_t, ffree, 0);
+
spin_unlock(&mp->m_sb_lock);
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1402,7 +1407,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
- return -xfs_fs_log_dummy(mp);
+ return -xfs_fs_log_dummy(mp, SYNC_WAIT);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index dfcbd98d1599..d59c4a65d492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -34,6 +34,7 @@
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
+#include "xfs_fsops.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -341,38 +342,6 @@ xfs_sync_attr(
}
STATIC int
-xfs_commit_dummy_trans(
- struct xfs_mount *mp,
- uint flags)
-{
- struct xfs_inode *ip = mp->m_rootip;
- struct xfs_trans *tp;
- int error;
-
- /*
- * Put a dummy transaction in the log to tell recovery
- * that all others are OK.
- */
- tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
- error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- /* the log force ensures this transaction is pushed to disk */
- xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
- return error;
-}
-
-STATIC int
xfs_sync_fsdata(
struct xfs_mount *mp)
{
@@ -432,7 +401,7 @@ xfs_quiesce_data(
/* mark the log as covered if needed */
if (xfs_log_need_covered(mp))
- error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
+ error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
/* flush data-only devices */
if (mp->m_rtdev_targp)
@@ -563,7 +532,7 @@ xfs_flush_inodes(
/*
* Every sync period we need to unpin all items, reclaim inodes and sync
* disk quotas. We might need to cover the log to indicate that the
- * filesystem is idle.
+ * filesystem is idle and not frozen.
*/
STATIC void
xfs_sync_worker(
@@ -577,8 +546,9 @@ xfs_sync_worker(
xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
- if (xfs_log_need_covered(mp))
- error = xfs_commit_dummy_trans(mp, 0);
+ if (mp->m_super->s_frozen == SB_UNFROZEN &&
+ xfs_log_need_covered(mp))
+ error = xfs_fs_log_dummy(mp, 0);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dbca5f5c37ba..43b1d5699335 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -604,31 +604,36 @@ out:
return 0;
}
+/*
+ * Dump a transaction into the log that contains no real change. This is needed
+ * to be able to make the log dirty or stamp the current tail LSN into the log
+ * during the covering operation.
+ *
+ * We cannot use an inode here for this - that will push dirty state back up
+ * into the VFS and then periodic inode flushing will prevent log covering from
+ * making progress. Hence we log a field in the superblock instead.
+ */
int
xfs_fs_log_dummy(
- xfs_mount_t *mp)
+ xfs_mount_t *mp,
+ int flags)
{
xfs_trans_t *tp;
- xfs_inode_t *ip;
int error;
tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
- error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
- ip = mp->m_rootip;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
+ /* log the UUID because it is an unchanging field */
+ xfs_mod_sb(tp, XFS_SB_UUID);
+ if (flags & SYNC_WAIT)
+ xfs_trans_set_sync(tp);
+ return xfs_trans_commit(tp, 0);
}
int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9..a786c5212c1e 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
xfs_fsop_resblks_t *outval);
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(xfs_mount_t *mp);
+extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index abf80ae1e95b..5371d2dc360e 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1213,7 +1213,6 @@ xfs_imap_lookup(
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
- xfs_agino_t startino;
int error;
int i;
@@ -1227,13 +1226,13 @@ xfs_imap_lookup(
}
/*
- * derive and lookup the exact inode record for the given agino. If the
- * record cannot be found, then it's an invalid inode number and we
- * should abort.
+ * Lookup the inode record for the given agino. If the record cannot be
+ * found, then it's an invalid inode number and we should abort. Once
+ * we have a record, we need to ensure it contains the inode number
+ * we are looking up.
*/
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
- startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
- error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) {
if (i)
error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1246,6 +1245,11 @@ xfs_imap_lookup(
if (error)
return error;
+ /* check that the returned record contains the required inode */
+ if (rec.ir_startino > agino ||
+ rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
+ return EINVAL;
+
/* for untrusted inodes check it is allocated first */
if ((flags & XFS_IGET_UNTRUSTED) &&
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb4f23c..34798f391c49 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
return 0;
}
+/*
+ * A big issue when freeing the inode cluster is is that we _cannot_ skip any
+ * inodes that are in memory - they all must be marked stale and attached to
+ * the cluster buffer.
+ */
STATIC void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
}
for (j = 0; j < nbufs; j++, inum += ninodes) {
- int found = 0;
-
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
/*
* Walk the inodes already attached to the buffer and mark them
* stale. These will all have the flush locks held, so an
- * in-memory inode walk can't lock them.
+ * in-memory inode walk can't lock them. By marking them all
+ * stale first, we will not attempt to lock them in the loop
+ * below as the XFS_ISTALE flag will be set.
*/
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
while (lip) {
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
&iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- found++;
}
lip = lip->li_bio_list;
}
+
/*
* For each inode in memory attempt to add it to the inode
* buffer and set it up for being staled on buffer IO
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
* even trying to lock them.
*/
for (i = 0; i < ninodes; i++) {
+retry:
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
continue;
}
- /* don't try to lock/unlock the current inode */
+ /*
+ * Don't try to lock/unlock the current inode, but we
+ * _cannot_ skip the other inodes that we did not find
+ * in the list attached to the buffer and are not
+ * already marked stale. If we can't lock it, back off
+ * and retry.
+ */
if (ip != free_ip &&
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
- continue;
+ delay(1);
+ goto retry;
}
read_unlock(&pag->pag_ici_lock);
- if (!xfs_iflock_nowait(ip)) {
- if (ip != free_ip)
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
-
+ xfs_iflock(ip);
xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- ASSERT(ip != free_ip);
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
+ /*
+ * we don't need to attach clean inodes or those only
+ * with unlogged changes (which we throw away, anyway).
+ */
iip = ip->i_itemp;
- if (!iip) {
- /* inode with unlogged changes only */
+ if (!iip || xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
- found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- if (found)
- xfs_trans_stale_inode_buf(tp, bp);
+ xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 925d572bf0f4..33f718f92a48 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3015,7 +3015,8 @@ _xfs_log_force(
XFS_STATS_INC(xs_log_force);
- xlog_cil_push(log, 1);
+ if (log->l_cilp)
+ xlog_cil_force(log);
spin_lock(&log->l_icloglock);
@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
XFS_STATS_INC(xs_log_force);
if (log->l_cilp) {
- lsn = xlog_cil_push_lsn(log, lsn);
+ lsn = xlog_cil_force_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
return 0;
}
@@ -3724,7 +3725,7 @@ xfs_log_force_umount(
* call below.
*/
if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
- xlog_cil_push(log, 1);
+ xlog_cil_force(log);
/*
* We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19ac..ed575fb4b495 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
ctx->sequence = 1;
ctx->cil = cil;
cil->xc_ctx = ctx;
+ cil->xc_current_sequence = ctx->sequence;
cil->xc_log = log;
log->l_cilp = cil;
@@ -269,15 +270,10 @@ xlog_cil_insert(
static void
xlog_cil_format_items(
struct log *log,
- struct xfs_log_vec *log_vector,
- struct xlog_ticket *ticket,
- xfs_lsn_t *start_lsn)
+ struct xfs_log_vec *log_vector)
{
struct xfs_log_vec *lv;
- if (start_lsn)
- *start_lsn = log->l_cilp->xc_ctx->sequence;
-
ASSERT(log_vector);
for (lv = log_vector; lv; lv = lv->lv_next) {
void *ptr;
@@ -301,9 +297,24 @@ xlog_cil_format_items(
ptr += vec->i_len;
}
ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+ }
+}
+
+static void
+xlog_cil_insert_items(
+ struct log *log,
+ struct xfs_log_vec *log_vector,
+ struct xlog_ticket *ticket,
+ xfs_lsn_t *start_lsn)
+{
+ struct xfs_log_vec *lv;
+
+ if (start_lsn)
+ *start_lsn = log->l_cilp->xc_ctx->sequence;
+ ASSERT(log_vector);
+ for (lv = log_vector; lv; lv = lv->lv_next)
xlog_cil_insert(log, ticket, lv->lv_item, lv);
- }
}
static void
@@ -321,80 +332,6 @@ xlog_cil_free_logvec(
}
/*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
- */
-int
-xfs_log_commit_cil(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_log_vec *log_vector,
- xfs_lsn_t *commit_lsn,
- int flags)
-{
- struct log *log = mp->m_log;
- int log_flags = 0;
- int push = 0;
-
- if (flags & XFS_TRANS_RELEASE_LOG_RES)
- log_flags = XFS_LOG_REL_PERM_RESERV;
-
- if (XLOG_FORCED_SHUTDOWN(log)) {
- xlog_cil_free_logvec(log_vector);
- return XFS_ERROR(EIO);
- }
-
- /* lock out background commit */
- down_read(&log->l_cilp->xc_ctx_lock);
- xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
-
- /* check we didn't blow the reservation */
- if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
- /* attach the transaction to the CIL if it has any busy extents */
- if (!list_empty(&tp->t_busy)) {
- spin_lock(&log->l_cilp->xc_cil_lock);
- list_splice_init(&tp->t_busy,
- &log->l_cilp->xc_ctx->busy_extents);
- spin_unlock(&log->l_cilp->xc_cil_lock);
- }
-
- tp->t_commit_lsn = *commit_lsn;
- xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
- xfs_trans_unreserve_and_mod_sb(tp);
-
- /* check for background commit before unlock */
- if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
- push = 1;
- up_read(&log->l_cilp->xc_ctx_lock);
-
- /*
- * We need to push CIL every so often so we don't cache more than we
- * can fit in the log. The limit really is that a checkpoint can't be
- * more than half the log (the current checkpoint is not allowed to
- * overwrite the previous checkpoint), but commit latency and memory
- * usage limit this to a smaller size in most cases.
- */
- if (push)
- xlog_cil_push(log, 0);
- return 0;
-}
-
-/*
* Mark all items committed and clear busy extents. We free the log vector
* chains in a separate pass so that we unpin the log items as quickly as
* possible.
@@ -427,13 +364,23 @@ xlog_cil_committed(
}
/*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq flag is zero, then it
+ * is a background flush and so we can chose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allows log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
*/
-int
+STATIC int
xlog_cil_push(
struct log *log,
- int push_now)
+ xfs_lsn_t push_seq)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_log_vec *lv;
@@ -453,12 +400,14 @@ xlog_cil_push(
if (!cil)
return 0;
+ ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
/* lock out transaction commit, but don't block on background push */
if (!down_write_trylock(&cil->xc_ctx_lock)) {
- if (!push_now)
+ if (!push_seq)
goto out_free_ticket;
down_write(&cil->xc_ctx_lock);
}
@@ -469,7 +418,11 @@ xlog_cil_push(
goto out_skip;
/* check for spurious background flush */
- if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ goto out_skip;
+
+ /* check for a previously pushed seqeunce */
+ if (push_seq < cil->xc_ctx->sequence)
goto out_skip;
/*
@@ -515,6 +468,13 @@ xlog_cil_push(
cil->xc_ctx = new_ctx;
/*
+ * mirror the new sequence into the cil structure so that we can do
+ * unlocked checks against the current sequence in log forces without
+ * risking deferencing a freed context pointer.
+ */
+ cil->xc_current_sequence = new_ctx->sequence;
+
+ /*
* The switch is now done, so we can drop the context lock and move out
* of a shared context. We can't just go straight to the commit record,
* though - we need to synchronise with previous and future commits so
@@ -626,6 +586,102 @@ out_abort:
}
/*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_log_vec *log_vector,
+ xfs_lsn_t *commit_lsn,
+ int flags)
+{
+ struct log *log = mp->m_log;
+ int log_flags = 0;
+ int push = 0;
+
+ if (flags & XFS_TRANS_RELEASE_LOG_RES)
+ log_flags = XFS_LOG_REL_PERM_RESERV;
+
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ xlog_cil_free_logvec(log_vector);
+ return XFS_ERROR(EIO);
+ }
+
+ /*
+ * do all the hard work of formatting items (including memory
+ * allocation) outside the CIL context lock. This prevents stalling CIL
+ * pushes when we are low on memory and a transaction commit spends a
+ * lot of time in memory reclaim.
+ */
+ xlog_cil_format_items(log, log_vector);
+
+ /* lock out background commit */
+ down_read(&log->l_cilp->xc_ctx_lock);
+ xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+ /* check we didn't blow the reservation */
+ if (tp->t_ticket->t_curr_res < 0)
+ xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+ /* attach the transaction to the CIL if it has any busy extents */
+ if (!list_empty(&tp->t_busy)) {
+ spin_lock(&log->l_cilp->xc_cil_lock);
+ list_splice_init(&tp->t_busy,
+ &log->l_cilp->xc_ctx->busy_extents);
+ spin_unlock(&log->l_cilp->xc_cil_lock);
+ }
+
+ tp->t_commit_lsn = *commit_lsn;
+ xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+ xfs_trans_unreserve_and_mod_sb(tp);
+
+ /*
+ * Once all the items of the transaction have been copied to the CIL,
+ * the items can be unlocked and freed.
+ *
+ * This needs to be done before we drop the CIL context lock because we
+ * have to update state in the log items and unlock them before they go
+ * to disk. If we don't, then the CIL checkpoint can race with us and
+ * we can run checkpoint completion before we've updated and unlocked
+ * the log items. This affects (at least) processing of stale buffers,
+ * inodes and EFIs.
+ */
+ xfs_trans_free_items(tp, *commit_lsn, 0);
+
+ /* check for background commit before unlock */
+ if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+ push = 1;
+
+ up_read(&log->l_cilp->xc_ctx_lock);
+
+ /*
+ * We need to push CIL every so often so we don't cache more than we
+ * can fit in the log. The limit really is that a checkpoint can't be
+ * more than half the log (the current checkpoint is not allowed to
+ * overwrite the previous checkpoint), but commit latency and memory
+ * usage limit this to a smaller size in most cases.
+ */
+ if (push)
+ xlog_cil_push(log, 0);
+ return 0;
+}
+
+/*
* Conditionally push the CIL based on the sequence passed in.
*
* We only need to push if we haven't already pushed the sequence
@@ -639,39 +695,34 @@ out_abort:
* commit lsn is there. It'll be empty, so this is broken for now.
*/
xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
struct log *log,
- xfs_lsn_t push_seq)
+ xfs_lsn_t sequence)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx;
xfs_lsn_t commit_lsn = NULLCOMMITLSN;
-restart:
- down_write(&cil->xc_ctx_lock);
- ASSERT(push_seq <= cil->xc_ctx->sequence);
-
- /* check to see if we need to force out the current context */
- if (push_seq == cil->xc_ctx->sequence) {
- up_write(&cil->xc_ctx_lock);
- xlog_cil_push(log, 1);
- goto restart;
- }
+ ASSERT(sequence <= cil->xc_current_sequence);
+
+ /*
+ * check to see if we need to force out the current context.
+ * xlog_cil_push() handles racing pushes for the same sequence,
+ * so no need to deal with it here.
+ */
+ if (sequence == cil->xc_current_sequence)
+ xlog_cil_push(log, sequence);
/*
* See if we can find a previous sequence still committing.
- * We can drop the flush lock as soon as we have the cil lock
- * because we are now only comparing contexts protected by
- * the cil lock.
- *
* We need to wait for all previous sequence commits to complete
* before allowing the force of push_seq to go ahead. Hence block
* on commits for those as well.
*/
+restart:
spin_lock(&cil->xc_cil_lock);
- up_write(&cil->xc_ctx_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
- if (ctx->sequence > push_seq)
+ if (ctx->sequence > sequence)
continue;
if (!ctx->commit_lsn) {
/*
@@ -681,7 +732,7 @@ restart:
sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
goto restart;
}
- if (ctx->sequence != push_seq)
+ if (ctx->sequence != sequence)
continue;
/* found it! */
commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..ced52b98b322 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,6 +422,7 @@ struct xfs_cil {
struct rw_semaphore xc_ctx_lock;
struct list_head xc_committing;
sv_t xc_commit_wait;
+ xfs_lsn_t xc_current_sequence;
};
/*
@@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log);
void xlog_cil_init_post_recovery(struct log *log);
void xlog_cil_destroy(struct log *log);
-int xlog_cil_push(struct log *log, int push_now);
-xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
+/*
+ * CIL force routines
+ */
+xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+
+static inline void
+xlog_cil_force(struct log *log)
+{
+ xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+}
/*
* Unmount record type is used as a pseudo transaction type for the ticket.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdca7416c754..1c47edaea0d2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1167,7 +1167,7 @@ xfs_trans_del_item(
* Unlock all of the items of a transaction and free all the descriptors
* of that transaction.
*/
-STATIC void
+void
xfs_trans_free_items(
struct xfs_trans *tp,
xfs_lsn_t commit_lsn,
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
return error;
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-
- /* xfs_trans_free_items() unlocks them first */
- xfs_trans_free_items(tp, *commit_lsn, 0);
xfs_trans_free(tp);
return 0;
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index e2d93d8ead7b..62da86c90de5 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -25,7 +25,8 @@ struct xfs_trans;
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
-
+void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
+ int flags);
void xfs_trans_item_committed(struct xfs_log_item *lip,
xfs_lsn_t commit_lsn, int aborted);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);