From bc09d141ebb26c0c1ab713c8597ca833be9afee4 Mon Sep 17 00:00:00 2001
From: Fabian Frederick
Date: Wed, 10 Dec 2014 15:41:15 -0800
Subject: fs/cifs: remove obsolete __constant

Replace all __constant_foo to foo() except in smb2status.h (1700 lines to
update).

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Steve French <sfrench@samba.org>
Cc: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/cifs/cifsacl.c  |  2 +-
 fs/cifs/cifssmb.c  | 20 ++++++++++----------
 fs/cifs/sess.c     |  2 +-
 fs/cifs/smb2misc.c | 38 +++++++++++++++++++-------------------
 fs/cifs/smb2ops.c  |  2 +-
 fs/cifs/smb2pdu.c  |  2 +-
 fs/cifs/smb2pdu.h  | 28 ++++++++++++++--------------
 7 files changed, 47 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 6d00c419cbae..1ea780bc6376 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
 	1, 1, {0, 0, 0, 0, 0, 1}, {0} };
 /* security id for Authenticated Users system group */
 static const struct cifs_sid sid_authusers = {
-	1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+	1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 61d00a6e398f..fa13d5e79f64 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
 		}
 		parm_data = (struct cifs_posix_lock *)
 			((char *)&pSMBr->hdr.Protocol + data_offset);
-		if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+		if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
 			pLockData->fl_type = F_UNLCK;
 		else {
 			if (parm_data->lock_type ==
-					__constant_cpu_to_le16(CIFS_RDLCK))
+					cpu_to_le16(CIFS_RDLCK))
 				pLockData->fl_type = F_RDLCK;
 			else if (parm_data->lock_type ==
-					__constant_cpu_to_le16(CIFS_WRLCK))
+					cpu_to_le16(CIFS_WRLCK))
 				pLockData->fl_type = F_WRLCK;
 
 			pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
 
 	pSMB->TotalParameterCount = 0;
-	pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+	pSMB->TotalDataCount = cpu_to_le32(2);
 	pSMB->MaxParameterCount = 0;
 	pSMB->MaxDataCount = 0;
 	pSMB->MaxSetupCount = 4;
 	pSMB->Reserved = 0;
 	pSMB->ParameterOffset = 0;
-	pSMB->DataCount = __constant_cpu_to_le32(2);
+	pSMB->DataCount = cpu_to_le32(2);
 	pSMB->DataOffset =
 		cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
 				compression_state) - 4);  /* 84 */
 	pSMB->SetupCount = 4;
-	pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+	pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
 	pSMB->ParameterCount = 0;
-	pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+	pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
 	pSMB->IsFsctl = 1; /* FSCTL */
 	pSMB->IsRootFlag = 0;
 	pSMB->Fid = fid; /* file handle always le */
 	/* 3 byte pad, followed by 2 byte compress state */
-	pSMB->ByteCount = __constant_cpu_to_le16(5);
+	pSMB->ByteCount = cpu_to_le16(5);
 	inc_rfc1001_len(pSMB, 5);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
 	cifs_acl->version = cpu_to_le16(1);
 	if (acl_type == ACL_TYPE_ACCESS) {
 		cifs_acl->access_entry_count = cpu_to_le16(count);
-		cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+		cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
 	} else if (acl_type == ACL_TYPE_DEFAULT) {
 		cifs_acl->default_entry_count = cpu_to_le16(count);
-		cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+		cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
 	} else {
 		cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
 		return 0;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 57db63ff88da..ab0fda6e8e58 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 					CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
 					USHRT_MAX));
 	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-	pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+	pSMB->req.VcNumber = cpu_to_le16(1);
 
 	/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
 
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 1a08a34838fc..f1cefc9763ed 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
  *  indexed by command in host byte order
  */
 static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
-	/* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
-	/* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
-	/* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
-	/* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
-	/* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
-	/* SMB2_CREATE */ __constant_cpu_to_le16(89),
-	/* SMB2_CLOSE */ __constant_cpu_to_le16(60),
-	/* SMB2_FLUSH */ __constant_cpu_to_le16(4),
-	/* SMB2_READ */ __constant_cpu_to_le16(17),
-	/* SMB2_WRITE */ __constant_cpu_to_le16(17),
-	/* SMB2_LOCK */ __constant_cpu_to_le16(4),
-	/* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+	/* SMB2_NEGOTIATE */ cpu_to_le16(65),
+	/* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+	/* SMB2_LOGOFF */ cpu_to_le16(4),
+	/* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+	/* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+	/* SMB2_CREATE */ cpu_to_le16(89),
+	/* SMB2_CLOSE */ cpu_to_le16(60),
+	/* SMB2_FLUSH */ cpu_to_le16(4),
+	/* SMB2_READ */ cpu_to_le16(17),
+	/* SMB2_WRITE */ cpu_to_le16(17),
+	/* SMB2_LOCK */ cpu_to_le16(4),
+	/* SMB2_IOCTL */ cpu_to_le16(49),
 	/* BB CHECK this ... not listed in documentation */
-	/* SMB2_CANCEL */ __constant_cpu_to_le16(0),
-	/* SMB2_ECHO */ __constant_cpu_to_le16(4),
-	/* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
-	/* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
-	/* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
-	/* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+	/* SMB2_CANCEL */ cpu_to_le16(0),
+	/* SMB2_ECHO */ cpu_to_le16(4),
+	/* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+	/* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+	/* SMB2_QUERY_INFO */ cpu_to_le16(9),
+	/* SMB2_SET_INFO */ cpu_to_le16(2),
 	/* BB FIXME can also be 44 for lease break */
-	/* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+	/* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
 };
 
 int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c5f521bcdee2..7ecac610b44b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -600,7 +600,7 @@ smb2_clone_range(const unsigned int xid,
 		goto cchunk_out;
 
 	/* For now array only one chunk long, will make more flexible later */
-	pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+	pcchunk->ChunkCount = cpu_to_le32(1);
 	pcchunk->Reserved = 0;
 	pcchunk->Reserved2 = 0;
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f1672bb82d5..08f73e168b0b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1359,7 +1359,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	char *ret_data = NULL;
 
 	fsctl_input.CompressionState =
-			__constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+			cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
 
 	rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
 			FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e3188abdafd0..fbd4df1e1683 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
 /* BB FIXME - analyze following length BB */
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
 
 /*
  * SMB2 Header Definition
@@ -96,7 +96,7 @@
  *
  */
 
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
 
 struct smb2_hdr {
 	__be32 smb2_buf_length;	/* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
 } __packed;
 
 /* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM	__constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM	cpu_to_le16(0x0001)
 
 /*
  *	SMB2 flag definitions
  */
-#define SMB2_FLAGS_SERVER_TO_REDIR	__constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND	__constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS	__constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED		__constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS	__constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR	cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND	cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS	cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED		cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS	cpu_to_le32(0x10000000)
 
 /*
  *	Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
  *
  */
 
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
 
 struct smb2_err_rsp {
 	struct smb2_hdr hdr;
@@ -502,12 +502,12 @@ struct create_context {
 #define SMB2_LEASE_HANDLE_CACHING_HE	0x02
 #define SMB2_LEASE_WRITE_CACHING_HE	0x04
 
-#define SMB2_LEASE_NONE			__constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING		__constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING	__constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING	__constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE			cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING		cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING	cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING	cpu_to_le32(0x04)
 
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
 
 #define SMB2_LEASE_KEY_SIZE 16
 
-- 
cgit v1.2.3


From 4b99d39b1b2890a178d840f7a068305c7890faee Mon Sep 17 00:00:00 2001
From: Fabian Frederick
Date: Wed, 10 Dec 2014 15:41:17 -0800
Subject: fs/cifs/file.c: replace count*size kzalloc by kcalloc

kcalloc manages count*sizeof overflow.

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Steve French <sfrench@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/cifs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3e4d00a06c44..b123a64d7be4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1066,7 +1066,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 
 	max_num = (max_buf - sizeof(struct smb_hdr)) /
 						sizeof(LOCKING_ANDX_RANGE);
-	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf) {
 		free_xid(xid);
 		return -ENOMEM;
@@ -1401,7 +1401,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 
 	max_num = (max_buf - sizeof(struct smb_hdr)) /
 						sizeof(LOCKING_ANDX_RANGE);
-	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 662e9b2b98a0b8e172c392f3d3437d354a6c4067 Mon Sep 17 00:00:00 2001
From: Fabian Frederick
Date: Wed, 10 Dec 2014 15:41:20 -0800
Subject: fs/cifs/smb2file.c: replace count*size kzalloc by kcalloc

kcalloc manages count*sizeof overflow.

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Steve French <sfrench@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/cifs/smb2file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 45992944e238..7198eac5dddd 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		return -EINVAL;
 
 	max_num = max_buf / sizeof(struct smb2_lock_element);
-	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	}
 
 	max_num = max_buf / sizeof(struct smb2_lock_element);
-	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf) {
 		free_xid(xid);
 		return -ENOMEM;
-- 
cgit v1.2.3


From f08736bd6c54c33b91d4324eee5713ca6f13319a Mon Sep 17 00:00:00 2001
From: Joseph Qi
Date: Wed, 10 Dec 2014 15:41:34 -0800
Subject: ocfs2/dlm: let sender retry if dlm_dispatch_assert_master failed with
 -ENOMEM

Do not BUG() if GFP_ATOMIC allocation fails in dlm_dispatch_assert_master.
Instead, return -ENOMEM to the sender and then retry.

Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmrecovery.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 3365839d2971..79b5af5e6a7b 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1656,14 +1656,18 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	req.namelen = res->lockname.len;
 	memcpy(req.name, res->lockname.name, res->lockname.len);
 
+resend:
 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
 				 &req, sizeof(req), nodenum, &status);
-	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
 		mlog(ML_ERROR, "Error %d when sending message %u (key "
 		     "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
 		     dlm->key, nodenum);
-	else {
+	else if (status == -ENOMEM) {
+		mlog_errno(status);
+		msleep(50);
+		goto resend;
+	} else {
 		BUG_ON(status < 0);
 		BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
 		*real_master = (u8) (status & 0xff);
@@ -1705,9 +1709,13 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 			int ret = dlm_dispatch_assert_master(dlm, res,
 							     0, 0, flags);
 			if (ret < 0) {
-				mlog_errno(-ENOMEM);
-				/* retry!? */
-				BUG();
+				mlog_errno(ret);
+				spin_unlock(&res->spinlock);
+				dlm_lockres_put(res);
+				spin_unlock(&dlm->spinlock);
+				dlm_put(dlm);
+				/* sender will take care of this and retry */
+				return ret;
 			} else
 				__dlm_lockres_grab_inflight_worker(dlm, res);
 			spin_unlock(&res->spinlock);
-- 
cgit v1.2.3


From 519a286175b6422e13694ebb73aefee0b5749443 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 10 Dec 2014 15:41:37 -0800
Subject: ocfs2: fix an off-by-one BUG_ON() statement

The ->si_slots[] array is allocated in ocfs2_init_slot_info() it has
"->max_slots" number of elements so this test should be >= instead of >.

Static checker work.  Compile tested only.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/slot_map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index a88b2a4fcc85..d5493e361a38 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -306,7 +306,7 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 	assert_spin_locked(&osb->osb_lock);
 
 	BUG_ON(slot_num < 0);
-	BUG_ON(slot_num > osb->max_slots);
+	BUG_ON(slot_num >= osb->max_slots);
 
 	if (!si->si_slots[slot_num].sl_valid)
 		return -ENOENT;
-- 
cgit v1.2.3


From 2b693005b833ba309c7cb8426cd023d8e80da31e Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 10 Dec 2014 15:41:40 -0800
Subject: ocfs2: Fix xattr check in ocfs2_get_xattr_nolock()

ocfs2_get_xattr_nolock() checks whether inode has any extended attributes
(OCFS2_HAS_XATTR_FL).  If not, it just sets 'ret' to -ENODATA but
continues with checking inline and external attributes anyway (which is
pointless although it does not harm).  Just return immediately when we
know there are no extended attributes in the inode.

Coverity id: 1226906.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 016f01df3825..662f8dee149f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1284,7 +1284,7 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
 		return -EOPNOTSUPP;
 
 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
-		ret = -ENODATA;
+		return -ENODATA;
 
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
-- 
cgit v1.2.3


From 4a635a113bfe1eaa184410d6b6065cd8eee13607 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 10 Dec 2014 15:41:42 -0800
Subject: ocfs2: remove bogus test from ocfs2_read_locked_inode()

'args' are always set for ocfs2_read_locked_inode() and brelse() checks
whether bh is NULL.  So the test (args && bh) is unnecessary (plus the
args part is really confusing anyway).  Remove it.

Coverity id: 1128856.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 437de7f768c6..c8b25de9efbb 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -540,8 +540,7 @@ bail:
 	if (status < 0)
 		make_bad_inode(inode);
 
-	if (args && bh)
-		brelse(bh);
+	brelse(bh);
 
 	return status;
 }
-- 
cgit v1.2.3


From f5425fcea72eec084c50f36db4d25f697227c602 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 10 Dec 2014 15:41:45 -0800
Subject: ocfs2: report error from o2hb_do_disk_heartbeat() to user

Report return value of o2hb_do_disk_heartbeat() as a part of ML_HEARTBEAT
message so that we know whether a heartbeat actually happened or not.
This also makes assigned but otherwise unused 'ret' variable used.

Coverity id: 1227053.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/cluster/heartbeat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index eb9d48746ab4..16eff45727ee 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1127,10 +1127,10 @@ static int o2hb_thread(void *data)
 		elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
 
 		mlog(ML_HEARTBEAT,
-		     "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
+		     "start = %lu.%lu, end = %lu.%lu, msec = %u, ret = %d\n",
 		     before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
 		     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
-		     elapsed_msec);
+		     elapsed_msec, ret);
 
 		if (!kthread_should_stop() &&
 		    elapsed_msec < reg->hr_timeout_ms) {
-- 
cgit v1.2.3


From cb79662bc2f83f7b3b60970ad88df43085f96514 Mon Sep 17 00:00:00 2001
From: Srinivas Eeda
Date: Wed, 10 Dec 2014 15:41:48 -0800
Subject: ocfs2: o2dlm: fix a race between purge and master query

Node A sends master query request to node B which is the master.  At this
time lockres happens to be on purgelist.  dlm_master_request_handler gets
the dlm spinlock, finds the resource and releases the dlm spin lock.
Right at this dlm_thread on this node could purge the lockres.
dlm_master_request_handler can then acquire lockres spinlock and reply to
Node A that node B is the master even though lockres on node B is purged.

The above scenario will now make node A falsely think node B is the master
which is inconsistent.  Further if another node C tries to master the same
resource, every node will respond they are not the master.  Node C then
masters the resource and sends assert master to all nodes.  This will now
make node A crash with the following message.

dlm_assert_master_handler:1831 ERROR: DIE! Mastery assert from 9, but current
owner is 10!

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Reviewed-by: Wengang Wang <wen.gang.wang@oracle.com>
Tested-by: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 215e41abf101..3689b3592042 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1460,6 +1460,18 @@ way_up_top:
 
 		/* take care of the easy cases up front */
 		spin_lock(&res->spinlock);
+
+		/*
+		 * Right after dlm spinlock was released, dlm_thread could have
+		 * purged the lockres. Check if lockres got unhashed. If so
+		 * start over.
+		 */
+		if (hlist_unhashed(&res->hash_node)) {
+			spin_unlock(&res->spinlock);
+			dlm_lockres_put(res);
+			goto way_up_top;
+		}
+
 		if (res->state & (DLM_LOCK_RES_RECOVERING|
 				  DLM_LOCK_RES_MIGRATING)) {
 			spin_unlock(&res->spinlock);
-- 
cgit v1.2.3


From 196fe71d643be8285749726e88c1508536fe03b8 Mon Sep 17 00:00:00 2001
From: Junxiao Bi
Date: Wed, 10 Dec 2014 15:41:51 -0800
Subject: ocfs2: o2net: fix connect expired

Set nn_persistent_error to -ENOTCONN will stop reconnect since the
"stop" condition in o2net_start_connect() will be true.

    stop = (nn->nn_sc ||
                (nn->nn_persistent_error &&
                (nn->nn_persistent_error != -ENOTCONN || timeout == 0)));

This will make connection never be established if the first connection
request is lost.

Set nn_persistent_error to 0 when connect expired to fix this.  With
this changes, dlm will not be waken up when connect expired, this is OK
since dlm depends on network, dlm can do nothing in this case if waken
up.  Let it wait there for network recover and connect built again to
continue.

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/cluster/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a96044004064..2e355e0f8335 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1736,7 +1736,7 @@ static void o2net_connect_expired(struct work_struct *work)
 		     o2net_idle_timeout() / 1000,
 		     o2net_idle_timeout() % 1000);
 
-		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
+		o2net_set_nn_state(nn, NULL, 0, 0);
 	}
 	spin_unlock(&nn->nn_lock);
 }
-- 
cgit v1.2.3


From 86b9c6f3f891019b26f8e5bb11a6faa96bba54a8 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues
Date: Wed, 10 Dec 2014 15:41:53 -0800
Subject: ocfs2: remove filesize checks for sync I/O journal commit

Filesize is not a good indication that the file needs to be synced.
An example where this breaks is:
 1. Open the file in O_SYNC|O_RDWR
 2. Read a small portion of the file (say 64 bytes)
 3. Lseek to starting of the file
 4. Write 64 bytes

If the node crashes, it is not written out to disk because this was not
committed in the journal and the other node which reads the file after
recovery reads stale data (even if the write on the other node was
successful)

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.de>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/file.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 324dc93ac896..69fb9f75b082 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2381,9 +2381,7 @@ out_dio:
 		if (ret < 0)
 			written = ret;
 
-		if (!ret && ((old_size != i_size_read(inode)) ||
-			     (old_clusters != OCFS2_I(inode)->ip_clusters) ||
-			     has_refcount)) {
+		if (!ret) {
 			ret = jbd2_journal_force_commit(osb->journal->j_journal);
 			if (ret < 0)
 				written = ret;
-- 
cgit v1.2.3


From dc17158060fb57f49c310a7def12fdaeddf35b19 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 10 Dec 2014 15:41:56 -0800
Subject: ocfs2: fix error handling when creating debugfs root in ocfs2_init()

Error handling if creation of root of debugfs in ocfs2_init() fails is
broken.  Although error code is set we fail to exit ocfs2_init() with
error and thus initialization ends with success.  Later when mounting a
filesystem, ocfs2 debugfs entries end up being created in the root of
debugfs filesystem which is confusing.

Fix the error handling to bail out.

Coverity id: 1227009.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 93c85bc745e1..8e3ac25efb9f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1622,8 +1622,9 @@ static int __init ocfs2_init(void)
 
 	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
 	if (!ocfs2_debugfs_root) {
-		status = -EFAULT;
+		status = -ENOMEM;
 		mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
+		goto out4;
 	}
 
 	ocfs2_set_locking_protocol();
-- 
cgit v1.2.3


From d1e78238741491d24177d99453091439c90ac70e Mon Sep 17 00:00:00 2001
From: Xue jiufei
Date: Wed, 10 Dec 2014 15:41:59 -0800
Subject: ocfs2: do not set OCFS2_LOCK_UPCONVERT_FINISHING if nonblocking lock
 can not be granted at once

ocfs2_readpages() use nonblocking flag to avoid page lock inversion.  It
will trigger cluster hang because that flag OCFS2_LOCK_UPCONVERT_FINISHING
is not cleared if nonblocking lock cannot be granted at once.  The flag
would prevent dc thread from downconverting.  So other nodes cannot
acheive this lockres for ever.

So we should not set OCFS2_LOCK_UPCONVERT_FINISHING when receiving ast if
nonblocking lock had already returned.

Signed-off-by: joyce.xue <xuejiufei@huawei.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlmglue.c | 37 +++++++++++++++++++++++++++++++------
 fs/ocfs2/ocfs2.h   |  6 ++++++
 2 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 21262f2b1654..953f4eb75fa7 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -861,8 +861,13 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
 	 * downconverting the lock before the upconvert has fully completed.
+	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
+	 * had already returned.
 	 */
-	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
+		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+	else
+		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
 
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
 }
@@ -1324,13 +1329,12 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
 
 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
  * if the mask still hadn't reached its goal */
-static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
+static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 				      struct ocfs2_mask_waiter *mw)
 {
-	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(&lockres->l_lock, flags);
+	assert_spin_locked(&lockres->l_lock);
 	if (!list_empty(&mw->mw_item)) {
 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
 			ret = -EBUSY;
@@ -1338,6 +1342,18 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 		list_del_init(&mw->mw_item);
 		init_completion(&mw->mw_complete);
 	}
+
+	return ret;
+}
+
+static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
+				      struct ocfs2_mask_waiter *mw)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&lockres->l_lock, flags);
+	ret = __lockres_remove_mask_waiter(lockres, mw);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	return ret;
@@ -1373,6 +1389,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 	unsigned long flags;
 	unsigned int gen;
 	int noqueue_attempted = 0;
+	int dlm_locked = 0;
 
 	ocfs2_init_mask_waiter(&mw);
 
@@ -1481,6 +1498,7 @@ again:
 			ocfs2_recover_from_dlm_error(lockres, 1);
 			goto out;
 		}
+		dlm_locked = 1;
 
 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
 		     lockres->l_name);
@@ -1514,10 +1532,17 @@ out:
 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
 		wait = 0;
-		if (lockres_remove_mask_waiter(lockres, &mw))
+		spin_lock_irqsave(&lockres->l_lock, flags);
+		if (__lockres_remove_mask_waiter(lockres, &mw)) {
+			if (dlm_locked)
+				lockres_or_flags(lockres,
+					OCFS2_LOCK_NONBLOCK_FINISHED);
+			spin_unlock_irqrestore(&lockres->l_lock, flags);
 			ret = -EAGAIN;
-		else
+		} else {
+			spin_unlock_irqrestore(&lockres->l_lock, flags);
 			goto again;
+		}
 	}
 	if (wait) {
 		ret = ocfs2_wait_for_mask(&mw);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bbec539230fd..7d6b7d090452 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -144,6 +144,12 @@ enum ocfs2_unlock_action {
 						     * before the upconvert
 						     * has completed */
 
+#define OCFS2_LOCK_NONBLOCK_FINISHED (0x00001000) /* NONBLOCK cluster
+						   * lock has already
+						   * returned, do not block
+						   * dc thread from
+						   * downconverting */
+
 struct ocfs2_lock_res_ops;
 
 typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
-- 
cgit v1.2.3


From 61fb9ea4b3e1f22341f7d817db18884062b08601 Mon Sep 17 00:00:00 2001
From: jiangyiwen
Date: Wed, 10 Dec 2014 15:42:02 -0800
Subject: ocfs2: do not set filesystem readonly if link down

Do not set the filesystem readonly if the storage link is down.  In this
case, metadata is not corrupted and only -EIO is returned.  And if it is
indeed corrupted metadata, it has already called ocfs2_error() in
ocfs2_validate_inode_block().

Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/aops.c | 2 +-
 fs/ocfs2/dir.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1ef547e49373..d9f222987f24 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1251,7 +1251,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 	ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
 					  NULL);
 	if (ret < 0) {
-		ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "
+		mlog(ML_ERROR, "Get physical blkno failed for inode %llu, "
 			    "at logical block %llu",
 			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			    (unsigned long long)v_blkno);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 0717662b4aef..e6dc0288dbb9 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -744,7 +744,7 @@ restart:
 		if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
 			/* read error, skip block & hope for the best.
 			 * ocfs2_read_dir_block() has released the bh. */
-			ocfs2_error(dir->i_sb, "reading directory %llu, "
+			mlog(ML_ERROR, "reading directory %llu, "
 				    "offset %lu\n",
 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
 				    block);
-- 
cgit v1.2.3


From 88d69b92fc12084af43cba428bda9727c0ca084f Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 10 Dec 2014 15:42:04 -0800
Subject: ocfs2: remove bogus NULL check in ocfs2_move_extents()

"inode" isn't NULL here, and also we dereference it on the previous line
so static checkers get annoyed.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/move_extents.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 74caffeeee1d..56a768d06aa6 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -904,9 +904,6 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	if (!inode)
-		return -ENOENT;
-
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
 
-- 
cgit v1.2.3


From b3e3e5af60e0fda182e978f568cc512568c024cf Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 10 Dec 2014 15:42:07 -0800
Subject: ocfs2: remove unneeded NULL check

In commit 1faf289454b9 ("ocfs2_dlm: disallow a domain join if node maps
mismatch") we introduced a new earlier NULL check so this one is not
needed.  Also static checkers complain because we dereference it first
and then check for NULL.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmdomain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 02d315fef432..50a59d2337b2 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -877,7 +877,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 	 * to be put in someone's domain map.
 	 * Also, explicitly disallow joining at certain troublesome
 	 * times (ie. during recovery). */
-	if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
+	if (dlm->dlm_state != DLM_CTXT_LEAVING) {
 		int bit = query->node_idx;
 		spin_lock(&dlm->spinlock);
 
-- 
cgit v1.2.3


From e2ab879e96b5e65bf8ce1123f3b7f01ebba27204 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 10 Dec 2014 15:42:10 -0800
Subject: fs/char_dev.c: remove pointless assignment from
 __register_chrdev_region()

At one place we assign major number we found to ret.  That assignment is
then never used and actually doesn't make any sense given how the code is
currently structured (the assignment comes from pre-git times).  Just
remove it.

Coverity id: 1226852.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/char_dev.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index f77f7702fabe..67b2007f10fe 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -117,7 +117,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 			goto out;
 		}
 		major = i;
-		ret = major;
 	}
 
 	cd->major = major;
-- 
cgit v1.2.3


From c164e038eee805147e95789dddb88ae3b3aca11c Mon Sep 17 00:00:00 2001
From: Kirill A. Shutemov
Date: Wed, 10 Dec 2014 15:44:36 -0800
Subject: mm: fix huge zero page accounting in smaps report

As a small zero page, huge zero page should not be accounted in smaps
report as normal page.

For small pages we rely on vm_normal_page() to filter out zero page, but
vm_normal_page() is not designed to handle pmds.  We only get here due
hackish cast pmd to pte in smaps_pte_range() -- pte and pmd format is not
necessary compatible on each and every architecture.

Let's add separate codepath to handle pmds.  follow_trans_huge_pmd() will
detect huge zero page for us.

We would need pmd_dirty() helper to do this properly.  The patch adds it
to THP-enabled architectures which don't yet have one.

[akpm@linux-foundation.org: use do_div to fix 32-bit build]
Signed-off-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Fengwei Yin <yfw.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/pgtable.h         |   1 +
 arch/powerpc/include/asm/pgtable-ppc64.h |   1 +
 arch/sparc/include/asm/pgtable_64.h      |   7 +++
 arch/x86/include/asm/pgtable.h           |   5 ++
 fs/proc/task_mmu.c                       | 104 ++++++++++++++++++++-----------
 5 files changed, 82 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 41a43bf26492..df22314f57cf 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -279,6 +279,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index ae153c40ab7c..9b4b1904efae 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -467,6 +467,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 }
 
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index bfeb626085ac..1ff9e7864168 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -667,6 +667,13 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_dirty(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_dirty(pte);
+}
+
 static inline unsigned long pmd_young(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index aa97a070f09f..081d6f45e006 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -99,6 +99,11 @@ static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
+static inline int pmd_dirty(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_DIRTY;
+}
+
 static inline int pmd_young(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_ACCESSED;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f6734c6b66a6..246eae84b13b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -447,58 +447,91 @@ struct mem_size_stats {
 	u64 pss;
 };
 
+static void smaps_account(struct mem_size_stats *mss, struct page *page,
+		unsigned long size, bool young, bool dirty)
+{
+	int mapcount;
+
+	if (PageAnon(page))
+		mss->anonymous += size;
 
-static void smaps_pte_entry(pte_t ptent, unsigned long addr,
-		unsigned long ptent_size, struct mm_walk *walk)
+	mss->resident += size;
+	/* Accumulate the size in pages that have been accessed. */
+	if (young || PageReferenced(page))
+		mss->referenced += size;
+	mapcount = page_mapcount(page);
+	if (mapcount >= 2) {
+		u64 pss_delta;
+
+		if (dirty || PageDirty(page))
+			mss->shared_dirty += size;
+		else
+			mss->shared_clean += size;
+		pss_delta = (u64)size << PSS_SHIFT;
+		do_div(pss_delta, mapcount);
+		mss->pss += pss_delta;
+	} else {
+		if (dirty || PageDirty(page))
+			mss->private_dirty += size;
+		else
+			mss->private_clean += size;
+		mss->pss += (u64)size << PSS_SHIFT;
+	}
+}
+
+static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
 	pgoff_t pgoff = linear_page_index(vma, addr);
 	struct page *page = NULL;
-	int mapcount;
 
-	if (pte_present(ptent)) {
-		page = vm_normal_page(vma, addr, ptent);
-	} else if (is_swap_pte(ptent)) {
-		swp_entry_t swpent = pte_to_swp_entry(ptent);
+	if (pte_present(*pte)) {
+		page = vm_normal_page(vma, addr, *pte);
+	} else if (is_swap_pte(*pte)) {
+		swp_entry_t swpent = pte_to_swp_entry(*pte);
 
 		if (!non_swap_entry(swpent))
-			mss->swap += ptent_size;
+			mss->swap += PAGE_SIZE;
 		else if (is_migration_entry(swpent))
 			page = migration_entry_to_page(swpent);
-	} else if (pte_file(ptent)) {
-		if (pte_to_pgoff(ptent) != pgoff)
-			mss->nonlinear += ptent_size;
+	} else if (pte_file(*pte)) {
+		if (pte_to_pgoff(*pte) != pgoff)
+			mss->nonlinear += PAGE_SIZE;
 	}
 
 	if (!page)
 		return;
 
-	if (PageAnon(page))
-		mss->anonymous += ptent_size;
-
 	if (page->index != pgoff)
-		mss->nonlinear += ptent_size;
+		mss->nonlinear += PAGE_SIZE;
 
-	mss->resident += ptent_size;
-	/* Accumulate the size in pages that have been accessed. */
-	if (pte_young(ptent) || PageReferenced(page))
-		mss->referenced += ptent_size;
-	mapcount = page_mapcount(page);
-	if (mapcount >= 2) {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->shared_dirty += ptent_size;
-		else
-			mss->shared_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
-	} else {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->private_dirty += ptent_size;
-		else
-			mss->private_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT);
-	}
+	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+	struct mem_size_stats *mss = walk->private;
+	struct vm_area_struct *vma = mss->vma;
+	struct page *page;
+
+	/* FOLL_DUMP will return -EFAULT on huge zero page */
+	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+	if (IS_ERR_OR_NULL(page))
+		return;
+	mss->anonymous_thp += HPAGE_PMD_SIZE;
+	smaps_account(mss, page, HPAGE_PMD_SIZE,
+			pmd_young(*pmd), pmd_dirty(*pmd));
 }
+#else
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+}
+#endif
 
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			   struct mm_walk *walk)
@@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	spinlock_t *ptl;
 
 	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+		smaps_pmd_entry(pmd, addr, walk);
 		spin_unlock(ptl);
-		mss->anonymous_thp += HPAGE_PMD_SIZE;
 		return 0;
 	}
 
@@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
-		smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
+		smaps_pte_entry(pte, addr, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	return 0;
-- 
cgit v1.2.3


From 710585d4922fd315f2cada8fbe550ae8ed23e994 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel
Date: Wed, 10 Dec 2014 15:45:01 -0800
Subject: fs/proc: use a rb tree for the directory entries

When a lot of netdevices are created, one of the bottleneck is the
creation of proc entries.  This serie aims to accelerate this part.

The current implementation for the directories in /proc is using a single
linked list.  This is slow when handling directories with large numbers of
entries (eg netdevice-related entries when lots of tunnels are opened).

This patch replaces this linked list by a red-black tree.

Here are some numbers:

dummy30000.batch contains 30 000 times 'link add type dummy'.

Before the patch:
  $ time ip -b dummy30000.batch
  real    2m31.950s
  user    0m0.440s
  sys     2m21.440s
  $ time rmmod dummy
  real    1m35.764s
  user    0m0.000s
  sys     1m24.088s

After the patch:
  $ time ip -b dummy30000.batch
  real    2m0.874s
  user    0m0.448s
  sys     1m49.720s
  $ time rmmod dummy
  real    1m13.988s
  user    0m0.000s
  sys     1m1.008s

The idea of improving this part was suggested by Thierry Herbelot.

[akpm@linux-foundation.org: initialise proc_root.subdir at compile time]
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Thierry Herbelot <thierry.herbelot@6wind.com>.
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c  | 164 ++++++++++++++++++++++++++++++++++-------------------
 fs/proc/internal.h |  11 ++--
 fs/proc/proc_net.c |   1 +
 fs/proc/root.c     |   1 +
 4 files changed, 113 insertions(+), 64 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 317b72641ebf..9f8fa1e5e8aa 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -31,9 +31,81 @@ static DEFINE_SPINLOCK(proc_subdir_lock);
 
 static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
 {
-	if (de->namelen != len)
-		return 0;
-	return !memcmp(name, de->name, len);
+	if (len < de->namelen)
+		return -1;
+	if (len > de->namelen)
+		return 1;
+
+	return memcmp(name, de->name, len);
+}
+
+static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
+{
+	struct rb_node *node = rb_first(&dir->subdir);
+
+	if (node == NULL)
+		return NULL;
+
+	return rb_entry(node, struct proc_dir_entry, subdir_node);
+}
+
+static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
+{
+	struct rb_node *node = rb_next(&dir->subdir_node);
+
+	if (node == NULL)
+		return NULL;
+
+	return rb_entry(node, struct proc_dir_entry, subdir_node);
+}
+
+static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
+					      const char *name,
+					      unsigned int len)
+{
+	struct rb_node *node = dir->subdir.rb_node;
+
+	while (node) {
+		struct proc_dir_entry *de = container_of(node,
+							 struct proc_dir_entry,
+							 subdir_node);
+		int result = proc_match(len, name, de);
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return de;
+	}
+	return NULL;
+}
+
+static bool pde_subdir_insert(struct proc_dir_entry *dir,
+			      struct proc_dir_entry *de)
+{
+	struct rb_root *root = &dir->subdir;
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct proc_dir_entry *this =
+			container_of(*new, struct proc_dir_entry, subdir_node);
+		int result = proc_match(de->namelen, de->name, this);
+
+		parent = *new;
+		if (result < 0)
+			new = &(*new)->rb_left;
+		else if (result > 0)
+			new = &(*new)->rb_right;
+		else
+			return false;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&de->subdir_node, parent, new);
+	rb_insert_color(&de->subdir_node, root);
+	return true;
 }
 
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -92,10 +164,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 			break;
 
 		len = next - cp;
-		for (de = de->subdir; de ; de = de->next) {
-			if (proc_match(len, cp, de))
-				break;
-		}
+		de = pde_subdir_find(de, cp, len);
 		if (!de) {
 			WARN(1, "name '%s'\n", name);
 			return -ENOENT;
@@ -183,19 +252,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 	struct inode *inode;
 
 	spin_lock(&proc_subdir_lock);
-	for (de = de->subdir; de ; de = de->next) {
-		if (de->namelen != dentry->d_name.len)
-			continue;
-		if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-			pde_get(de);
-			spin_unlock(&proc_subdir_lock);
-			inode = proc_get_inode(dir->i_sb, de);
-			if (!inode)
-				return ERR_PTR(-ENOMEM);
-			d_set_d_op(dentry, &simple_dentry_operations);
-			d_add(dentry, inode);
-			return NULL;
-		}
+	de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len);
+	if (de) {
+		pde_get(de);
+		spin_unlock(&proc_subdir_lock);
+		inode = proc_get_inode(dir->i_sb, de);
+		if (!inode)
+			return ERR_PTR(-ENOMEM);
+		d_set_d_op(dentry, &simple_dentry_operations);
+		d_add(dentry, inode);
+		return NULL;
 	}
 	spin_unlock(&proc_subdir_lock);
 	return ERR_PTR(-ENOENT);
@@ -225,7 +291,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		return 0;
 
 	spin_lock(&proc_subdir_lock);
-	de = de->subdir;
+	de = pde_subdir_first(de);
 	i = ctx->pos - 2;
 	for (;;) {
 		if (!de) {
@@ -234,7 +300,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		}
 		if (!i)
 			break;
-		de = de->next;
+		de = pde_subdir_next(de);
 		i--;
 	}
 
@@ -249,7 +315,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		}
 		spin_lock(&proc_subdir_lock);
 		ctx->pos++;
-		next = de->next;
+		next = pde_subdir_next(de);
 		pde_put(de);
 		de = next;
 	} while (de);
@@ -286,9 +352,8 @@ static const struct inode_operations proc_dir_inode_operations = {
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	struct proc_dir_entry *tmp;
 	int ret;
-	
+
 	ret = proc_alloc_inum(&dp->low_ino);
 	if (ret)
 		return ret;
@@ -308,17 +373,10 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 	}
 
 	spin_lock(&proc_subdir_lock);
-
-	for (tmp = dir->subdir; tmp; tmp = tmp->next)
-		if (strcmp(tmp->name, dp->name) == 0) {
-			WARN(1, "proc_dir_entry '%s/%s' already registered\n",
-				dir->name, dp->name);
-			break;
-		}
-
-	dp->next = dir->subdir;
 	dp->parent = dir;
-	dir->subdir = dp;
+	if (pde_subdir_insert(dir, dp) == false)
+		WARN(1, "proc_dir_entry '%s/%s' already registered\n",
+		     dir->name, dp->name);
 	spin_unlock(&proc_subdir_lock);
 
 	return 0;
@@ -354,6 +412,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	ent->namelen = qstr.len;
 	ent->mode = mode;
 	ent->nlink = nlink;
+	ent->subdir = RB_ROOT;
 	atomic_set(&ent->count, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
@@ -485,7 +544,6 @@ void pde_put(struct proc_dir_entry *pde)
  */
 void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry **p;
 	struct proc_dir_entry *de = NULL;
 	const char *fn = name;
 	unsigned int len;
@@ -497,14 +555,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	}
 	len = strlen(fn);
 
-	for (p = &parent->subdir; *p; p=&(*p)->next ) {
-		if (proc_match(len, fn, *p)) {
-			de = *p;
-			*p = de->next;
-			de->next = NULL;
-			break;
-		}
-	}
+	de = pde_subdir_find(parent, fn, len);
+	if (de)
+		rb_erase(&de->subdir_node, &parent->subdir);
 	spin_unlock(&proc_subdir_lock);
 	if (!de) {
 		WARN(1, "name '%s'\n", name);
@@ -516,16 +569,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	if (S_ISDIR(de->mode))
 		parent->nlink--;
 	de->nlink = 0;
-	WARN(de->subdir, "%s: removing non-empty directory "
-			 "'%s/%s', leaking at least '%s'\n", __func__,
-			 de->parent->name, de->name, de->subdir->name);
+	WARN(pde_subdir_first(de),
+	     "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n",
+	     __func__, de->parent->name, de->name, pde_subdir_first(de)->name);
 	pde_put(de);
 }
 EXPORT_SYMBOL(remove_proc_entry);
 
 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry **p;
 	struct proc_dir_entry *root = NULL, *de, *next;
 	const char *fn = name;
 	unsigned int len;
@@ -537,24 +589,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 	}
 	len = strlen(fn);
 
-	for (p = &parent->subdir; *p; p=&(*p)->next ) {
-		if (proc_match(len, fn, *p)) {
-			root = *p;
-			*p = root->next;
-			root->next = NULL;
-			break;
-		}
-	}
+	root = pde_subdir_find(parent, fn, len);
 	if (!root) {
 		spin_unlock(&proc_subdir_lock);
 		return -ENOENT;
 	}
+	rb_erase(&root->subdir_node, &parent->subdir);
+
 	de = root;
 	while (1) {
-		next = de->subdir;
+		next = pde_subdir_first(de);
 		if (next) {
-			de->subdir = next->next;
-			next->next = NULL;
+			rb_erase(&next->subdir_node, &de->subdir);
 			de = next;
 			continue;
 		}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aa7a0ee182e1..7fb1a4869fd0 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -24,10 +24,9 @@ struct mempolicy;
  * tree) of these proc_dir_entries, so that we can dynamically
  * add new files to /proc.
  *
- * The "next" pointer creates a linked list of one /proc directory,
- * while parent/subdir create the directory structure (every
- * /proc file has a parent, but "subdir" is NULL for all
- * non-directory entries).
+ * parent/subdir are used for the directory structure (every /proc file has a
+ * parent, but "subdir" is empty for all non-directory entries).
+ * subdir_node is used to build the rb tree "subdir" of the parent.
  */
 struct proc_dir_entry {
 	unsigned int low_ino;
@@ -38,7 +37,9 @@ struct proc_dir_entry {
 	loff_t size;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
-	struct proc_dir_entry *next, *parent, *subdir;
+	struct proc_dir_entry *parent;
+	struct rb_root subdir;
+	struct rb_node subdir_node;
 	void *data;
 	atomic_t count;		/* use count */
 	atomic_t in_use;	/* number of callers into module in progress; */
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a63af3e0a612..1bde894bc624 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net)
 	if (!netd)
 		goto out;
 
+	netd->subdir = RB_ROOT;
 	netd->data = net;
 	netd->nlink = 2;
 	netd->namelen = 3;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 094e44d4a6be..e74ac9f1a2c0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = {
 	.proc_iops	= &proc_root_inode_operations, 
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
+	.subdir		= RB_ROOT,
 	.name		= "/proc",
 };
 
-- 
cgit v1.2.3


From b208d54b75399b276b05f9e70cce8d3a59a42547 Mon Sep 17 00:00:00 2001
From: Debabrata Banerjee
Date: Wed, 10 Dec 2014 15:45:04 -0800
Subject: procfs: fix error handling of proc_register()

proc_register() error paths are leaking inodes and directory refcounts.

Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9f8fa1e5e8aa..be39c6feb3e5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -369,14 +369,21 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 		dp->proc_iops = &proc_file_inode_operations;
 	} else {
 		WARN_ON(1);
+		proc_free_inum(dp->low_ino);
 		return -EINVAL;
 	}
 
 	spin_lock(&proc_subdir_lock);
 	dp->parent = dir;
-	if (pde_subdir_insert(dir, dp) == false)
+	if (pde_subdir_insert(dir, dp) == false) {
 		WARN(1, "proc_dir_entry '%s/%s' already registered\n",
 		     dir->name, dp->name);
+		spin_unlock(&proc_subdir_lock);
+		if (S_ISDIR(dp->mode))
+			dir->nlink--;
+		proc_free_inum(dp->low_ino);
+		return -EEXIST;
+	}
 	spin_unlock(&proc_subdir_lock);
 
 	return 0;
-- 
cgit v1.2.3


From 2fc1e948e820bddf8a686c6e2989219b471d7982 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel
Date: Wed, 10 Dec 2014 15:45:07 -0800
Subject: fs/proc.c: use rb_entry_safe() instead of rb_entry()

Better to use existing macro that rewriting them.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index be39c6feb3e5..7fea13229f33 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -41,22 +41,14 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
 
 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
 {
-	struct rb_node *node = rb_first(&dir->subdir);
-
-	if (node == NULL)
-		return NULL;
-
-	return rb_entry(node, struct proc_dir_entry, subdir_node);
+	return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
+			     subdir_node);
 }
 
 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
 {
-	struct rb_node *node = rb_next(&dir->subdir_node);
-
-	if (node == NULL)
-		return NULL;
-
-	return rb_entry(node, struct proc_dir_entry, subdir_node);
+	return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry,
+			     subdir_node);
 }
 
 static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
-- 
cgit v1.2.3


From 4af1036df4dd4f0d59fad9d82ed456bfa2e73fa6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 10 Dec 2014 15:45:10 -0800
Subject: proc: task_state: read cred->group_info outside of task_lock()

task_state() reads cred->group_info under task_lock() because a long ago
it was task_struct->group_info and it was actually protected by
task->alloc_lock.  Today this task_unlock() after rcu_read_unlock() just
adds the confusion, move task_unlock() up.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Cc: Sterling Alexander <stalexan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roland McGrath <roland@hack.frob.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index cd3653e4f35c..b5810c228c10 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -201,11 +201,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		"FDSize:\t%d\n"
 		"Groups:\t",
 		fdt ? fdt->max_fds : 0);
+	task_unlock(p);
 	rcu_read_unlock();
 
 	group_info = cred->group_info;
-	task_unlock(p);
-
 	for (g = 0; g < group_info->ngroups; g++)
 		seq_printf(m, "%d ",
 			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
-- 
cgit v1.2.3


From 0f4a0d53f20c76a70cb5b69b6aa237de2107e171 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 10 Dec 2014 15:45:12 -0800
Subject: proc: task_state: deuglify the max_fds calculation

1. The usage of fdt looks very ugly, it can't be NULL if ->files is
   not NULL. We can use "unsigned int max_fds" instead.

2. This also allows to move seq_printf(max_fds) outside of task_lock()
   and join it with the previous seq_printf(). See also the next patch.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Cc: Sterling Alexander <stalexan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roland McGrath <roland@hack.frob.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index b5810c228c10..7c8d9aecb070 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -157,9 +157,9 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
 	int g;
-	struct fdtable *fdt = NULL;
 	const struct cred *cred;
 	pid_t ppid, tpid;
+	unsigned int max_fds = 0;
 
 	rcu_read_lock();
 	ppid = pid_alive(p) ?
@@ -171,6 +171,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 			tpid = task_pid_nr_ns(tracer, ns);
 	}
 	cred = get_task_cred(p);
+
+	task_lock(p);
+	if (p->files)
+		max_fds = files_fdtable(p->files)->max_fds;
+	task_unlock(p);
+
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
@@ -179,7 +185,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
 		"Uid:\t%d\t%d\t%d\t%d\n"
-		"Gid:\t%d\t%d\t%d\t%d\n",
+		"Gid:\t%d\t%d\t%d\t%d\n"
+		"FDSize:\t%d\nGroups:\t",
 		get_task_state(p),
 		task_tgid_nr_ns(p, ns),
 		task_numa_group_id(p),
@@ -192,16 +199,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		from_kgid_munged(user_ns, cred->gid),
 		from_kgid_munged(user_ns, cred->egid),
 		from_kgid_munged(user_ns, cred->sgid),
-		from_kgid_munged(user_ns, cred->fsgid));
-
-	task_lock(p);
-	if (p->files)
-		fdt = files_fdtable(p->files);
-	seq_printf(m,
-		"FDSize:\t%d\n"
-		"Groups:\t",
-		fdt ? fdt->max_fds : 0);
-	task_unlock(p);
+		from_kgid_munged(user_ns, cred->fsgid),
+		max_fds);
 	rcu_read_unlock();
 
 	group_info = cred->group_info;
-- 
cgit v1.2.3


From b0fafc11115938a3215723f37e8e9cc6a94b05b0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 10 Dec 2014 15:45:15 -0800
Subject: proc: task_state: move the main seq_printf() outside of
 rcu_read_lock()

task_state() does seq_printf() under rcu_read_lock(), but this is only
needed for task_tgid_nr_ns() and task_numa_group_id().  We can calculate
tgid/ngid and drop rcu lock.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Cc: Sterling Alexander <stalexan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roland McGrath <roland@hack.frob.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7c8d9aecb070..800e30f8f284 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -158,7 +158,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	struct group_info *group_info;
 	int g;
 	const struct cred *cred;
-	pid_t ppid, tpid;
+	pid_t ppid, tpid, tgid, ngid;
 	unsigned int max_fds = 0;
 
 	rcu_read_lock();
@@ -170,12 +170,16 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		if (tracer)
 			tpid = task_pid_nr_ns(tracer, ns);
 	}
+
+	tgid = task_tgid_nr_ns(p, ns);
+	ngid = task_numa_group_id(p);
 	cred = get_task_cred(p);
 
 	task_lock(p);
 	if (p->files)
 		max_fds = files_fdtable(p->files)->max_fds;
 	task_unlock(p);
+	rcu_read_unlock();
 
 	seq_printf(m,
 		"State:\t%s\n"
@@ -188,10 +192,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		"Gid:\t%d\t%d\t%d\t%d\n"
 		"FDSize:\t%d\nGroups:\t",
 		get_task_state(p),
-		task_tgid_nr_ns(p, ns),
-		task_numa_group_id(p),
-		pid_nr_ns(pid, ns),
-		ppid, tpid,
+		tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
 		from_kuid_munged(user_ns, cred->uid),
 		from_kuid_munged(user_ns, cred->euid),
 		from_kuid_munged(user_ns, cred->suid),
@@ -201,7 +202,6 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		from_kgid_munged(user_ns, cred->sgid),
 		from_kgid_munged(user_ns, cred->fsgid),
 		max_fds);
-	rcu_read_unlock();
 
 	group_info = cred->group_info;
 	for (g = 0; g < group_info->ngroups; g++)
-- 
cgit v1.2.3


From abdba6e9ea6d3903c2b0618db720e17b3c1c705c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 10 Dec 2014 15:45:18 -0800
Subject: proc: task_state: ptrace_parent() doesn't need pid_alive() check

p->ptrace != 0 means that release_task(p) was not called, so pid_alive()
buys nothing and we can remove this check.  Other callers already use it
directly without additional checks.

Note: with or without this patch ptrace_parent() can return the pointer to
the freed task, this will be explained/fixed later.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Cc: Sterling Alexander <stalexan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roland McGrath <roland@hack.frob.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 800e30f8f284..bd117d065b82 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -157,19 +157,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
 	int g;
+	struct task_struct *tracer;
 	const struct cred *cred;
-	pid_t ppid, tpid, tgid, ngid;
+	pid_t ppid, tpid = 0, tgid, ngid;
 	unsigned int max_fds = 0;
 
 	rcu_read_lock();
 	ppid = pid_alive(p) ?
 		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
-	tpid = 0;
-	if (pid_alive(p)) {
-		struct task_struct *tracer = ptrace_parent(p);
-		if (tracer)
-			tpid = task_pid_nr_ns(tracer, ns);
-	}
+
+	tracer = ptrace_parent(p);
+	if (tracer)
+		tpid = task_pid_nr_ns(tracer, ns);
 
 	tgid = task_tgid_nr_ns(p, ns);
 	ngid = task_numa_group_id(p);
-- 
cgit v1.2.3


From c6cb898b54ce1705f9edfa4a13be57ad3cced4a1 Mon Sep 17 00:00:00 2001
From: Yann Droneaud
Date: Wed, 10 Dec 2014 15:45:42 -0800
Subject: binfmt_misc: replace get_unused_fd() with get_unused_fd_flags(0)

This patch replaces calls to get_unused_fd() with equivalent call to
get_unused_fd_flags(0) to preserve current behavor for existing code.

In a further patch, get_unused_fd() will be removed so that new code start
using get_unused_fd_flags(), with the hope O_CLOEXEC could be used, either
by default or choosen by userspace.

Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index fd8beb9657a2..1cc5377ba955 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -153,7 +153,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		/* if the binary should be opened on behalf of the
 		 * interpreter than keep it open and assign descriptor
 		 * to it */
- 		fd_binary = get_unused_fd();
+		fd_binary = get_unused_fd_flags(0);
  		if (fd_binary < 0) {
  			retval = fd_binary;
  			goto _ret;
-- 
cgit v1.2.3


From 8d10a035829cacbd84bf35610870c35e566d1242 Mon Sep 17 00:00:00 2001
From: Yann Droneaud
Date: Wed, 10 Dec 2014 15:45:44 -0800
Subject: fs/file.c: replace get_unused_fd() with get_unused_fd_flags(0)

This patch replaces calls to get_unused_fd() with equivalent call to
get_unused_fd_flags(0) to preserve current behavor for existing code.

In a further patch, get_unused_fd() will be removed so that new code
start using get_unused_fd_flags(), with the hope O_CLOEXEC could be
used, either by default or choosen by userspace.

Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index ab3eb6a88239..ee738ea028fa 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -869,7 +869,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
 	struct file *file = fget_raw(fildes);
 
 	if (file) {
-		ret = get_unused_fd();
+		ret = get_unused_fd_flags(0);
 		if (ret >= 0)
 			fd_install(ret, file);
 		else
-- 
cgit v1.2.3


From 6b899c4e9a049dfca759d990bd53b14f81c3626c Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Wed, 10 Dec 2014 15:52:08 -0800
Subject: binfmt_misc: add comments & debug logs

When trying to develop a custom format handler, the errors returned all
effectively get bucketed as EINVAL with no kernel messages.  The other
errors (ENOMEM/EFAULT) are internal/obvious and basic.  Thus any time a
bad handler is rejected, the developer has to walk the dense code and
try to guess where it went wrong.  Needing to dive into kernel code is
itself a fairly high barrier for a lot of people.

To improve this situation, let's deploy extensive pr_debug markers at
logical parse points, and add comments to the dense parsing logic.  It
let's you see exactly where the parsing aborts, the string the kernel
received (useful when dealing with shell code), how it translated the
buffers to binary data, and how it will apply the mask at runtime.

Some example output:
  $ echo ':qemu-foo:M::\x7fELF\xAD\xAD\x01\x00:\xff\xff\xff\xff\xff\x00\xff\x00:/usr/bin/qemu-foo:POC' > register
  $ dmesg
  binfmt_misc: register: received 92 bytes
  binfmt_misc: register: delim: 0x3a {:}
  binfmt_misc: register: name: {qemu-foo}
  binfmt_misc: register: type: M (magic)
  binfmt_misc: register: offset: 0x0
  binfmt_misc: register: magic[raw]: 5c 78 37 66 45 4c 46 5c 78 41 44 5c 78 41 44 5c  \x7fELF\xAD\xAD\
  binfmt_misc: register: magic[raw]: 78 30 31 5c 78 30 30 00                          x01\x00.
  binfmt_misc: register:  mask[raw]: 5c 78 66 66 5c 78 66 66 5c 78 66 66 5c 78 66 66  \xff\xff\xff\xff
  binfmt_misc: register:  mask[raw]: 5c 78 66 66 5c 78 30 30 5c 78 66 66 5c 78 30 30  \xff\x00\xff\x00
  binfmt_misc: register:  mask[raw]: 00                                               .
  binfmt_misc: register: magic/mask length: 8
  binfmt_misc: register: magic[decoded]: 7f 45 4c 46 ad ad 01 00                          .ELF....
  binfmt_misc: register:  mask[decoded]: ff ff ff ff ff 00 ff 00                          ........
  binfmt_misc: register:  magic[masked]: 7f 45 4c 46 ad 00 01 00                          .ELF....
  binfmt_misc: register: interpreter: {/usr/bin/qemu-foo}
  binfmt_misc: register: flag: P (preserve argv0)
  binfmt_misc: register: flag: O (open binary)
  binfmt_misc: register: flag: C (preserve creds)

The [raw] lines show us exactly what was received from userspace.  The
lines after that show us how the kernel has decoded things.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 121 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1cc5377ba955..d87ddc7c4b14 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -16,6 +16,8 @@
  *  2001-02-28 AV: rewritten into something that resembles C. Original didn't.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -30,8 +32,13 @@
 #include <linux/mount.h>
 #include <linux/syscalls.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
+#ifdef DEBUG
+# define USE_DEBUG 1
+#else
+# define USE_DEBUG 0
+#endif
 
 enum {
 	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
@@ -87,20 +94,24 @@ static Node *check_file(struct linux_binprm *bprm)
 	char *p = strrchr(bprm->interp, '.');
 	struct list_head *l;
 
+	/* Walk all the registered handlers. */
 	list_for_each(l, &entries) {
 		Node *e = list_entry(l, Node, list);
 		char *s;
 		int j;
 
+		/* Make sure this one is currently enabled. */
 		if (!test_bit(Enabled, &e->flags))
 			continue;
 
+		/* Do matching based on extension if applicable. */
 		if (!test_bit(Magic, &e->flags)) {
 			if (p && !strcmp(e->magic, p + 1))
 				return e;
 			continue;
 		}
 
+		/* Do matching based on magic & mask. */
 		s = bprm->buf + e->offset;
 		if (e->mask) {
 			for (j = 0; j < e->size; j++)
@@ -259,14 +270,17 @@ static char * check_special_flags (char * sfs, Node * e)
 	while (cont) {
 		switch (*p) {
 			case 'P':
+				pr_debug("register: flag: P (preserve argv0)\n");
 				p++;
 				e->flags |= MISC_FMT_PRESERVE_ARGV0;
 				break;
 			case 'O':
+				pr_debug("register: flag: O (open binary)\n");
 				p++;
 				e->flags |= MISC_FMT_OPEN_BINARY;
 				break;
 			case 'C':
+				pr_debug("register: flag: C (preserve creds)\n");
 				p++;
 				/* this flags also implies the
 				   open-binary flag */
@@ -292,6 +306,8 @@ static Node *create_entry(const char __user *buffer, size_t count)
 	char *buf, *p;
 	char del;
 
+	pr_debug("register: received %zu bytes\n", count);
+
 	/* some sanity checks */
 	err = -EINVAL;
 	if ((count < 11) || (count > MAX_REGISTER_LENGTH))
@@ -311,8 +327,12 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	del = *p++;	/* delimeter */
 
+	pr_debug("register: delim: %#x {%c}\n", del, del);
+
+	/* Pad the buffer with the delim to simplify parsing below. */
 	memset(buf+count, del, 8);
 
+	/* Parse the 'name' field. */
 	e->name = p;
 	p = strchr(p, del);
 	if (!p)
@@ -323,46 +343,113 @@ static Node *create_entry(const char __user *buffer, size_t count)
 	    !strcmp(e->name, "..") ||
 	    strchr(e->name, '/'))
 		goto Einval;
+
+	pr_debug("register: name: {%s}\n", e->name);
+
+	/* Parse the 'type' field. */
 	switch (*p++) {
-		case 'E': e->flags = 1<<Enabled; break;
-		case 'M': e->flags = (1<<Enabled) | (1<<Magic); break;
-		default: goto Einval;
+	case 'E':
+		pr_debug("register: type: E (extension)\n");
+		e->flags = 1 << Enabled;
+		break;
+	case 'M':
+		pr_debug("register: type: M (magic)\n");
+		e->flags = (1 << Enabled) | (1 << Magic);
+		break;
+	default:
+		goto Einval;
 	}
 	if (*p++ != del)
 		goto Einval;
+
 	if (test_bit(Magic, &e->flags)) {
-		char *s = strchr(p, del);
+		/* Handle the 'M' (magic) format. */
+		char *s;
+
+		/* Parse the 'offset' field. */
+		s = strchr(p, del);
 		if (!s)
 			goto Einval;
 		*s++ = '\0';
 		e->offset = simple_strtoul(p, &p, 10);
 		if (*p++)
 			goto Einval;
+		pr_debug("register: offset: %#x\n", e->offset);
+
+		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = scanarg(p, del);
 		if (!p)
 			goto Einval;
 		p[-1] = '\0';
-		if (!e->magic[0])
+		if (p == e->magic)
 			goto Einval;
+		if (USE_DEBUG)
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register: magic[raw]: ",
+				DUMP_PREFIX_NONE, e->magic, p - e->magic);
+
+		/* Parse the 'mask' field. */
 		e->mask = p;
 		p = scanarg(p, del);
 		if (!p)
 			goto Einval;
 		p[-1] = '\0';
-		if (!e->mask[0])
+		if (p == e->mask) {
 			e->mask = NULL;
+			pr_debug("register:  mask[raw]: none\n");
+		} else if (USE_DEBUG)
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register:  mask[raw]: ",
+				DUMP_PREFIX_NONE, e->mask, p - e->mask);
+
+		/*
+		 * Decode the magic & mask fields.
+		 * Note: while we might have accepted embedded NUL bytes from
+		 * above, the unescape helpers here will stop at the first one
+		 * it encounters.
+		 */
 		e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
 		if (e->mask &&
 		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
 			goto Einval;
 		if (e->size + e->offset > BINPRM_BUF_SIZE)
 			goto Einval;
+		pr_debug("register: magic/mask length: %i\n", e->size);
+		if (USE_DEBUG) {
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register: magic[decoded]: ",
+				DUMP_PREFIX_NONE, e->magic, e->size);
+
+			if (e->mask) {
+				int i;
+				char *masked = kmalloc(e->size, GFP_USER);
+
+				print_hex_dump_bytes(
+					KBUILD_MODNAME ": register:  mask[decoded]: ",
+					DUMP_PREFIX_NONE, e->mask, e->size);
+
+				if (masked) {
+					for (i = 0; i < e->size; ++i)
+						masked[i] = e->magic[i] & e->mask[i];
+					print_hex_dump_bytes(
+						KBUILD_MODNAME ": register:  magic[masked]: ",
+						DUMP_PREFIX_NONE, masked, e->size);
+
+					kfree(masked);
+				}
+			}
+		}
 	} else {
+		/* Handle the 'E' (extension) format. */
+
+		/* Skip the 'offset' field. */
 		p = strchr(p, del);
 		if (!p)
 			goto Einval;
 		*p++ = '\0';
+
+		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = strchr(p, del);
 		if (!p)
@@ -370,11 +457,16 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		*p++ = '\0';
 		if (!e->magic[0] || strchr(e->magic, '/'))
 			goto Einval;
+		pr_debug("register: extension: {%s}\n", e->magic);
+
+		/* Skip the 'mask' field. */
 		p = strchr(p, del);
 		if (!p)
 			goto Einval;
 		*p++ = '\0';
 	}
+
+	/* Parse the 'interpreter' field. */
 	e->interpreter = p;
 	p = strchr(p, del);
 	if (!p)
@@ -382,10 +474,10 @@ static Node *create_entry(const char __user *buffer, size_t count)
 	*p++ = '\0';
 	if (!e->interpreter[0])
 		goto Einval;
+	pr_debug("register: interpreter: {%s}\n", e->interpreter);
 
-
+	/* Parse the 'flags' field. */
 	p = check_special_flags (p, e);
-
 	if (*p == '\n')
 		p++;
 	if (p != buf + count)
@@ -553,11 +645,17 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 	int res = parse_command(buffer, count);
 
 	switch (res) {
-		case 1: clear_bit(Enabled, &e->flags);
+		case 1:
+			/* Disable this handler. */
+			clear_bit(Enabled, &e->flags);
 			break;
-		case 2: set_bit(Enabled, &e->flags);
+		case 2:
+			/* Enable this handler. */
+			set_bit(Enabled, &e->flags);
 			break;
-		case 3: root = dget(file->f_path.dentry->d_sb->s_root);
+		case 3:
+			/* Delete this handler. */
+			root = dget(file->f_path.dentry->d_sb->s_root);
 			mutex_lock(&root->d_inode->i_mutex);
 
 			kill_node(e);
@@ -661,9 +759,17 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
 	struct dentry *root;
 
 	switch (res) {
-		case 1: enabled = 0; break;
-		case 2: enabled = 1; break;
-		case 3: root = dget(file->f_path.dentry->d_sb->s_root);
+		case 1:
+			/* Disable all handlers. */
+			enabled = 0;
+			break;
+		case 2:
+			/* Enable all handlers. */
+			enabled = 1;
+			break;
+		case 3:
+			/* Delete all handlers. */
+			root = dget(file->f_path.dentry->d_sb->s_root);
 			mutex_lock(&root->d_inode->i_mutex);
 
 			while (!list_empty(&entries))
-- 
cgit v1.2.3


From e6084d4a086b626acb5cf97795a9243062c2f4cd Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Wed, 10 Dec 2014 15:52:10 -0800
Subject: binfmt_misc: clean up code style a bit

Clean up various coding style issues that checkpatch complains about.
No functional changes here.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c | 293 +++++++++++++++++++++++++++----------------------------
 1 file changed, 145 insertions(+), 148 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index d87ddc7c4b14..01a2cd9ab2bf 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -1,19 +1,10 @@
 /*
- *  binfmt_misc.c
+ * binfmt_misc.c
  *
- *  Copyright (C) 1997 Richard Günther
+ * Copyright (C) 1997 Richard Günther
  *
- *  binfmt_misc detects binaries via a magic or filename extension and invokes
- *  a specified wrapper. This should obsolete binfmt_java, binfmt_em86 and
- *  binfmt_mz.
- *
- *  1997-04-25 first version
- *  [...]
- *  1997-05-19 cleanup
- *  1997-06-26 hpa: pass the real filename rather than argv[0]
- *  1997-06-30 minor cleanup
- *  1997-08-09 removed extension stripping, locking cleanup
- *  2001-02-28 AV: rewritten into something that resembles C. Original didn't.
+ * binfmt_misc detects binaries via a magic or filename extension and invokes
+ * a specified wrapper. See Documentation/binfmt_misc.txt for more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -48,9 +39,9 @@ static LIST_HEAD(entries);
 static int enabled = 1;
 
 enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1<<31)
-#define MISC_FMT_OPEN_BINARY (1<<30)
-#define MISC_FMT_CREDENTIALS (1<<29)
+#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
+#define MISC_FMT_OPEN_BINARY (1 << 30)
+#define MISC_FMT_CREDENTIALS (1 << 29)
 
 typedef struct {
 	struct list_head list;
@@ -134,7 +125,7 @@ static Node *check_file(struct linux_binprm *bprm)
 static int load_misc_binary(struct linux_binprm *bprm)
 {
 	Node *fmt;
-	struct file * interp_file = NULL;
+	struct file *interp_file = NULL;
 	char iname[BINPRM_BUF_SIZE];
 	const char *iname_addr = iname;
 	int retval;
@@ -142,7 +133,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 
 	retval = -ENOEXEC;
 	if (!enabled)
-		goto _ret;
+		goto ret;
 
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
@@ -151,25 +142,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
 	read_unlock(&entries_lock);
 	if (!fmt)
-		goto _ret;
+		goto ret;
 
 	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
 		retval = remove_arg_zero(bprm);
 		if (retval)
-			goto _ret;
+			goto ret;
 	}
 
 	if (fmt->flags & MISC_FMT_OPEN_BINARY) {
 
 		/* if the binary should be opened on behalf of the
 		 * interpreter than keep it open and assign descriptor
-		 * to it */
+		 * to it
+		 */
 		fd_binary = get_unused_fd_flags(0);
- 		if (fd_binary < 0) {
- 			retval = fd_binary;
- 			goto _ret;
- 		}
- 		fd_install(fd_binary, bprm->file);
+		if (fd_binary < 0) {
+			retval = fd_binary;
+			goto ret;
+		}
+		fd_install(fd_binary, bprm->file);
 
 		/* if the binary is not readable than enforce mm->dumpable=0
 		   regardless of the interpreter's permissions */
@@ -182,32 +174,32 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		bprm->interp_flags |= BINPRM_FLAGS_EXECFD;
 		bprm->interp_data = fd_binary;
 
- 	} else {
- 		allow_write_access(bprm->file);
- 		fput(bprm->file);
- 		bprm->file = NULL;
- 	}
+	} else {
+		allow_write_access(bprm->file);
+		fput(bprm->file);
+		bprm->file = NULL;
+	}
 	/* make argv[1] be the path to the binary */
-	retval = copy_strings_kernel (1, &bprm->interp, bprm);
+	retval = copy_strings_kernel(1, &bprm->interp, bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 	bprm->argc++;
 
 	/* add the interp as argv[0] */
-	retval = copy_strings_kernel (1, &iname_addr, bprm);
+	retval = copy_strings_kernel(1, &iname_addr, bprm);
 	if (retval < 0)
-		goto _error;
-	bprm->argc ++;
+		goto error;
+	bprm->argc++;
 
 	/* Update interp in case binfmt_script needs it. */
 	retval = bprm_change_interp(iname, bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 
-	interp_file = open_exec (iname);
-	retval = PTR_ERR (interp_file);
-	if (IS_ERR (interp_file))
-		goto _error;
+	interp_file = open_exec(iname);
+	retval = PTR_ERR(interp_file);
+	if (IS_ERR(interp_file))
+		goto error;
 
 	bprm->file = interp_file;
 	if (fmt->flags & MISC_FMT_CREDENTIALS) {
@@ -218,23 +210,23 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		memset(bprm->buf, 0, BINPRM_BUF_SIZE);
 		retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
 	} else
-		retval = prepare_binprm (bprm);
+		retval = prepare_binprm(bprm);
 
 	if (retval < 0)
-		goto _error;
+		goto error;
 
 	retval = search_binary_handler(bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 
-_ret:
+ret:
 	return retval;
-_error:
+error:
 	if (fd_binary > 0)
 		sys_close(fd_binary);
 	bprm->interp_flags = 0;
 	bprm->interp_data = 0;
-	goto _ret;
+	goto ret;
 }
 
 /* Command parsers */
@@ -261,39 +253,40 @@ static char *scanarg(char *s, char del)
 	return s;
 }
 
-static char * check_special_flags (char * sfs, Node * e)
+static char *check_special_flags(char *sfs, Node *e)
 {
-	char * p = sfs;
+	char *p = sfs;
 	int cont = 1;
 
 	/* special flags */
 	while (cont) {
 		switch (*p) {
-			case 'P':
-				pr_debug("register: flag: P (preserve argv0)\n");
-				p++;
-				e->flags |= MISC_FMT_PRESERVE_ARGV0;
-				break;
-			case 'O':
-				pr_debug("register: flag: O (open binary)\n");
-				p++;
-				e->flags |= MISC_FMT_OPEN_BINARY;
-				break;
-			case 'C':
-				pr_debug("register: flag: C (preserve creds)\n");
-				p++;
-				/* this flags also implies the
-				   open-binary flag */
-				e->flags |= (MISC_FMT_CREDENTIALS |
-						MISC_FMT_OPEN_BINARY);
-				break;
-			default:
-				cont = 0;
+		case 'P':
+			pr_debug("register: flag: P (preserve argv0)\n");
+			p++;
+			e->flags |= MISC_FMT_PRESERVE_ARGV0;
+			break;
+		case 'O':
+			pr_debug("register: flag: O (open binary)\n");
+			p++;
+			e->flags |= MISC_FMT_OPEN_BINARY;
+			break;
+		case 'C':
+			pr_debug("register: flag: C (preserve creds)\n");
+			p++;
+			/* this flags also implies the
+			   open-binary flag */
+			e->flags |= (MISC_FMT_CREDENTIALS |
+					MISC_FMT_OPEN_BINARY);
+			break;
+		default:
+			cont = 0;
 		}
 	}
 
 	return p;
 }
+
 /*
  * This registers a new binary format, it recognises the syntax
  * ':name:type:offset:magic:mask:interpreter:flags'
@@ -323,26 +316,26 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	memset(e, 0, sizeof(Node));
 	if (copy_from_user(buf, buffer, count))
-		goto Efault;
+		goto efault;
 
 	del = *p++;	/* delimeter */
 
 	pr_debug("register: delim: %#x {%c}\n", del, del);
 
 	/* Pad the buffer with the delim to simplify parsing below. */
-	memset(buf+count, del, 8);
+	memset(buf + count, del, 8);
 
 	/* Parse the 'name' field. */
 	e->name = p;
 	p = strchr(p, del);
 	if (!p)
-		goto Einval;
+		goto einval;
 	*p++ = '\0';
 	if (!e->name[0] ||
 	    !strcmp(e->name, ".") ||
 	    !strcmp(e->name, "..") ||
 	    strchr(e->name, '/'))
-		goto Einval;
+		goto einval;
 
 	pr_debug("register: name: {%s}\n", e->name);
 
@@ -357,10 +350,10 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		e->flags = (1 << Enabled) | (1 << Magic);
 		break;
 	default:
-		goto Einval;
+		goto einval;
 	}
 	if (*p++ != del)
-		goto Einval;
+		goto einval;
 
 	if (test_bit(Magic, &e->flags)) {
 		/* Handle the 'M' (magic) format. */
@@ -369,21 +362,21 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		/* Parse the 'offset' field. */
 		s = strchr(p, del);
 		if (!s)
-			goto Einval;
+			goto einval;
 		*s++ = '\0';
 		e->offset = simple_strtoul(p, &p, 10);
 		if (*p++)
-			goto Einval;
+			goto einval;
 		pr_debug("register: offset: %#x\n", e->offset);
 
 		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = scanarg(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		p[-1] = '\0';
 		if (p == e->magic)
-			goto Einval;
+			goto einval;
 		if (USE_DEBUG)
 			print_hex_dump_bytes(
 				KBUILD_MODNAME ": register: magic[raw]: ",
@@ -393,7 +386,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		e->mask = p;
 		p = scanarg(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		p[-1] = '\0';
 		if (p == e->mask) {
 			e->mask = NULL;
@@ -412,9 +405,9 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
 		if (e->mask &&
 		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
-			goto Einval;
+			goto einval;
 		if (e->size + e->offset > BINPRM_BUF_SIZE)
-			goto Einval;
+			goto einval;
 		pr_debug("register: magic/mask length: %i\n", e->size);
 		if (USE_DEBUG) {
 			print_hex_dump_bytes(
@@ -446,23 +439,23 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		/* Skip the 'offset' field. */
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 
 		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 		if (!e->magic[0] || strchr(e->magic, '/'))
-			goto Einval;
+			goto einval;
 		pr_debug("register: extension: {%s}\n", e->magic);
 
 		/* Skip the 'mask' field. */
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 	}
 
@@ -470,27 +463,28 @@ static Node *create_entry(const char __user *buffer, size_t count)
 	e->interpreter = p;
 	p = strchr(p, del);
 	if (!p)
-		goto Einval;
+		goto einval;
 	*p++ = '\0';
 	if (!e->interpreter[0])
-		goto Einval;
+		goto einval;
 	pr_debug("register: interpreter: {%s}\n", e->interpreter);
 
 	/* Parse the 'flags' field. */
-	p = check_special_flags (p, e);
+	p = check_special_flags(p, e);
 	if (*p == '\n')
 		p++;
 	if (p != buf + count)
-		goto Einval;
+		goto einval;
+
 	return e;
 
 out:
 	return ERR_PTR(err);
 
-Efault:
+efault:
 	kfree(e);
 	return ERR_PTR(-EFAULT);
-Einval:
+einval:
 	kfree(e);
 	return ERR_PTR(-EINVAL);
 }
@@ -509,7 +503,7 @@ static int parse_command(const char __user *buffer, size_t count)
 		return -EFAULT;
 	if (!count)
 		return 0;
-	if (s[count-1] == '\n')
+	if (s[count - 1] == '\n')
 		count--;
 	if (count == 1 && s[0] == '0')
 		return 1;
@@ -526,7 +520,7 @@ static void entry_status(Node *e, char *page)
 {
 	char *dp;
 	char *status = "disabled";
-	const char * flags = "flags: ";
+	const char *flags = "flags: ";
 
 	if (test_bit(Enabled, &e->flags))
 		status = "enabled";
@@ -540,19 +534,15 @@ static void entry_status(Node *e, char *page)
 	dp = page + strlen(page);
 
 	/* print the special flags */
-	sprintf (dp, "%s", flags);
-	dp += strlen (flags);
-	if (e->flags & MISC_FMT_PRESERVE_ARGV0) {
-		*dp ++ = 'P';
-	}
-	if (e->flags & MISC_FMT_OPEN_BINARY) {
-		*dp ++ = 'O';
-	}
-	if (e->flags & MISC_FMT_CREDENTIALS) {
-		*dp ++ = 'C';
-	}
-	*dp ++ = '\n';
-
+	sprintf(dp, "%s", flags);
+	dp += strlen(flags);
+	if (e->flags & MISC_FMT_PRESERVE_ARGV0)
+		*dp++ = 'P';
+	if (e->flags & MISC_FMT_OPEN_BINARY)
+		*dp++ = 'O';
+	if (e->flags & MISC_FMT_CREDENTIALS)
+		*dp++ = 'C';
+	*dp++ = '\n';
 
 	if (!test_bit(Magic, &e->flags)) {
 		sprintf(dp, "extension .%s\n", e->magic);
@@ -580,7 +570,7 @@ static void entry_status(Node *e, char *page)
 
 static struct inode *bm_get_inode(struct super_block *sb, int mode)
 {
-	struct inode * inode = new_inode(sb);
+	struct inode *inode = new_inode(sb);
 
 	if (inode) {
 		inode->i_ino = get_next_ino();
@@ -620,13 +610,14 @@ static void kill_node(Node *e)
 /* /<entry> */
 
 static ssize_t
-bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
+bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	Node *e = file_inode(file)->i_private;
 	ssize_t res;
 	char *page;
 
-	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+	page = (char *) __get_free_page(GFP_KERNEL);
+	if (!page)
 		return -ENOMEM;
 
 	entry_status(e, page);
@@ -645,26 +636,28 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 	int res = parse_command(buffer, count);
 
 	switch (res) {
-		case 1:
-			/* Disable this handler. */
-			clear_bit(Enabled, &e->flags);
-			break;
-		case 2:
-			/* Enable this handler. */
-			set_bit(Enabled, &e->flags);
-			break;
-		case 3:
-			/* Delete this handler. */
-			root = dget(file->f_path.dentry->d_sb->s_root);
-			mutex_lock(&root->d_inode->i_mutex);
+	case 1:
+		/* Disable this handler. */
+		clear_bit(Enabled, &e->flags);
+		break;
+	case 2:
+		/* Enable this handler. */
+		set_bit(Enabled, &e->flags);
+		break;
+	case 3:
+		/* Delete this handler. */
+		root = dget(file->f_path.dentry->d_sb->s_root);
+		mutex_lock(&root->d_inode->i_mutex);
 
-			kill_node(e);
+		kill_node(e);
 
-			mutex_unlock(&root->d_inode->i_mutex);
-			dput(root);
-			break;
-		default: return res;
+		mutex_unlock(&root->d_inode->i_mutex);
+		dput(root);
+		break;
+	default:
+		return res;
 	}
+
 	return count;
 }
 
@@ -752,34 +745,36 @@ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
 }
 
-static ssize_t bm_status_write(struct file * file, const char __user * buffer,
+static ssize_t bm_status_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
 	int res = parse_command(buffer, count);
 	struct dentry *root;
 
 	switch (res) {
-		case 1:
-			/* Disable all handlers. */
-			enabled = 0;
-			break;
-		case 2:
-			/* Enable all handlers. */
-			enabled = 1;
-			break;
-		case 3:
-			/* Delete all handlers. */
-			root = dget(file->f_path.dentry->d_sb->s_root);
-			mutex_lock(&root->d_inode->i_mutex);
+	case 1:
+		/* Disable all handlers. */
+		enabled = 0;
+		break;
+	case 2:
+		/* Enable all handlers. */
+		enabled = 1;
+		break;
+	case 3:
+		/* Delete all handlers. */
+		root = dget(file->f_path.dentry->d_sb->s_root);
+		mutex_lock(&root->d_inode->i_mutex);
 
-			while (!list_empty(&entries))
-				kill_node(list_entry(entries.next, Node, list));
+		while (!list_empty(&entries))
+			kill_node(list_entry(entries.next, Node, list));
 
-			mutex_unlock(&root->d_inode->i_mutex);
-			dput(root);
-			break;
-		default: return res;
+		mutex_unlock(&root->d_inode->i_mutex);
+		dput(root);
+		break;
+	default:
+		return res;
 	}
+
 	return count;
 }
 
@@ -796,14 +791,16 @@ static const struct super_operations s_ops = {
 	.evict_inode	= bm_evict_inode,
 };
 
-static int bm_fill_super(struct super_block * sb, void * data, int silent)
+static int bm_fill_super(struct super_block *sb, void *data, int silent)
 {
+	int err;
 	static struct tree_descr bm_files[] = {
 		[2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
 		[3] = {"register", &bm_register_operations, S_IWUSR},
 		/* last one */ {""}
 	};
-	int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
+
+	err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
 	if (!err)
 		sb->s_op = &s_ops;
 	return err;
-- 
cgit v1.2.3


From f7e1ad1a1e23af97419cd8d5adff67fedf7cf169 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 10 Dec 2014 15:52:13 -0800
Subject: fs/binfmt_misc.c: use GFP_KERNEL instead of GFP_USER

GFP_USER means "honour cpuset nodes-allowed beancounting".  These are
regular old kernel objects and there seems no reason to give them this
treatment.

Acked-by: Mike Frysinger <vapier@gentoo.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 01a2cd9ab2bf..70789e198dea 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -308,7 +308,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	err = -ENOMEM;
 	memsize = sizeof(Node) + count + 8;
-	e = kmalloc(memsize, GFP_USER);
+	e = kmalloc(memsize, GFP_KERNEL);
 	if (!e)
 		goto out;
 
@@ -416,7 +416,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 			if (e->mask) {
 				int i;
-				char *masked = kmalloc(e->size, GFP_USER);
+				char *masked = kmalloc(e->size, GFP_KERNEL);
 
 				print_hex_dump_bytes(
 					KBUILD_MODNAME ": register:  mask[decoded]: ",
-- 
cgit v1.2.3


From 52f5592e549c013feb9bb71cab3e6fd624633577 Mon Sep 17 00:00:00 2001
From: Jungseung Lee
Date: Wed, 10 Dec 2014 15:52:16 -0800
Subject: fs/binfmt_elf.c: fix internal inconsistency relating to vma dump size

vma_dump_size() has been used several times on actual dumper and it is
supposed to return the same value for the same vma.  But vma_dump_size()
could return different values for same vma.

The known problem case is concurrent shared memory removal.  If a vma is
used for a shared memory and that shared memory is removed between
writing program header and dumping vma memory, this will result in a
dump file which is internally consistent.

To fix the problem, we set baseline to get dump size and store the size
into vma_filesz and always use the same vma dump size which is stored in
vma_filsz.  The consistnecy with reality is not actually guranteed, but
it's tolerable since that is fully consistent with base line.

Signed-off-by: Jungseung Lee <js07.lee@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d8fc0605b9d2..3a6175fe10c0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1994,18 +1994,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 	shdr4extnum->sh_info = segs;
 }
 
-static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
-				     unsigned long mm_flags)
-{
-	struct vm_area_struct *vma;
-	size_t size = 0;
-
-	for (vma = first_vma(current, gate_vma); vma != NULL;
-	     vma = next_vma(vma, gate_vma))
-		size += vma_dump_size(vma, mm_flags);
-	return size;
-}
-
 /*
  * Actual dumper
  *
@@ -2017,7 +2005,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
-	int segs;
+	int segs, i;
+	size_t vma_data_size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff;
@@ -2026,6 +2015,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
 	elf_addr_t e_shoff;
+	elf_addr_t *vma_filesz = NULL;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -2093,7 +2083,20 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
+	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
+	if (!vma_filesz)
+		goto end_coredump;
+
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+			vma = next_vma(vma, gate_vma)) {
+		unsigned long dump_size;
+
+		dump_size = vma_dump_size(vma, cprm->mm_flags);
+		vma_filesz[i++] = dump_size;
+		vma_data_size += dump_size;
+	}
+
+	offset += vma_data_size;
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -2113,7 +2116,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* Write program headers for segments dump */
-	for (vma = first_vma(current, gate_vma); vma != NULL;
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		struct elf_phdr phdr;
 
@@ -2121,7 +2124,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		phdr.p_offset = offset;
 		phdr.p_vaddr = vma->vm_start;
 		phdr.p_paddr = 0;
-		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
+		phdr.p_filesz = vma_filesz[i++];
 		phdr.p_memsz = vma->vm_end - vma->vm_start;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
@@ -2149,12 +2152,12 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
-	for (vma = first_vma(current, gate_vma); vma != NULL;
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;
 		unsigned long end;
 
-		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
+		end = vma->vm_start + vma_filesz[i++];
 
 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
@@ -2187,6 +2190,7 @@ end_coredump:
 cleanup:
 	free_note_info(&info);
 	kfree(shdr4extnum);
+	kfree(vma_filesz);
 	kfree(phdr4note);
 	kfree(elf);
 out:
-- 
cgit v1.2.3


From a682e9c28cac152e6e54c39efcf046e0c8cfcf63 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 10 Dec 2014 15:52:22 -0800
Subject: ncpfs: return proper error from NCP_IOC_SETROOT ioctl

If some error happens in NCP_IOC_SETROOT ioctl, the appropriate error
return value is then (in most cases) just overwritten before we return.
This can result in reporting success to userspace although error happened.

This bug was introduced by commit 2e54eb96e2c8 ("BKL: Remove BKL from
ncpfs").  Propagate the errors correctly.

Coverity id: 1226925.

Fixes: 2e54eb96e2c80 ("BKL: Remove BKL from ncpfs")
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Petr Vandrovec <petr@vandrovec.name>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ncpfs/ioctl.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index d5659d96ee7f..cf7e043a9447 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -447,7 +447,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
 						result = -EIO;
 					}
 				}
-				result = 0;
 			}
 			mutex_unlock(&server->root_setup_lock);
 
-- 
cgit v1.2.3


From 75dc857c46996e336cb2f9cf0ed1e5508fb241a7 Mon Sep 17 00:00:00 2001
From: Andreas Rohner
Date: Wed, 10 Dec 2014 15:54:29 -0800
Subject: nilfs2: avoid duplicate segment construction for fsync()

This patch removes filemap_write_and_wait_range() from nilfs_sync_file(),
because it triggers a data segment construction by calling
nilfs_writepages() with WB_SYNC_ALL.  A data segment construction does not
remove the inode from the i_dirty list and it does not clear the
NILFS_I_DIRTY flag.  Therefore nilfs_inode_dirty() still returns true,
which leads to an unnecessary duplicate segment construction in
nilfs_sync_file().

A call to filemap_write_and_wait_range() is not needed, because NILFS2
does not rely on the generic writeback mechanisms.  Instead it implements
its own mechanism to collect all dirty pages and write them into segments.
 It is more efficient to initiate the segment construction directly in
nilfs_sync_file() without the detour over filemap_write_and_wait_range().

Additionally the lock of i_mutex is not needed, because all code blocks
that are protected by i_mutex are also protected by a NILFS transaction:

  Function                i_mutex     nilfs_transaction
  ------------------------------------------------------
  nilfs_ioctl_setflags:   yes         yes
  nilfs_fiemap:           yes         no
  nilfs_write_begin:      yes         yes
  nilfs_write_end:        yes         yes
  nilfs_lookup:           yes         no
  nilfs_create:           yes         yes
  nilfs_link:             yes         yes
  nilfs_mknod:            yes         yes
  nilfs_symlink:          yes         yes
  nilfs_mkdir:            yes         yes
  nilfs_unlink:           yes         yes
  nilfs_rmdir:            yes         yes
  nilfs_rename:           yes         yes
  nilfs_setattr:          yes         yes

For nilfs_lookup() i_mutex is held for the parent directory, to protect it
from modification.  The segment construction does not modify directory
inodes, so no lock is needed.

nilfs_fiemap() reads the block layout on the disk, by using
nilfs_bmap_lookup_contig(). This is already protected by bmap->b_sem.

Signed-off-by: Andreas Rohner <andreas.rohner@gmx.net>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/file.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index e9e3325f29f3..3a03e0aea1fb 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -39,21 +39,15 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 */
 	struct the_nilfs *nilfs;
 	struct inode *inode = file->f_mapping->host;
-	int err;
-
-	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (err)
-		return err;
-	mutex_lock(&inode->i_mutex);
+	int err = 0;
 
 	if (nilfs_inode_dirty(inode)) {
 		if (datasync)
 			err = nilfs_construct_dsync_segment(inode->i_sb, inode,
-							    0, LLONG_MAX);
+							    start, end);
 		else
 			err = nilfs_construct_segment(inode->i_sb);
 	}
-	mutex_unlock(&inode->i_mutex);
 
 	nilfs = inode->i_sb->s_fs_info;
 	if (!err)
-- 
cgit v1.2.3


From 72b9918ea4d7f2d8362a2defdb93e5fd25a86308 Mon Sep 17 00:00:00 2001
From: Markus Elfring
Date: Wed, 10 Dec 2014 15:54:31 -0800
Subject: nilfs2: deletion of an unnecessary check before the function call
 "iput"

The iput() function tests whether its argument is NULL and then returns
immediately.  Thus the test around the call is not needed.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/the_nilfs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 9da25fe9ea61..69bd801afb53 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -808,8 +808,7 @@ void nilfs_put_root(struct nilfs_root *root)
 		spin_lock(&nilfs->ns_cptree_lock);
 		rb_erase(&root->rb_node, &nilfs->ns_cptree);
 		spin_unlock(&nilfs->ns_cptree_lock);
-		if (root->ifile)
-			iput(root->ifile);
+		iput(root->ifile);
 
 		kfree(root);
 	}
-- 
cgit v1.2.3


From 705304a863cc41585508c0f476f6d3ec28cf7e00 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi
Date: Wed, 10 Dec 2014 15:54:34 -0800
Subject: nilfs2: fix the nilfs_iget() vs. nilfs_new_inode() races

Same story as in commit 41080b5a2401 ("nfsd race fixes: ext2") (similar
ext2 fix) except that nilfs2 needs to use insert_inode_locked4() instead
of insert_inode_locked() and a bug of a check for dead inodes needs to
be fixed.

If nilfs_iget() is called from nfsd after nilfs_new_inode() calls
insert_inode_locked4(), nilfs_iget() will wait for unlock_new_inode() at
the end of nilfs_mkdir()/nilfs_create()/etc to unlock the inode.

If nilfs_iget() is called before nilfs_new_inode() calls
insert_inode_locked4(), it will create an in-core inode and read its
data from the on-disk inode.  But, nilfs_iget() will find i_nlink equals
zero and fail at nilfs_read_inode_common(), which will lead it to call
iget_failed() and cleanly fail.

However, this sanity check doesn't work as expected for reused on-disk
inodes because they leave a non-zero value in i_mode field and it
hinders the test of i_nlink.  This patch also fixes the issue by
removing the test on i_mode that nilfs2 doesn't need.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/inode.c | 32 ++++++++++++++++++++++++--------
 fs/nilfs2/namei.c | 15 ++++++++++++---
 2 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index e1fa69b341b9..8b5969538f39 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -49,6 +49,8 @@ struct nilfs_iget_args {
 	int for_gc;
 };
 
+static int nilfs_iget_test(struct inode *inode, void *opaque);
+
 void nilfs_inode_add_blocks(struct inode *inode, int n)
 {
 	struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -348,6 +350,17 @@ const struct address_space_operations nilfs_aops = {
 	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
+static int nilfs_insert_inode_locked(struct inode *inode,
+				     struct nilfs_root *root,
+				     unsigned long ino)
+{
+	struct nilfs_iget_args args = {
+		.ino = ino, .root = root, .cno = 0, .for_gc = 0
+	};
+
+	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+}
+
 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 {
 	struct super_block *sb = dir->i_sb;
@@ -383,7 +396,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		err = nilfs_bmap_read(ii->i_bmap, NULL);
 		if (err < 0)
-			goto failed_bmap;
+			goto failed_after_creation;
 
 		set_bit(NILFS_I_BMAP, &ii->i_state);
 		/* No lock is needed; iget() ensures it. */
@@ -399,21 +412,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	spin_lock(&nilfs->ns_next_gen_lock);
 	inode->i_generation = nilfs->ns_next_generation++;
 	spin_unlock(&nilfs->ns_next_gen_lock);
-	insert_inode_hash(inode);
+	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
+		err = -EIO;
+		goto failed_after_creation;
+	}
 
 	err = nilfs_init_acl(inode, dir);
 	if (unlikely(err))
-		goto failed_acl; /* never occur. When supporting
+		goto failed_after_creation; /* never occur. When supporting
 				    nilfs_init_acl(), proper cancellation of
 				    above jobs should be considered */
 
 	return inode;
 
- failed_acl:
- failed_bmap:
+ failed_after_creation:
 	clear_nlink(inode);
+	unlock_new_inode(inode);
 	iput(inode);  /* raw_inode will be deleted through
-			 generic_delete_inode() */
+			 nilfs_evict_inode() */
 	goto failed;
 
  failed_ifile_create_inode:
@@ -461,8 +477,8 @@ int nilfs_read_inode_common(struct inode *inode,
 	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
 	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
-	if (inode->i_nlink == 0 && inode->i_mode == 0)
-		return -EINVAL; /* this inode is deleted */
+	if (inode->i_nlink == 0)
+		return -ESTALE; /* this inode is deleted */
 
 	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
 	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9de78f08989e..0f84b257932c 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
 	int err = nilfs_add_link(dentry, inode);
 	if (!err) {
 		d_instantiate(dentry, inode);
+		unlock_new_inode(inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	return err;
 }
@@ -182,6 +184,7 @@ out:
 out_fail:
 	drop_nlink(inode);
 	nilfs_mark_inode_dirty(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	goto out;
 }
@@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 	inode_inc_link_count(inode);
 	ihold(inode);
 
-	err = nilfs_add_nondir(dentry, inode);
-	if (!err)
+	err = nilfs_add_link(dentry, inode);
+	if (!err) {
+		d_instantiate(dentry, inode);
 		err = nilfs_transaction_commit(dir->i_sb);
-	else
+	} else {
+		inode_dec_link_count(inode);
+		iput(inode);
 		nilfs_transaction_abort(dir->i_sb);
+	}
 
 	return err;
 }
@@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	nilfs_mark_inode_dirty(inode);
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 out:
 	if (!err)
 		err = nilfs_transaction_commit(dir->i_sb);
@@ -255,6 +263,7 @@ out_fail:
 	drop_nlink(inode);
 	drop_nlink(inode);
 	nilfs_mark_inode_dirty(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 out_dir:
 	drop_nlink(dir);
-- 
cgit v1.2.3


From ddbc22e27e672b6b180757ea1d7f8481dbb88128 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes
Date: Wed, 10 Dec 2014 15:54:37 -0800
Subject: fs/hfs/catalog.c: fix comparison bug in hfs_cat_keycmp

Relying on the sign (after casting to int) of the difference of two
quantities for comparison is usually wrong.  For example, should a-b
turn out to be 2^31, the return value of cmp(a,b) is -2^31; but that
would also be the return value from cmp(b, a).  So a compares less than
b and b compares less than a.  One can also easily find three values
a,b,c such that a compares less than b, b compares less than c, but a
does not compare less than c.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfs/catalog.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index ff0316b925a5..db458ee3a546 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -162,14 +162,16 @@ err2:
  */
 int hfs_cat_keycmp(const btree_key *key1, const btree_key *key2)
 {
-	int retval;
+	__be32 k1p, k2p;
 
-	retval = be32_to_cpu(key1->cat.ParID) - be32_to_cpu(key2->cat.ParID);
-	if (!retval)
-		retval = hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len,
-				    key2->cat.CName.name, key2->cat.CName.len);
+	k1p = key1->cat.ParID;
+	k2p = key2->cat.ParID;
 
-	return retval;
+	if (k1p != k2p)
+		return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
+
+	return hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len,
+			  key2->cat.CName.name, key2->cat.CName.len);
 }
 
 /* Try to get a catalog entry for given catalog id */
-- 
cgit v1.2.3


From c35a7f18a0b237261dad57c9abd3adfa73f315e1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 10 Dec 2014 15:54:56 -0800
Subject: exit: proc: don't try to flush /proc/tgid/task/tgid

proc_flush_task_mnt() always tries to flush task/pid, but this is
pointless if we reap the leader. d_invalidate() is recursive, and
if nothing else the next d_hash_and_lookup(tgid) should fail anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sterling Alexander <stalexan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 772efa45a452..e7b04a321cc1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2618,6 +2618,9 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		dput(dentry);
 	}
 
+	if (pid == tgid)
+		return;
+
 	name.name = buf;
 	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
-- 
cgit v1.2.3