From e4f3ec063421bdbcb93330e72aa3eeedb6a0d85a Mon Sep 17 00:00:00 2001 From: Paul Collins Date: Sat, 14 Jun 2008 14:14:59 +1200 Subject: udf: restore UDFFS_DEBUG to being undefined by default Commit 706047a79725b585cf272fdefc234b31b6545c72, "udf: Fix compilation warnings when UDF debug is on" inadvertently (I assume) enabled debugging messages by default for UDF. This patch disables them again. Signed-off-by: Paul Collins Signed-off-by: Jan Kara --- fs/udf/udfdecl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8fa9c2d70911..8ec865de5f13 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -16,7 +16,7 @@ #define UDF_PREALLOCATE #define UDF_DEFAULT_PREALLOC_BLOCKS 8 -#define UDFFS_DEBUG +#undef UDFFS_DEBUG #ifdef UDFFS_DEBUG #define udf_debug(f, a...) \ -- cgit v1.2.3 From 702773b16e83fcddc41e0019b8214d3c3cecedbe Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 16 Jun 2008 12:11:54 +0100 Subject: Include <linux/a.out.h> in fs/exec.c only for Alpha. We only need it for the /sbin/loader hack for OSF/1 executables, and we don't want to include it otherwise. While we're at it, remove the redundant '&& CONFIG_ARCH_SUPPORTS_AOUT' in the ifdef around that code. It's already dependent on __alpha__, and CONFIG_ARCH_SUPPORTS_AOUT is hard-coded to 'y' there. 
Signed-off-by: David Woodhouse Acked-by: Peter Korsgaard Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 9448f1b50b4a..da94a6f05df3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -61,6 +60,11 @@ #include #endif +#ifdef __alpha__ +/* for /sbin/loader handling in search_binary_handler() */ +#include +#endif + int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; @@ -1155,7 +1159,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int try,retval; struct linux_binfmt *fmt; -#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT) +#ifdef __alpha__ /* handle /sbin/loader.. */ { struct exec * eh = (struct exec *) bprm->buf; -- cgit v1.2.3 From a9e0f5293d4999f93b469af4e70382db800a8204 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 16 Jun 2008 12:18:13 +0100 Subject: Remove last traces of a.out support from ELF loader. In commit d20894a23708c2af75966534f8e4dedb46d48db2 ("Remove a.out interpreter support in ELF loader"), Andi removed support for a.out interpreters from the ELF loader, which was only ever needed for the transition from a.out to ELF. This removes the last traces of that support, in particular the inclusion of <linux/a.out.h>. 
Signed-off-by: David Woodhouse Acked-by: Peter Korsgaard Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0fa95b198e6e..d48ff5f370f4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -548,7 +547,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) struct { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; - struct exec interp_ex; } *loc; loc = kmalloc(sizeof(*loc), GFP_KERNEL); @@ -680,7 +678,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } /* Get the exec headers */ - loc->interp_ex = *((struct exec *)bprm->buf); loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } -- cgit v1.2.3 From 3878f110f71a0971ff7acc15dd6db711b6ef37c6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:30:49 -0700 Subject: ocfs2: Move the hb_ctl_path sysctl into the stack glue. ocfs2 needs to call out to the hb_ctl program at unmount for all cluster stacks. The first step is to move the hb_ctl_path sysctl out of the o2cb code and into the generic stack glue. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/nodemanager.c | 74 +----------------------------------- fs/ocfs2/cluster/nodemanager.h | 4 -- fs/ocfs2/stack_o2cb.c | 2 +- fs/ocfs2/stackglue.c | 85 ++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/stackglue.h | 2 + 5 files changed, 89 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cf9401e8cd0b..cfdb08b484ed 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -21,7 +21,6 @@ #include #include -#include #include #include "tcp.h" @@ -36,65 +35,6 @@ * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; -#define OCFS2_MAX_HB_CTL_PATH 256 -static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; - -static ctl_table ocfs2_nm_table[] = { - { - .ctl_name = 1, - .procname = "hb_ctl_path", - .data = ocfs2_hb_ctl_path, - .maxlen = OCFS2_MAX_HB_CTL_PATH, - .mode = 0644, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string, - }, - { .ctl_name = 0 } -}; - -static ctl_table ocfs2_mod_table[] = { - { - .ctl_name = FS_OCFS2_NM, - .procname = "nm", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_nm_table - }, - { .ctl_name = 0} -}; - -static ctl_table ocfs2_kern_table[] = { - { - .ctl_name = FS_OCFS2, - .procname = "ocfs2", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_mod_table - }, - { .ctl_name = 0} -}; - -static ctl_table ocfs2_root_table[] = { - { - .ctl_name = CTL_FS, - .procname = "fs", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_kern_table - }, - { .ctl_name = 0 } -}; - -static struct ctl_table_header *ocfs2_table_header = NULL; - -const char *o2nm_get_hb_ctl_path(void) -{ - return ocfs2_hb_ctl_path; -} -EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { @@ -941,9 +881,6 @@ void 
o2nm_undepend_this_node(void) static void __exit exit_o2nm(void) { - if (ocfs2_table_header) - unregister_sysctl_table(ocfs2_table_header); - /* XXX sync with hb callbacks and shut down hb? */ o2net_unregister_hb_callbacks(); configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); @@ -964,16 +901,9 @@ static int __init init_o2nm(void) if (ret) goto out; - ocfs2_table_header = register_sysctl_table(ocfs2_root_table); - if (!ocfs2_table_header) { - printk(KERN_ERR "nodemanager: unable to register sysctl\n"); - ret = -ENOMEM; /* or something. */ - goto out_o2net; - } - ret = o2net_register_hb_callbacks(); if (ret) - goto out_sysctl; + goto out_o2net; config_group_init(&o2nm_cluster_group.cs_subsys.su_group); mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); @@ -990,8 +920,6 @@ static int __init init_o2nm(void) configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); out_callbacks: o2net_unregister_hb_callbacks(); -out_sysctl: - unregister_sysctl_table(ocfs2_table_header); out_o2net: o2net_exit(); out: diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 7c860361b8dd..c992ea0da4ad 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h @@ -33,10 +33,6 @@ #include #include -#define FS_OCFS2_NM 1 - -const char *o2nm_get_hb_ctl_path(void); - struct o2nm_node { spinlock_t nd_lock; struct config_item nd_item; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index bbd1667aa7d3..fb26a7c69c47 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -338,7 +338,7 @@ static void o2hb_stop(const char *group) int ret; char *argv[5], *envp[3]; - argv[0] = (char *)o2nm_get_hb_ctl_path(); + argv[0] = (char *)ocfs2_get_hb_ctl_path(); argv[1] = "-K"; argv[2] = "-u"; argv[3] = (char *)group; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 119f60cea9cc..fb9b8e0db260 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -26,6 +26,7 @@ #include #include #include +#include 
#include "ocfs2_fs.h" @@ -548,10 +549,92 @@ error: return ret; } +/* + * Sysctl bits + * + * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't + * make as much sense in a multiple cluster stack world, but it's safer + * and easier to preserve the name. + */ + +#define FS_OCFS2_NM 1 + +#define OCFS2_MAX_HB_CTL_PATH 256 +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; + +static ctl_table ocfs2_nm_table[] = { + { + .ctl_name = 1, + .procname = "hb_ctl_path", + .data = ocfs2_hb_ctl_path, + .maxlen = OCFS2_MAX_HB_CTL_PATH, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, + { .ctl_name = 0 } +}; + +static ctl_table ocfs2_mod_table[] = { + { + .ctl_name = FS_OCFS2_NM, + .procname = "nm", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_nm_table + }, + { .ctl_name = 0} +}; + +static ctl_table ocfs2_kern_table[] = { + { + .ctl_name = FS_OCFS2, + .procname = "ocfs2", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_mod_table + }, + { .ctl_name = 0} +}; + +static ctl_table ocfs2_root_table[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_kern_table + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *ocfs2_table_header = NULL; + +const char *ocfs2_get_hb_ctl_path(void) +{ + return ocfs2_hb_ctl_path; +} +EXPORT_SYMBOL_GPL(ocfs2_get_hb_ctl_path); + + +/* + * Initialization + */ + static int __init ocfs2_stack_glue_init(void) { strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); + ocfs2_table_header = register_sysctl_table(ocfs2_root_table); + if (!ocfs2_table_header) { + printk(KERN_ERR + "ocfs2 stack glue: unable to register sysctl\n"); + return -ENOMEM; /* or something. 
*/ + } + return ocfs2_sysfs_init(); } @@ -559,6 +642,8 @@ static void __exit ocfs2_stack_glue_exit(void) { lproto = NULL; ocfs2_sysfs_exit(); + if (ocfs2_table_header) + unregister_sysctl_table(ocfs2_table_header); } MODULE_AUTHOR("Oracle"); diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 005e4f170e0f..c9fb01ab6347 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -258,4 +258,6 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) /* Used by stack plugins */ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +const char *ocfs2_get_hb_ctl_path(void); + #endif /* STACKGLUE_H */ -- cgit v1.2.3 From 9f9a99f4eccc64650e932090cff0ebd07b81e334 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:43:58 -0700 Subject: ocfs2: Move the call of ocfs2_hb_ctl into the stack glue. Take o2hb_stop() out of the o2cb code and make it part of the generic stack glue as ocfs2_leave_group(). This also allows us to remove the ocfs2_get_hb_ctl_path() function - everything to do with hb_ctl is now part of stackglue.c. o2cb no longer needs a ->hangup() function. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 38 -------------------------------------- fs/ocfs2/stackglue.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- fs/ocfs2/stackglue.h | 1 - 3 files changed, 40 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index fb26a7c69c47..765ade5ee84a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -333,43 +333,6 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, return 0; } -static void o2hb_stop(const char *group) -{ - int ret; - char *argv[5], *envp[3]; - - argv[0] = (char *)ocfs2_get_hb_ctl_path(); - argv[1] = "-K"; - argv[2] = "-u"; - argv[3] = (char *)group; - argv[4] = NULL; - - mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); - - /* minimal command environment taken from cpu_run_sbin_hotplug */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - if (ret < 0) - mlog_errno(ret); -} - -/* - * Hangup is a hack for tools compatibility. Older ocfs2-tools software - * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This - * happens regardless of whether the DLM got started, so we can't do it - * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into - * the glue and provide a "hangup" API for super.c to call. - * - * Other stacks will eventually provide a NULL ->hangup() pointer. 
- */ -static void o2cb_cluster_hangup(const char *group, int grouplen) -{ - o2hb_stop(group); -} - static int o2cb_cluster_this_node(unsigned int *node) { int node_num; @@ -388,7 +351,6 @@ static int o2cb_cluster_this_node(unsigned int *node) static struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, - .hangup = o2cb_cluster_hangup, .this_node = o2cb_cluster_this_node, .dlm_lock = o2cb_dlm_lock, .dlm_unlock = o2cb_dlm_unlock, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index fb9b8e0db260..5f78ff4c76c7 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -34,11 +34,13 @@ #define OCFS2_STACK_PLUGIN_O2CB "o2cb" #define OCFS2_STACK_PLUGIN_USER "user" +#define OCFS2_MAX_HB_CTL_PATH 256 static struct ocfs2_locking_protocol *lproto; static DEFINE_SPINLOCK(ocfs2_stack_lock); static LIST_HEAD(ocfs2_stack_list); static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; /* * The stack currently in use. If not null, active_stack->sp_count > 0, @@ -363,6 +365,42 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, } EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); +/* + * Leave the group for this filesystem. This is executed by a userspace + * program (stored in ocfs2_hb_ctl_path). 
+ */ +static void ocfs2_leave_group(const char *group) +{ + int ret; + char *argv[5], *envp[3]; + + argv[0] = ocfs2_hb_ctl_path; + argv[1] = "-K"; + argv[2] = "-u"; + argv[3] = (char *)group; + argv[4] = NULL; + + /* minimal command environment taken from cpu_run_sbin_hotplug */ + envp[0] = "HOME=/"; + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + if (ret < 0) { + printk(KERN_ERR + "ocfs2: Error %d running user helper " + "\"%s %s %s %s\"\n", + ret, argv[0], argv[1], argv[2], argv[3]); + } +} + +/* + * Hangup is a required post-umount. ocfs2-tools software expects the + * filesystem to call "ocfs2_hb_ctl" during unmount. This happens + * regardless of whether the DLM got started, so we can't do it + * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does + * the actual work. + */ void ocfs2_cluster_hangup(const char *group, int grouplen) { BUG_ON(group == NULL); @@ -371,6 +409,8 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) if (active_stack->sp_ops->hangup) active_stack->sp_ops->hangup(group, grouplen); + ocfs2_leave_group(group); + /* cluster_disconnect() was called with hangup_pending==1 */ ocfs2_stack_driver_put(); } @@ -559,9 +599,6 @@ error: #define FS_OCFS2_NM 1 -#define OCFS2_MAX_HB_CTL_PATH 256 -static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; - static ctl_table ocfs2_nm_table[] = { { .ctl_name = 1, @@ -613,12 +650,6 @@ static ctl_table ocfs2_root_table[] = { static struct ctl_table_header *ocfs2_table_header = NULL; -const char *ocfs2_get_hb_ctl_path(void) -{ - return ocfs2_hb_ctl_path; -} -EXPORT_SYMBOL_GPL(ocfs2_get_hb_ctl_path); - /* * Initialization diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index c9fb01ab6347..fe3fd2a12821 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -258,6 +258,5 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) /* Used by 
stack plugins */ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); -const char *ocfs2_get_hb_ctl_path(void); #endif /* STACKGLUE_H */ -- cgit v1.2.3 From 2c39450b39880e162b3eb339672314101f58ee1a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:58:26 -0700 Subject: ocfs2: Remove ->hangup() from stack glue operations. The ->hangup() call was only used to execute ocfs2_hb_ctl. Now that the generic stack glue code handles this, the underlying stack drivers don't need to know about it. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 3 +-- fs/ocfs2/stack_user.c | 3 +-- fs/ocfs2/stackglue.c | 5 +---- fs/ocfs2/stackglue.h | 18 +++--------------- 4 files changed, 6 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 765ade5ee84a..fcd120f1493a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -317,8 +317,7 @@ out: return rc; } -static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, - int hangup_pending) +static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) { struct dlm_ctxt *dlm = conn->cc_lockspace; struct o2dlm_private *priv = conn->cc_private; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 6b97d11f6bf8..c021280dd462 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -816,8 +816,7 @@ out: return rc; } -static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, - int hangup_pending) +static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) { dlm_release_lockspace(conn->cc_lockspace, 2); conn->cc_lockspace = NULL; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 5f78ff4c76c7..10e149ae5e3a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -352,7 +352,7 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 
BUG_ON(conn == NULL); - ret = active_stack->sp_ops->disconnect(conn, hangup_pending); + ret = active_stack->sp_ops->disconnect(conn); /* XXX Should we free it anyway? */ if (!ret) { @@ -406,9 +406,6 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - if (active_stack->sp_ops->hangup) - active_stack->sp_ops->hangup(group, grouplen); - ocfs2_leave_group(group); /* cluster_disconnect() was called with hangup_pending==1 */ diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index fe3fd2a12821..db56281dd1be 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -134,22 +134,10 @@ struct ocfs2_stack_operations { * be freed. Thus, a stack must not return from ->disconnect() * until it will no longer reference the conn pointer. * - * If hangup_pending is zero, ocfs2_cluster_disconnect() will also - * be dropping the reference on the module. + * Once this call returns, the stack glue will be dropping this + * connection's reference on the module. */ - int (*disconnect)(struct ocfs2_cluster_connection *conn, - int hangup_pending); - - /* - * ocfs2_cluster_hangup() exists for compatibility with older - * ocfs2 tools. Only the classic stack really needs it. As such - * ->hangup() is not required of all stacks. See the comment by - * ocfs2_cluster_hangup() for more details. - * - * Note that ocfs2_cluster_hangup() can only be called if - * hangup_pending was passed to ocfs2_cluster_disconnect(). - */ - void (*hangup)(const char *group, int grouplen); + int (*disconnect)(struct ocfs2_cluster_connection *conn); /* * ->this_node() returns the cluster's unique identifier for the -- cgit v1.2.3 From f948d56435fc1f7506f08866302ecd6e60b533dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 17 Jun 2008 18:05:40 +0200 Subject: fuse: fix thinko in max I/O size calculation Use max not min to enforce a lower limit on the max I/O size. 
This bug was introduced by "fuse: fix max i/o size calculation" (commit e5d9a0df07484d6d191756878c974e4307fb24ce). Thanks to Brian Wang for noticing. Reported-by: Brian Wang Signed-off-by: Miklos Szeredi Acked-by: Szabolcs Szakacsits Signed-off-by: Linus Torvalds --- fs/fuse/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 43e99513334a..3141690558c8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -591,7 +591,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; - fc->max_write = min_t(unsigned, 4096, fc->max_write); + fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } fuse_put_request(fc, req); @@ -667,7 +667,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; - fc->max_read = min_t(unsigned, 4096, d.max_read); + fc->max_read = max_t(unsigned, 4096, d.max_read); /* Used by get_root_inode() */ sb->s_fs_info = fc; -- cgit v1.2.3 From 2856922c158605514ec5974a03097eaec91f4c0d Mon Sep 17 00:00:00 2001 From: Frederic Bohe Date: Fri, 20 Jun 2008 11:48:48 -0400 Subject: Ext4: Fix online resize block group descriptor corruption This is the patch for the group descriptor table corruption during online resize pointed out by Theodore Tso. The problem was caused by the fact that the ext4 group descriptor can be either 32 or 64 bytes long. Only the 64 bytes structure was taken into account. 
Signed-off-by: Frederic Bohe Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ecb92f68543..9ff7b1c04239 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; + gdp = (struct ext4_group_desc *)((char *)primary->b_data + + gdb_off * EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ -- cgit v1.2.3 From 55d8538498f62ec72b5ba67aa386c7726f630475 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Jun 2008 12:23:15 -0700 Subject: Fix performance regression on lmbench select benchmark Christian Borntraeger reported that reinstating cond_resched() with CONFIG_PREEMPT caused a performance regression on lmbench: For example select file 500: 23 microseconds 32 microseconds and that's really because we totally unnecessarily do the cond_resched() in the innermost loop of select(), which is just silly. This moves it out from the innermost loop (which only ever loops over the bits in a single "unsigned long" anyway), which makes the performance regression go away. 
Reported-and-tested-by: Christian Borntraeger Signed-off-by: Linus Torvalds --- fs/select.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 8dda969614a9..da0e88201c3a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) retval++; } } - cond_resched(); } if (res_in) *rinp = res_in; @@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) *routp = res_out; if (res_ex) *rexp = res_ex; + cond_resched(); } wait = NULL; if (retval || !*timeout || signal_pending(current)) -- cgit v1.2.3 From fe6e9c1f25ac01f848bd084ee0ee62a5a0966ff3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 23 Jun 2008 08:30:55 -0400 Subject: [PATCH] fix cgroup-inflicted breakage in block_dev.c devcgroup_inode_permission() expects MAY_FOO, not FMODE_FOO; kindly keep your misdesign consistent if you positively have to inflict it on the kernel. Signed-off-by: Al Viro --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 470c10ceb0fb..10d8a0aa871a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -931,8 +931,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) struct gendisk *disk; int ret; int part; + int perm = 0; - ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (file->f_mode & FMODE_READ) + perm |= MAY_READ; + if (file->f_mode & FMODE_WRITE) + perm |= MAY_WRITE; + /* + * hooks: /n/, see "layering violations". 
+ */ + ret = devcgroup_inode_permission(bdev->bd_inode, perm); if (ret != 0) return ret; -- cgit v1.2.3 From 12fd0d3088d27867be68655bcab2b074f2835f60 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:07 -0700 Subject: [patch for 2.6.26 2/4] vfs: utimensat(): be consistent with utime() for immutable and append-only files This patch fixes utimensat() to make its behavior consistent with that of utime()/utimes() when dealing with files marked immutable and append-only. The current utimensat() implementation also returns EPERM if 'times' is non-NULL and the tv_nsec fields are both UTIME_NOW. For consistency, the (times != NULL && times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) case should be treated like the traditional utimes() case where 'times' is NULL. That is, the call should succeed for a file marked append-only and should give the error EACCES if the file is marked as immutable. The simple way to do this is to set 'times' to NULL if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW). This is also the natural approach, since POSIX.1 semantics consider the times == {{x, UTIME_NOW}, {y, UTIME_NOW}} to be exactly equivalent to the case for times == NULL. (Thanks to Miklos for pointing this out.) Patch 3 in this series relies on the simplification provided by this patch. 
Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index af059d5cb485..14d3edbb3d7c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -102,6 +102,10 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (error) goto dput_and_out; + if (times && times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; + /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { -- cgit v1.2.3 From 94c70b9ba7e9c1036284e779e2fef5be89021533 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:05 -0700 Subject: [patch for 2.6.26 1/4] vfs: utimensat(): ignore tv_sec if tv_nsec == UTIME_OMIT or UTIME_NOW The POSIX.1 draft spec for utimensat() says that if a times[n].tv_nsec field is UTIME_OMIT or UTIME_NOW, then the value in the corresponding tv_sec field is ignored. See the last sentence of this para, from the spec: If the tv_nsec field of a timespec structure has the special value UTIME_NOW, the file's relevant timestamp shall be set to the greatest value supported by the file system that is not greater than the current time. If the tv_nsec field has the special value UTIME_OMIT, the file's relevant timestamp shall not be changed. In either case, the tv_sec field shall be ignored. However the current Linux implementation requires the tv_sec value to be zero (or the EINVAL error results). This requirement should be removed. 
Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index 14d3edbb3d7c..d466bc587e6e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -173,14 +173,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ if (utimes) { if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) return -EFAULT; - if ((tstimes[0].tv_nsec == UTIME_OMIT || - tstimes[0].tv_nsec == UTIME_NOW) && - tstimes[0].tv_sec != 0) - return -EINVAL; - if ((tstimes[1].tv_nsec == UTIME_OMIT || - tstimes[1].tv_nsec == UTIME_NOW) && - tstimes[1].tv_sec != 0) - return -EINVAL; /* Nothing to do, we must not even check the path. */ if (tstimes[0].tv_nsec == UTIME_OMIT && -- cgit v1.2.3 From 4cca92264e61a90b43fc4e076cd25b7f4e16dc61 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:08 -0700 Subject: [patch for 2.6.26 3/4] vfs: utimensat(): fix error checking for {UTIME_NOW,UTIME_OMIT} case The POSIX.1 draft spec for utimensat() says: Only a process with the effective user ID equal to the user ID of the file or with appropriate privileges may use futimens() or utimensat() with a non-null times argument that does not have both tv_nsec fields set to UTIME_NOW and does not have both tv_nsec fields set to UTIME_OMIT. If this condition is violated, then the error EPERM should result. However, the current implementation does not generate EPERM if one tv_nsec field is UTIME_NOW while the other is UTIME_OMIT. It should give this error for that case. This patch: a) Repairs that problem. b) Removes the now unneeded nsec_special() helper function. c) Adds some comments to explain the checks that are being performed. Thanks to Miklos, who provided comments on the previous iteration of this patch. 
As a result, this version is a little simpler and its logic is better structured. Miklos suggested an alternative idea, migrating the is_owner_or_cap() checks into fs/attr.c:inode_change_ok() via the use of an ATTR_OWNER_CHECK flag. Maybe we could do that later, but for now I've gone with this version, which is IMO simpler, and can be more easily read as being correct. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index d466bc587e6e..118d1c3241be 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) #endif -static bool nsec_special(long nsec) -{ - return nsec == UTIME_OMIT || nsec == UTIME_NOW; -} - static bool nsec_valid(long nsec) { - if (nsec_special(nsec)) + if (nsec == UTIME_OMIT || nsec == UTIME_NOW) return true; return nsec >= 0 && nsec <= 999999999; @@ -106,7 +101,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags times[1].tv_nsec == UTIME_NOW) times = NULL; - /* Don't worry, the checks are done in inode_change_ok() */ + /* In most cases, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { error = -EPERM; @@ -128,15 +123,26 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - } - /* - * If times is NULL or both times are either UTIME_OMIT or - * UTIME_NOW, then need to check permissions, because - * inode_change_ok() won't do it. 
- */ - if (!times || (nsec_special(times[0].tv_nsec) && - nsec_special(times[1].tv_nsec))) { + /* + * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT + * cases, we need to make an extra check that is not done by + * inode_change_ok(). + */ + if (((times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_OMIT) + || + (times[0].tv_nsec == UTIME_OMIT && + times[1].tv_nsec == UTIME_NOW)) + && !is_owner_or_cap(inode)) + goto mnt_drop_write_and_out; + } else { + + /* + * If times is NULL (or both times are UTIME_NOW), + * then we need to check permissions, because + * inode_change_ok() won't do it. + */ error = -EACCES; if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; -- cgit v1.2.3 From c70f84417429f41519be0197a1092a53c2201f47 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:09 -0700 Subject: [patch for 2.6.26 4/4] vfs: utimensat(): fix write access check for futimens() The POSIX.1 draft spec for futimens()/utimensat() says: Only a process with the effective user ID equal to the user ID of the file, *or with write access to the file*, or with appropriate privileges may use futimens() or utimensat() with a null pointer as the times argument or with both tv_nsec fields set to the special value UTIME_NOW. The important piece here is "with write access to the file", and this matters for futimens(), which deals with an argument that is a file descriptor referring to the file whose timestamps are being updated. The standard is saying that the "writability" check is based on the file permissions, not the access mode with which the file is opened. (This behavior is consistent with the semantics of FreeBSD's futimes().) However, Linux is currently doing the latter -- futimens(fd, times) is a library function implemented as utimensat(fd, NULL, times, 0) and within the utimensat() implementation we have the code: f = fget(dfd); // dfd is 'fd' ... 
if (f) { if (!(f->f_mode & FMODE_WRITE)) goto mnt_drop_write_and_out; The check should instead be based on the file permissions. Thanks to Miklos for pointing out how to do this check. Miklos also pointed out a simplification that could be made to my first version of this patch, since the checks for the pathname and file descriptor cases can now be conflated. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index 118d1c3241be..b6b664e7145e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -148,14 +148,9 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { - if (f) { - if (!(f->f_mode & FMODE_WRITE)) - goto mnt_drop_write_and_out; - } else { - error = vfs_permission(&nd, MAY_WRITE); - if (error) - goto mnt_drop_write_and_out; - } + error = permission(inode, MAY_WRITE, NULL); + if (error) + goto mnt_drop_write_and_out; } } mutex_lock(&inode->i_mutex); -- cgit v1.2.3 From c8e7f449b225ee6c87454ac069f0a041035c5140 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Mon, 9 Jun 2008 16:40:35 -0700 Subject: [patch 1/4] vfs: path_{get,put}() cleanups Here are some more places where path_{get,put}() can be used instead of dput()/mntput() pair. 
Signed-off-by: Jan Blunck Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 11 +++++------ fs/pipe.c | 10 ++++------ 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index c7e43536c49a..ee1544696e83 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd int result; /* make sure the stuff we saved doesn't go away */ - dget(save.dentry); - mntget(save.mnt); + path_get(&save); result = __link_path_walk(name, nd); if (result == -ESTALE) { /* nd->path had been dropped */ nd->path = save; - dget(nd->path.dentry); - mntget(nd->path.mnt); + path_get(&nd->path); nd->flags |= LOOKUP_REVAL; result = __link_path_walk(name, nd); } @@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->flags = flags; nd->depth = 0; - nd->path.mnt = mntget(mnt); - nd->path.dentry = dget(dentry); + nd->path.dentry = dentry; + nd->path.mnt = mnt; + path_get(&nd->path); retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && diff --git a/fs/pipe.c b/fs/pipe.c index ec228bc9f882..700f4e0d9572 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void) void free_write_pipe(struct file *f) { free_pipe_info(f->f_dentry->d_inode); - dput(f->f_path.dentry); - mntput(f->f_path.mnt); + path_put(&f->f_path); put_filp(f); } @@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf) return ERR_PTR(-ENFILE); /* Grab pipe from the writer */ - f->f_path.mnt = mntget(wrf->f_path.mnt); - f->f_path.dentry = dget(wrf->f_path.dentry); + f->f_path = wrf->f_path; + path_get(&wrf->f_path); f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; f->f_pos = 0; @@ -1068,8 +1067,7 @@ int do_pipe(int *fd) err_fdr: put_unused_fd(fdr); err_read_pipe: - dput(fr->f_dentry); - mntput(fr->f_vfsmnt); + 
path_put(&fr->f_path); put_filp(fr); err_write_pipe: free_write_pipe(fw); -- cgit v1.2.3 From 20d4fdc1a788e4ca0aaf2422772ba668e7e10839 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 9 Jun 2008 16:40:36 -0700 Subject: [patch 2/4] fs: make struct file arg to d_path const Signed-off-by: Jan Engelhardt Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/dcache.c | 2 +- include/linux/dcache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 3ee588d5f585..c4c9072d810c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1847,7 +1847,7 @@ Elong: * * "buflen" should be positive. Caller holds the dcache_lock. */ -char *d_path(struct path *path, char *buf, int buflen) +char *d_path(const struct path *path, char *buf, int buflen) { char *res; struct path root; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..d982eb89c77d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -300,7 +300,7 @@ extern int d_validate(struct dentry *, struct dentry *); extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); -extern char *d_path(struct path *, char *, int); +extern char *d_path(const struct path *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ -- cgit v1.2.3 From 694a1764d657e0f7a9b139bc7269c8d5f5a2534b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 9 Jun 2008 16:40:37 -0700 Subject: [patch 3/4] vfs: fix ERR_PTR abuse in generic_readlink generic_readlink calls ERR_PTR for negative and positive values (vfs_readlink returns length of "link"), but it should not (not an errno) and does not need to. 
Signed-off-by: Marcin Slusarz Cc: Al Viro Cc: Christoph Hellwig Acked-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index ee1544696e83..01e67dddcc3d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2856,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct nameidata nd; void *cookie; + int res; nd.depth = 0; cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (!IS_ERR(cookie)) { - int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd, cookie); - cookie = ERR_PTR(res); - } - return PTR_ERR(cookie); + if (IS_ERR(cookie)) + return PTR_ERR(cookie); + + res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + return res; } int vfs_follow_link(struct nameidata *nd, const char *link) -- cgit v1.2.3 From f9f48ec72bfc9489a30bc6ddbfcf27d86a8bc651 Mon Sep 17 00:00:00 2001 From: Denis V. Lunev Date: Mon, 9 Jun 2008 16:40:38 -0700 Subject: [patch 4/4] flock: remove unused fields from file_lock_operations fl_insert and fl_remove are not used right now in the kernel. Remove them. Signed-off-by: Denis V. Lunev Cc: Matthew Wilcox Cc: Alexander Viro Cc: "J. 
Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/locks.c | 6 ------ include/linux/fs.h | 2 -- 2 files changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 11dbf08651b7..dce8c747371c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) /* insert into file's list */ fl->fl_next = *pos; *pos = fl; - - if (fl->fl_ops && fl->fl_ops->fl_insert) - fl->fl_ops->fl_insert(fl); } /* @@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p) fl->fl_fasync = NULL; } - if (fl->fl_ops && fl->fl_ops->fl_remove) - fl->fl_ops->fl_remove(fl); - if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index d490779f18d9..7c1080826832 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -894,8 +894,6 @@ static inline int file_check_writeable(struct file *filp) typedef struct files_struct *fl_owner_t; struct file_lock_operations { - void (*fl_insert)(struct file_lock *); /* lock insertion callback */ - void (*fl_remove)(struct file_lock *); /* lock removal callback */ void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; -- cgit v1.2.3 From be285c712bbbe5db43e503782fbef2bfeaa345f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Jun 2008 13:28:07 +0200 Subject: [patch 3/3] vfs: make d_path() consistent across mount operations The path that __d_path() computes can become slightly inconsistent when it races with mount operations: it grabs the vfsmount_lock when traversing mount points but immediately drops it again, only to re-grab it when it reaches the next mount point. The result is that the filename computed is not always consistent, and the file may never have had that name. (This is unlikely, but still possible.)
Fix this by grabbing the vfsmount_lock for the whole duration of __d_path(). Signed-off-by: Andreas Gruenbacher Signed-off-by: John Johansen Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index c4c9072d810c..2b479de10a0a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1782,6 +1782,7 @@ char *__d_path(const struct path *path, struct path *root, char * end = buffer+buflen; char * retval; + spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); if (!IS_ROOT(dentry) && d_unhashed(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) @@ -1800,14 +1801,11 @@ char *__d_path(const struct path *path, struct path *root, break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ - spin_lock(&vfsmount_lock); if (vfsmnt->mnt_parent == vfsmnt) { - spin_unlock(&vfsmount_lock); goto global_root; } dentry = vfsmnt->mnt_mountpoint; vfsmnt = vfsmnt->mnt_parent; - spin_unlock(&vfsmount_lock); continue; } parent = dentry->d_parent; @@ -1820,6 +1818,8 @@ char *__d_path(const struct path *path, struct path *root, dentry = parent; } +out: + spin_unlock(&vfsmount_lock); return retval; global_root: @@ -1829,9 +1829,11 @@ global_root: goto Elong; root->mnt = vfsmnt; root->dentry = dentry; - return retval; + goto out; + Elong: - return ERR_PTR(-ENAMETOOLONG); + retval = ERR_PTR(-ENAMETOOLONG); + goto out; } /** -- cgit v1.2.3 From 31f3e0b3a18c6d48196c40a82a3b8c01f4ff6b23 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Jun 2008 18:11:52 +0200 Subject: [patch 1/3] vfs: dcache sparse fixes Fix the following sparse warnings: fs/dcache.c:2183:19: warning: symbol 'filp_cachep' was not declared. Should it be static? 
fs/dcache.c:115:3: warning: context imbalance in 'dentry_iput' - unexpected unlock fs/dcache.c:188:2: warning: context imbalance in 'dput' - different lock contexts for basic block fs/dcache.c:400:2: warning: context imbalance in 'prune_one_dentry' - different lock contexts for basic block fs/dcache.c:431:22: warning: context imbalance in 'prune_dcache' - different lock contexts for basic block fs/dcache.c:563:2: warning: context imbalance in 'shrink_dcache_sb' - different lock contexts for basic block fs/dcache.c:1385:6: warning: context imbalance in 'd_delete' - wrong count at exit fs/dcache.c:1636:2: warning: context imbalance in '__d_unalias' - unexpected unlock fs/dcache.c:1735:2: warning: context imbalance in 'd_materialise_unique' - different lock contexts for basic block Signed-off-by: Miklos Szeredi Reviewed-by: Matthew Wilcox Acked-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 2b479de10a0a..e4b2b9436b32 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry) /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. - * Called with dcache_lock and per dentry lock held, drops both. */ static void dentry_iput(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry) * d_kill - kill dentry and return parent * @dentry: dentry to kill * - * Called with dcache_lock and d_lock, releases both. The dentry must - * already be unhashed and removed from the LRU. + * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. 
*/ static struct dentry *d_kill(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct dentry *parent; @@ -383,11 +386,11 @@ restart: * Try to prune ancestors as well. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. - * - * Called with dcache_lock, drops it and then regains. - * Called with dentry->d_lock held, drops it. */ static void prune_one_dentry(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) + __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); @@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2) * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... - * - * On return, dcache_lock will have been unlocked. */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) + __releases(dcache_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -1743,7 +1745,6 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_lock); BUG(); - goto shouldnt_be_hashed; } static int prepend(char **buffer, int *buflen, const char *str, @@ -1758,7 +1759,7 @@ static int prepend(char **buffer, int *buflen, const char *str, } /** - * d_path - return the path of a dentry + * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: buffer to return value in @@ -1847,7 +1848,7 @@ Elong: * * Returns the buffer or an error code if the path was too long. * - * "buflen" should be positive. Caller holds the dcache_lock. + * "buflen" should be positive. 
*/ char *d_path(const struct path *path, char *buf, int buflen) { -- cgit v1.2.3 From cdd16d0265c9234228fd37fbbad844d7e894b278 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Jun 2008 18:11:53 +0200 Subject: [patch 2/3] vfs: dcache cleanups Comment from Al Viro: add prepend_name() wrapper. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/dcache.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index e4b2b9436b32..6068c25b393c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1747,8 +1747,7 @@ shouldnt_be_hashed: BUG(); } -static int prepend(char **buffer, int *buflen, const char *str, - int namelen) +static int prepend(char **buffer, int *buflen, const char *str, int namelen) { *buflen -= namelen; if (*buflen < 0) @@ -1758,6 +1757,11 @@ static int prepend(char **buffer, int *buflen, const char *str, return 0; } +static int prepend_name(char **buffer, int *buflen, struct qstr *name) +{ + return prepend(buffer, buflen, name->name, name->len); +} + /** * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report @@ -1780,8 +1784,8 @@ char *__d_path(const struct path *path, struct path *root, { struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; - char * end = buffer+buflen; - char * retval; + char *end = buffer + buflen; + char *retval; spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); @@ -1811,8 +1815,7 @@ char *__d_path(const struct path *path, struct path *root, } parent = dentry->d_parent; prefetch(parent); - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; retval = end; @@ -1825,8 +1828,7 @@ out: global_root: retval += 1; /* hit the slash */ - if (prepend(&retval, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) + if (prepend_name(&retval, &buflen, 
&dentry->d_name) != 0) goto Elong; root->mnt = vfsmnt; root->dentry = dentry; @@ -1918,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) retval = end-1; *retval = '/'; - for (;;) { - struct dentry *parent; - if (IS_ROOT(dentry)) - break; + while (!IS_ROOT(dentry)) { + struct dentry *parent = dentry->d_parent; - parent = dentry->d_parent; prefetch(parent); - - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; @@ -1978,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) error = -ENOENT; /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { + if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; -- cgit v1.2.3 From 33852a1f2bb014e4047a844556c0d76a2f790c37 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jun 2008 14:20:11 -0400 Subject: NFS: Reduce the NFS mount code stack usage. 
This appears to fix the Oops reported in http://bugzilla.kernel.org/show_bug.cgi?id=10826 Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 68 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2a4a024a4e7b..dac663dc5611 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options, { struct nfs_mount_data *data = (struct nfs_mount_data *)options; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type, { struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh mntfh; - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); + error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs_create_server(&data, &mntfh); + server = nfs_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, &data); + nfs_fill_super(s, data); } - mntroot = nfs_get_root(s, &mntfh); + mntroot = nfs_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto 
error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.nfs_server.hostname); - kfree(data.mount_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->nfs_server.hostname); + kfree(data->mount_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_err_nosb: @@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options, struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1959,26 +1963,31 @@ out_no_client_address: static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; struct super_block *s; struct nfs_server *server; - struct nfs_fh mntfh; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, &data, dev_name); + error = nfs4_validate_mount_data(raw_data, data, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs4_create_server(&data, &mntfh); + server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type 
*fs_type, nfs4_fill_super(s); } - mntroot = nfs4_get_root(s, &mntfh); + mntroot = nfs4_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.client_address); - kfree(data.nfs_server.export_path); - kfree(data.nfs_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_free: -- cgit v1.2.3 From b7e2445737ff69cef892b6fd9cd71cae2c9e9515 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jun 2008 15:21:11 -0400 Subject: NFS: Fix filehandle size comparisons in the mount code Fix a sign issue in xdr_decode_fhstatus3() Fix incorrect comparison in nfs_validate_mount_data() Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 5 +++-- fs/nfs/super.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 49c7cd0502cc..779d2eb649c5 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; + unsigned size; if ((res->status = ntohl(*p++)) == 0) { - int size = ntohl(*p++); - if (size <= NFS3_FHSIZE) { + size = ntohl(*p++); + if (size <= NFS3_FHSIZE && size != 0) { fh->size = size; memcpy(fh->data, p, size); } else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dac663dc5611..614efeed5437 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1249,13 +1249,13 @@ static int nfs_validate_mount_data(void *options, case 5: 
memset(data->context, 0, sizeof(data->context)); case 6: - if (data->flags & NFS_MOUNT_VER3) + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; mntfh->size = data->root.size; - else + } else mntfh->size = NFS2_FHSIZE; - if (mntfh->size > sizeof(mntfh->data)) - goto out_invalid_fh; memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) -- cgit v1.2.3 From 03fa9e84e5dc10aeacb0e4eb2f708cd9fc36a5b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jun 2008 16:02:35 -0400 Subject: NFS: nfs_updatepage(): don't mark page as dirty if an error occurred Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6d8ace3e3259..f333848fd3be 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page, } status = nfs_writepage_setup(ctx, page, offset, count); - __set_page_dirty_nobuffers(page); + if (status < 0) + nfs_set_pageerror(page); + else + __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); - if (status < 0) - nfs_set_pageerror(page); return status; } -- cgit v1.2.3 From e8183c2452041326c95258ecc7865b6fcd91c730 Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Mon, 23 Jun 2008 15:12:35 +0200 Subject: udf: Fix regression in UDF anchor block detection In some cases it could happen that some block passed test in udf_check_anchor_block() even though udf_read_tagged() refused to read it later (e.g. because checksum was not correct). This patch makes udf_check_anchor_block() use udf_read_tagged() so that the checking is stricter. This fixes the regression (certain disks unmountable) caused by commit 423cf6dc04eb79d441bfda2b127bc4b57134b41d. 
Signed-off-by: Tomas Janousek Signed-off-by: Jan Kara --- fs/udf/super.c | 57 +++++++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 7a5f69be6ac2..44cc702f96cc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -682,38 +682,26 @@ static int udf_vrs(struct super_block *sb, int silent) /* * Check whether there is an anchor block in the given block */ -static int udf_check_anchor_block(struct super_block *sb, sector_t block, - bool varconv) +static int udf_check_anchor_block(struct super_block *sb, sector_t block) { - struct buffer_head *bh = NULL; - tag *t; + struct buffer_head *bh; uint16_t ident; - uint32_t location; - if (varconv) { - if (udf_fixed_to_variable(block) >= - sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) - return 0; - bh = sb_bread(sb, udf_fixed_to_variable(block)); - } - else - bh = sb_bread(sb, block); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && + udf_fixed_to_variable(block) >= + sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) + return 0; + bh = udf_read_tagged(sb, block, block, &ident); if (!bh) return 0; - - t = (tag *)bh->b_data; - ident = le16_to_cpu(t->tagIdent); - location = le32_to_cpu(t->tagLocation); brelse(bh); - if (ident != TAG_IDENT_AVDP) - return 0; - return location == block; + + return ident == TAG_IDENT_AVDP; } /* Search for an anchor volume descriptor pointer */ -static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, - sector_t lastblock) +static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock) { sector_t last[6]; int i; @@ -739,7 +727,7 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, sb->s_blocksize_bits) continue; - if (udf_check_anchor_block(sb, last[i], varconv)) { + if (udf_check_anchor_block(sb, last[i])) { sbi->s_anchor[0] = last[i]; sbi->s_anchor[1] = last[i] - 256; return last[i]; @@ -748,17 +736,17 @@ static sector_t 
udf_scan_anchors(struct super_block *sb, bool varconv, if (last[i] < 256) continue; - if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { + if (udf_check_anchor_block(sb, last[i] - 256)) { sbi->s_anchor[1] = last[i] - 256; return last[i]; } } - if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { + if (udf_check_anchor_block(sb, sbi->s_session + 256)) { sbi->s_anchor[0] = sbi->s_session + 256; return last[0]; } - if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { + if (udf_check_anchor_block(sb, sbi->s_session + 512)) { sbi->s_anchor[0] = sbi->s_session + 512; return last[0]; } @@ -780,23 +768,24 @@ static void udf_find_anchor(struct super_block *sb) int i; struct udf_sb_info *sbi = UDF_SB(sb); - lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); + lastblock = udf_scan_anchors(sb, sbi->s_last_block); if (lastblock) goto check_anchor; /* No anchor found? Try VARCONV conversion of block numbers */ + UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); /* Firstly, we try to not convert number of the last block */ - lastblock = udf_scan_anchors(sb, 1, + lastblock = udf_scan_anchors(sb, udf_variable_to_fixed(sbi->s_last_block)); - if (lastblock) { - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + if (lastblock) goto check_anchor; - } /* Secondly, we try with converted number of the last block */ - lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); - if (lastblock) - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + lastblock = udf_scan_anchors(sb, sbi->s_last_block); + if (!lastblock) { + /* VARCONV didn't help. Clear it. */ + UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); + } check_anchor: /* -- cgit v1.2.3 From 17c15da00c0e7289375ad57e8fea0c7892b74aa0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 18 Jun 2008 11:30:40 -0500 Subject: [GFS2] BUG: unable to handle kernel paging request at ffff81002690e000 This patch fixes bugzilla bug bz448866: gfs2: BUG: unable to handle kernel paging request at ffff81002690e000. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6387523a3153..3401628d742b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -195,7 +195,7 @@ ulong_aligned: depending on architecture. I've experimented with several ways of writing this section such as using an else before the goto but this one seems to be the fastest. */ - while ((unsigned char *)plong < end - 1) { + while ((unsigned char *)plong < end - sizeof(unsigned long)) { prefetch(plong + 1); if (((*plong) & LBITMASK) != lskipval) break; -- cgit v1.2.3 From 5af4e7a0bea715f2dd7190859a43eb2258b1f388 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 24 Jun 2008 12:53:38 -0500 Subject: [GFS2] fix gfs2 block allocation (cleaned up) This patch fixes bz 450641. This patch changes the computation for zero_metapath_length(), which it renames to metapath_branch_start(). When you are extending the metadata tree, the indirect blocks that point to the new data block must diverge from the existing tree either at the inode or at the first indirect block. They can diverge at the first indirect block because the inode has room for 483 pointers while the indirect blocks have room for 509 pointers, so when the tree is grown, there is some free space in the first indirect block. What metapath_branch_start() now computes is the height where the first indirect block for the new data block is located. It can either be 1 (if the indirect block diverges from the inode) or 2 (if it diverges from the first indirect block).
Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c19184f2e70e..bec76b1c2bb0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block, } -static inline unsigned int zero_metapath_length(const struct metapath *mp, - unsigned height) +static inline unsigned int metapath_branch_start(const struct metapath *mp) { - unsigned int i; - for (i = 0; i < height - 1; i++) { - if (mp->mp_list[i] != 0) - return i; - } - return height; + if (mp->mp_list[0] == 0) + return 2; + return 1; } /** @@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh = mp->mp_bh[0]; u64 bn, dblock = 0; - unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; + unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; unsigned dblks = 0; unsigned ptrs_per_blk; const unsigned end_of_metadata = height - 1; @@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, /* Building up tree height */ state = ALLOC_GROW_HEIGHT; iblks = height - ip->i_height; - zmpl = zero_metapath_length(mp, height); - iblks -= zmpl; - iblks += height; + branch_start = metapath_branch_start(mp); + iblks += (height - branch_start); } } @@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, sizeof(struct gfs2_meta_header)); *ptr = zero_bn; state = ALLOC_GROW_DEPTH; - for(i = zmpl; i < height; i++) { + for(i = branch_start; i < height; i++) { if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); mp->mp_bh[i] = NULL; } - i = zmpl; + i = branch_start; } if (n == 0) break; -- cgit v1.2.3 From 18ce3751ccd488c78d3827e9f6bf54e6322676fb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Jul 2008 09:07:34 
+0200 Subject: Properly notify block layer of sync writes fsync_buffers_list() and sync_dirty_buffer() both issue async writes and then immediately wait on them. Conceptually, that makes them sync writes and we should treat them as such so that the IO schedulers can handle them appropriately. This patch fixes a write starvation issue that Lin Ming reported, where kjournald is stuck for more than 2 minutes because of a large amount of synchronous IO in the system: INFO: task kjournald:20558 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kjournald D ffff810010820978 6712 20558 2 ffff81022ddb1d10 0000000000000046 ffff81022e7baa10 ffffffff803ba6f2 ffff81022ecd0000 ffff8101e6dc9160 ffff81022ecd0348 000000008048b6cb 0000000000000086 ffff81022c4e8d30 0000000000000000 ffffffff80247537 Call Trace: [] kobject_get+0x12/0x17 [] getnstimeofday+0x2f/0x83 [] sync_buffer+0x0/0x3f [] io_schedule+0x5d/0x9f [] sync_buffer+0x3b/0x3f [] __wait_on_bit+0x40/0x6f [] sync_buffer+0x0/0x3f [] out_of_line_wait_on_bit+0x6c/0x78 [] wake_bit_function+0x0/0x23 [] sync_dirty_buffer+0x98/0xcb [] journal_commit_transaction+0x97d/0xcb6 [] lock_timer_base+0x26/0x4b [] kjournald+0xc1/0x1fb [] autoremove_wake_function+0x0/0x2e [] kjournald+0x0/0x1fb [] kthread+0x47/0x74 [] schedule_tail+0x28/0x5d [] child_rip+0xa/0x12 [] kthread+0x0/0x74 [] child_rip+0x0/0x12 Lin Ming confirms that this patch fixes the issue. I've run tests with it for the past week and no ill effects have been observed, so I'm proposing it for inclusion into 2.6.26.
Signed-off-by: Jens Axboe --- fs/buffer.c | 13 ++++++++----- include/linux/fs.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index a073f3f4f013..0f51c0f7c266 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -821,7 +821,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * contents - it is a noop if I/O is still in * flight on potentially older contents. */ - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block(SWRITE_SYNC, 1, &bh); brelse(bh); spin_lock(lock); } @@ -2940,16 +2940,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE) + if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); else if (test_set_buffer_locked(bh)) continue; - if (rw == WRITE || rw == SWRITE) { + if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE, bh); + if (rw == SWRITE_SYNC) + submit_bh(WRITE_SYNC, bh); + else + submit_bh(WRITE, bh); continue; } } else { @@ -2978,7 +2981,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c1080826832..d8e2762ed14d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -83,6 +83,7 @@ extern int dir_notify_enable; #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) +#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 -- cgit v1.2.3