aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jeff Layton 2024-08-07 08:10:27 -0400
committer Christian Brauner 2024-08-30 08:22:34 +0200
commit e747e15156b79efeea0ad056df8de14b93d318c2 (patch)
tree bc352e2008d92204eb849eeb0b735436bca4c7e3
parent b9ca079dd6b09e08863aa998edf5c47597806c05 (diff)
fs: try an opportunistic lookup for O_CREAT opens too
Today, when opening a file we'll typically do a fast lookup, but if O_CREAT is set, the kernel always takes the exclusive inode lock. I assume this was done with the expectation that O_CREAT means that we always expect to do the create, but that's often not the case. Many programs set O_CREAT even in scenarios where the file already exists.

This patch rearranges the pathwalk-for-open code to also attempt a lookup_fast in certain O_CREAT cases. If a positive dentry is found, the inode_lock can be avoided altogether, and if auditing isn't enabled, it can stay in rcuwalk mode for the last step_into.

One notable exception that is hopefully temporary: if we're doing an rcuwalk and auditing is enabled, skip the lookup_fast. Legitimizing the dentry in that case is more expensive than taking the i_rwsem for now.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20240807-openfast-v3-1-040d132d2559@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r-- fs/namei.c 74
1 files changed, 64 insertions, 10 deletions
diff --git a/fs/namei.c b/fs/namei.c
index c3459785704d..3e34f4d97d83 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3605,6 +3605,49 @@ out_dput:
return ERR_PTR(error);
}
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+ return (bool)nd->last.name[nd->last.len]; /* true when the byte right after nd->last is not NUL, i.e. the name has trailing slashes */
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+ struct dentry *dentry;
+ /* Result: a dentry, NULL when none is usable here, or lookup_fast()'s error pointer. */
+ if (open_flag & O_CREAT) {
+ /* Don't bother on an O_EXCL create */
+ if (open_flag & O_EXCL)
+ return NULL;
+
+ /*
+ * FIXME: If auditing is enabled, then we'll have to unlazy to
+ * use the dentry. For now, don't do this, since it shifts
+ * contention from parent's i_rwsem to its d_lockref spinlock.
+ * Reconsider this once dentry refcounting handles heavy
+ * contention better.
+ */
+ if ((nd->flags & LOOKUP_RCU) && !audit_dummy_context())
+ return NULL;
+ }
+ /* Trailing slashes: set LOOKUP_FOLLOW and LOOKUP_DIRECTORY before the lookup. */
+ if (trailing_slashes(nd))
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+ /* NULL and error pointers from lookup_fast() are handed back unchanged. */
+ dentry = lookup_fast(nd);
+ if (IS_ERR_OR_NULL(dentry))
+ return dentry;
+
+ if (open_flag & O_CREAT) {
+ /* Discard negative dentries. Need inode_lock to do the create */
+ if (!dentry->d_inode) {
+ if (!(nd->flags & LOOKUP_RCU)) /* dput() only when not in RCU mode */
+ dput(dentry);
+ dentry = NULL;
+ }
+ }
+ return dentry;
+}
+
static const char *open_last_lookups(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
@@ -3622,28 +3665,39 @@ static const char *open_last_lookups(struct nameidata *nd,
return handle_dots(nd, nd->last_type);
}
+ /* We _can_ be in RCU mode here */
+ dentry = lookup_fast_for_open(nd, open_flag);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
if (!(open_flag & O_CREAT)) {
- if (nd->last.name[nd->last.len])
- nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- /* we _can_ be in RCU mode here */
- dentry = lookup_fast(nd);
- if (IS_ERR(dentry))
- return ERR_CAST(dentry);
if (likely(dentry))
goto finish_lookup;
if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU))
return ERR_PTR(-ECHILD);
} else {
- /* create side of things */
if (nd->flags & LOOKUP_RCU) {
- if (!try_to_unlazy(nd))
+ bool unlazied;
+
+ /* can stay in rcuwalk if not auditing */
+ if (dentry && audit_dummy_context()) {
+ if (trailing_slashes(nd))
+ return ERR_PTR(-EISDIR);
+ goto finish_lookup;
+ }
+ unlazied = dentry ? try_to_unlazy_next(nd, dentry) :
+ try_to_unlazy(nd);
+ if (!unlazied)
return ERR_PTR(-ECHILD);
}
audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
- /* trailing slashes? */
- if (unlikely(nd->last.name[nd->last.len]))
+ if (trailing_slashes(nd)) {
+ dput(dentry);
return ERR_PTR(-EISDIR);
+ }
+ if (dentry)
+ goto finish_lookup;
}
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {