diff options
author | Al Viro | 2022-07-04 18:12:39 -0400 |
---|---|---|
committer | Al Viro | 2022-07-06 13:14:42 -0400 |
commit | 03fa86e9f79d8b9a6aa28965829a4a8646139a0a (patch) | |
tree | d44fb5a33c51fc798cfb5f416b9639321f36fcaf /fs/namei.c | |
parent | 6e180327153071281dbbf6a16759e49862debdca (diff) |
namei: stash the sampled ->d_seq into nameidata
New field: nd->next_seq. Set to 0 outside of RCU mode, holds the sampled
value for the next dentry to be considered. Used instead of an arseload
of local variables, arguments, etc.
step_into() has lost seq argument; nd->next_seq is used, so dentry passed
to it must be the one ->next_seq is about.
There are two requirements for RCU pathwalk:
1) it should not give a hard failure (other than -ECHILD) unless
non-RCU pathwalk might fail that way given suitable timings.
2) it should not succeed unless non-RCU pathwalk might succeed
with the same end location given suitable timings.
The use of seq numbers is the way we achieve that. Invariant we want
to maintain is:
if RCU pathwalk can reach the state with given nd->path, nd->inode
and nd->seq after having traversed some part of pathname, it must be possible
for non-RCU pathwalk to reach the same nd->path and nd->inode after having
traversed the same part of pathname, and observe the nd->path.dentry->d_seq
equal to what RCU pathwalk has in nd->seq
For transition from parent to child, we sample child's ->d_seq
and verify that parent's ->d_seq remains unchanged. Anything that
disrupts parent-child relationship would've bumped ->d_seq on both.
For transitions from child to parent we sample parent's ->d_seq
and verify that child's ->d_seq has not changed. Same reasoning as
for the previous case applies.
For transition from mountpoint to root of mounted we sample
the ->d_seq of root and verify that nobody has touched mount_lock since
the beginning of pathwalk. That guarantees that mount we'd found had
been there all along, with these mountpoint and root of the mounted.
It would be possible for a non-RCU pathwalk to reach the previous state,
find the same mount and observe its root at the moment we'd sampled
->d_seq of that
For transitions from root of mounted to mountpoint we sample
->d_seq of mountpoint and verify that mount_lock had not been touched
since the beginning of pathwalk. The same reasoning as in the
previous case applies.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 98 |
1 files changed, 48 insertions, 50 deletions
diff --git a/fs/namei.c b/fs/namei.c index 4b7a2147c207..8dd7874816cc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -567,7 +567,7 @@ struct nameidata { struct path root; struct inode *inode; /* path.dentry.d_inode */ unsigned int flags, state; - unsigned seq, m_seq, r_seq; + unsigned seq, next_seq, m_seq, r_seq; int last_type; unsigned depth; int total_link_count; @@ -668,6 +668,7 @@ static void drop_links(struct nameidata *nd) static void leave_rcu(struct nameidata *nd) { nd->flags &= ~LOOKUP_RCU; + nd->seq = nd->next_seq = 0; rcu_read_unlock(); } @@ -792,7 +793,6 @@ out: * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: next dentry to step into - * @seq: seq number to check @dentry against * Returns: true on success, false on failure * * Similar to try_to_unlazy(), but here we have the next dentry already @@ -801,7 +801,7 @@ out: * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk(). */ -static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) { int res; BUG_ON(!(nd->flags & LOOKUP_RCU)); @@ -826,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi */ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) goto out; - if (read_seqcount_retry(&dentry->d_seq, seq)) + if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) goto out_dput; /* * Sequence counts matched. Now make sure that the root is @@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(follow_down); * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode, unsigned *seqp) + struct inode **inode) { struct dentry *dentry = path->dentry; unsigned int flags = dentry->d_flags; @@ -1504,7 +1504,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->mnt = &mounted->mnt; dentry = path->dentry = mounted->mnt.mnt_root; nd->state |= ND_JUMPED; - *seqp = read_seqcount_begin(&dentry->d_seq); + nd->next_seq = read_seqcount_begin(&dentry->d_seq); *inode = dentry->d_inode; /* * We don't need to re-check ->d_seq after this @@ -1513,6 +1513,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * becoming unpinned. */ flags = dentry->d_flags; + // makes sure that non-RCU pathwalk could reach + // this state. if (read_seqretry(&mount_lock, nd->m_seq)) return false; continue; @@ -1525,8 +1527,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, } static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct inode **inode, - unsigned int *seqp) + struct path *path, struct inode **inode) { bool jumped; int ret; @@ -1534,16 +1535,17 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, path->mnt = nd->path.mnt; path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { - unsigned int seq = *seqp; + unsigned int seq = nd->next_seq; if (unlikely(!*inode)) return -ENOENT; - if (likely(__follow_mount_rcu(nd, path, inode, seqp))) + if (likely(__follow_mount_rcu(nd, path, inode))) return 0; - if (!try_to_unlazy_next(nd, dentry, seq)) - return -ECHILD; - // *path might've been clobbered by __follow_mount_rcu() + // *path and nd->next_seq might've been clobbered path->mnt = nd->path.mnt; path->dentry = dentry; + nd->next_seq = seq; + if (!try_to_unlazy_next(nd, dentry)) + return -ECHILD; } ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); if (jumped) { @@ -1558,7 +1560,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, mntput(path->mnt); } else { *inode = d_backing_inode(path->dentry); - *seqp = 0; /* out of RCU mode, so the value doesn't matter */ } return ret; } @@ -1618,8 +1619,7 @@ static struct dentry *__lookup_hash(const struct qstr *name, } static struct dentry *lookup_fast(struct nameidata *nd, - struct inode **inode, - unsigned *seqp) + struct inode **inode) { struct dentry *dentry, *parent = nd->path.dentry; int status = 1; @@ -1630,8 +1630,7 @@ static struct dentry *lookup_fast(struct nameidata *nd, * going to fall back to non-racy lookup. */ if (nd->flags & LOOKUP_RCU) { - unsigned seq; - dentry = __d_lookup_rcu(parent, &nd->last, &seq); + dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq); if (unlikely(!dentry)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); @@ -1643,7 +1642,7 @@ static struct dentry *lookup_fast(struct nameidata *nd, * the dentry name information from lookup. */ *inode = d_backing_inode(dentry); - if (read_seqcount_retry(&dentry->d_seq, seq)) + if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); /* @@ -1656,11 +1655,10 @@ static struct dentry *lookup_fast(struct nameidata *nd, if (__read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); - *seqp = seq; status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) return dentry; - if (!try_to_unlazy_next(nd, dentry, seq)) + if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ @@ -1741,7 +1739,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns, return inode_permission(mnt_userns, nd->inode, MAY_EXEC); } -static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) +static int reserve_stack(struct nameidata *nd, struct path *link) { if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) return -ELOOP; @@ -1756,7 +1754,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) if (nd->flags & LOOKUP_RCU) { // we need to grab link before we do unlazy. And we can't skip // unlazy even if we fail to grab the link - cleanup needs it - bool grabbed_link = legitimize_path(nd, link, seq); + bool grabbed_link = legitimize_path(nd, link, nd->next_seq); if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD; @@ -1770,11 +1768,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4}; static const char *pick_link(struct nameidata *nd, struct path *link, - struct inode *inode, unsigned seq, int flags) + struct inode *inode, int flags) { struct saved *last; const char *res; - int error = reserve_stack(nd, link, seq); + int error = reserve_stack(nd, link); if (unlikely(error)) { if (!(nd->flags & LOOKUP_RCU)) @@ -1784,7 +1782,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link, last = nd->stack + nd->depth++; last->link = *link; clear_delayed_call(&last->done); - last->seq = seq; + last->seq = nd->next_seq; if (flags & WALK_TRAILING) { error = may_follow_link(nd, inode); @@ -1846,12 +1844,14 @@ all_done: // pure jump * to do this check without having to look at inode->i_op, * so we keep a cache of "no, this doesn't need follow_link" * for the common case. + * + * NOTE: dentry must be what nd->next_seq had been sampled from. */ static const char *step_into(struct nameidata *nd, int flags, - struct dentry *dentry, struct inode *inode, unsigned seq) + struct dentry *dentry, struct inode *inode) { struct path path; - int err = handle_mounts(nd, dentry, &path, &inode, &seq); + int err = handle_mounts(nd, dentry, &path, &inode); if (err < 0) return ERR_PTR(err); @@ -1866,23 +1866,22 @@ static const char *step_into(struct nameidata *nd, int flags, } nd->path = path; nd->inode = inode; - nd->seq = seq; + nd->seq = nd->next_seq; return NULL; } if (nd->flags & LOOKUP_RCU) { /* make sure that d_is_symlink above matches inode */ - if (read_seqcount_retry(&path.dentry->d_seq, seq)) + if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); } else { if (path.mnt == nd->path.mnt) mntget(path.mnt); } - return pick_link(nd, &path, inode, seq, flags); + return pick_link(nd, &path, inode, flags); } static struct dentry *follow_dotdot_rcu(struct nameidata *nd, - struct inode **inodep, - unsigned *seqp) + struct inode **inodep) { struct dentry *parent, *old; @@ -1899,6 +1898,7 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd, nd->path = path; nd->inode = path.dentry->d_inode; nd->seq = seq; + // makes sure that non-RCU pathwalk could reach this state if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); /* we know that mountpoint was pinned */ @@ -1906,7 +1906,8 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd, old = nd->path.dentry; parent = old->d_parent; *inodep = parent->d_inode; - *seqp = read_seqcount_begin(&parent->d_seq); + nd->next_seq = read_seqcount_begin(&parent->d_seq); + // makes sure that non-RCU pathwalk could reach this state if (read_seqcount_retry(&old->d_seq, nd->seq)) return ERR_PTR(-ECHILD); if (unlikely(!path_connected(nd->path.mnt, parent))) @@ -1917,14 +1918,13 @@ in_root: return ERR_PTR(-ECHILD); if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-ECHILD); - *seqp = nd->seq; + nd->next_seq = nd->seq; *inodep = nd->path.dentry->d_inode; return nd->path.dentry; } static struct dentry *follow_dotdot(struct nameidata *nd, - struct inode **inodep, - unsigned *seqp) + struct inode **inodep) { struct dentry *parent; @@ -1948,14 +1948,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd, dput(parent); return ERR_PTR(-ENOENT); } - *seqp = 0; *inodep = parent->d_inode; return parent; in_root: if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-EXDEV); - *seqp = 0; *inodep = nd->path.dentry->d_inode; return dget(nd->path.dentry); } @@ -1966,7 +1964,6 @@ static const char *handle_dots(struct nameidata *nd, int type) const char *error = NULL; struct dentry *parent; struct inode *inode; - unsigned seq; if (!nd->root.mnt) { error = ERR_PTR(set_root(nd)); @@ -1974,12 +1971,12 @@ static const char *handle_dots(struct nameidata *nd, int type) return error; } if (nd->flags & LOOKUP_RCU) - parent = follow_dotdot_rcu(nd, &inode, &seq); + parent = follow_dotdot_rcu(nd, &inode); else - parent = follow_dotdot(nd, &inode, &seq); + parent = follow_dotdot(nd, &inode); if (IS_ERR(parent)) return ERR_CAST(parent); - error = step_into(nd, WALK_NOFOLLOW, parent, inode, seq); + error = step_into(nd, WALK_NOFOLLOW, parent, inode); if (unlikely(error)) return error; @@ -2004,7 +2001,6 @@ static const char *walk_component(struct nameidata *nd, int flags) { struct dentry *dentry; struct inode *inode; - unsigned seq; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and @@ -2015,7 +2011,7 @@ static const char *walk_component(struct nameidata *nd, int flags) put_link(nd); return handle_dots(nd, nd->last_type); } - dentry = lookup_fast(nd, &inode, &seq); + dentry = lookup_fast(nd, &inode); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (unlikely(!dentry)) { @@ -2025,7 +2021,7 @@ static const char *walk_component(struct nameidata *nd, int flags) } if (!(flags & WALK_MORE) && nd->depth) put_link(nd); - return step_into(nd, flags, dentry, inode, seq); + return step_into(nd, flags, dentry, inode); } /* @@ -2380,6 +2376,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) rcu_read_lock(); + else + nd->seq = nd->next_seq = 0; nd->flags = flags; nd->state |= ND_JUMPED; @@ -2481,8 +2479,9 @@ static int handle_lookup_down(struct nameidata *nd) { if (!(nd->flags & LOOKUP_RCU)) dget(nd->path.dentry); + nd->next_seq = nd->seq; return PTR_ERR(step_into(nd, WALK_NOFOLLOW, - nd->path.dentry, nd->inode, nd->seq)); + nd->path.dentry, nd->inode)); } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ @@ -3401,7 +3400,6 @@ static const char *open_last_lookups(struct nameidata *nd, struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; - unsigned seq; struct inode *inode; struct dentry *dentry; const char *res; @@ -3418,7 +3416,7 @@ static const char *open_last_lookups(struct nameidata *nd, if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; /* we _can_ be in RCU mode here */ - dentry = lookup_fast(nd, &inode, &seq); + dentry = lookup_fast(nd, &inode); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (likely(dentry)) @@ -3472,7 +3470,7 @@ static const char *open_last_lookups(struct nameidata *nd, finish_lookup: if (nd->depth) put_link(nd); - res = step_into(nd, WALK_TRAILING, dentry, inode, seq); + res = step_into(nd, WALK_TRAILING, dentry, inode); if (unlikely(res)) nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); return res; |