diff options
author | Sowmini Varadhan | 2018-02-03 04:26:51 -0800 |
---|---|---|
committer | David S. Miller | 2018-02-08 15:23:52 -0500 |
commit | ebeeb1ad9b8adcc37c2ec21a96f39e9d35199b46 (patch) | |
tree | 25c1f3a044ab1c8f8e15031f606b00ae35db2ed0 /net/rds/tcp.c | |
parent | 79a8a642bf05cd0dced20621f6fef9d884124abd (diff) |
rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and rds connection/workq management
An rds_connection can get added during netns deletion between lines 528
and 529 of:
506 static void rds_tcp_kill_sock(struct net *net)
:
/* code to pull out all the rds_connections that should be destroyed */
:
528 spin_unlock_irq(&rds_tcp_conn_lock);
529 list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
530 rds_conn_destroy(tc->t_cpath->cp_conn);
Such an rds_connection would miss out on the rds_conn_destroy()
loop (that cancels all pending work) and (if it was scheduled
after netns deletion) could trigger a use-after-free.
A similar race window exists for the module unload path
in rds_tcp_exit -> rds_tcp_destroy_conns.
Concurrency with netns deletion (rds_tcp_kill_sock()) must be handled
by checking check_net() before enqueuing new work or adding new
connections.
Concurrency with module-unload is handled by maintaining a
module-specific flag that is set at the start of the module exit function,
and must be checked before enqueuing new work or adding new connections.
This commit refactors existing RDS_DESTROY_PENDING checks added by
commit 3db6e0d172c9 ("rds: use RCU to synchronize work-enqueue with
connection teardown") and consolidates all the concurrency checks
listed above into the function rds_destroy_pending().
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/tcp.c')
-rw-r--r-- | net/rds/tcp.c | 42 |
1 files changed, 30 insertions, 12 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 9920d2f84eff..44c4652721af 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -49,6 +49,7 @@ static unsigned int rds_tcp_tc_count; /* Track rds_tcp_connection structs so they can be cleaned up */ static DEFINE_SPINLOCK(rds_tcp_conn_lock); static LIST_HEAD(rds_tcp_conn_list); +static atomic_t rds_tcp_unloading = ATOMIC_INIT(0); static struct kmem_cache *rds_tcp_conn_slab; @@ -274,14 +275,13 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static void rds_tcp_conn_free(void *arg) { struct rds_tcp_connection *tc = arg; - unsigned long flags; rdsdebug("freeing tc %p\n", tc); - spin_lock_irqsave(&rds_tcp_conn_lock, flags); + spin_lock_bh(&rds_tcp_conn_lock); if (!tc->t_tcp_node_detached) list_del(&tc->t_tcp_node); - spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); + spin_unlock_bh(&rds_tcp_conn_lock); kmem_cache_free(rds_tcp_conn_slab, tc); } @@ -296,7 +296,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); if (!tc) { ret = -ENOMEM; - break; + goto fail; } mutex_init(&tc->t_conn_path_lock); tc->t_sock = NULL; @@ -306,14 +306,19 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) conn->c_path[i].cp_transport_data = tc; tc->t_cpath = &conn->c_path[i]; + tc->t_tcp_node_detached = true; - spin_lock_irq(&rds_tcp_conn_lock); - tc->t_tcp_node_detached = false; - list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); - spin_unlock_irq(&rds_tcp_conn_lock); rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } + spin_lock_bh(&rds_tcp_conn_lock); + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + tc = conn->c_path[i].cp_transport_data; + tc->t_tcp_node_detached = false; + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + } + spin_unlock_bh(&rds_tcp_conn_lock); +fail: if (ret) { for (j = 0; j < i; j++) rds_tcp_conn_free(conn->c_path[j].cp_transport_data); @@ -332,6 +337,16 @@ static bool 
list_has_conn(struct list_head *list, struct rds_connection *conn) return false; } +static void rds_tcp_set_unloading(void) +{ + atomic_set(&rds_tcp_unloading, 1); +} + +static bool rds_tcp_is_unloading(struct rds_connection *conn) +{ + return atomic_read(&rds_tcp_unloading) != 0; +} + static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; @@ -370,6 +385,7 @@ struct rds_transport rds_tcp_transport = { .t_type = RDS_TRANS_TCP, .t_prefer_loopback = 1, .t_mp_capable = 1, + .t_unloading = rds_tcp_is_unloading, }; static unsigned int rds_tcp_netid; @@ -513,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net) rtn->rds_tcp_listen_sock = NULL; rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - spin_lock_irq(&rds_tcp_conn_lock); + spin_lock_bh(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -526,7 +542,7 @@ static void rds_tcp_kill_sock(struct net *net) tc->t_tcp_node_detached = true; } } - spin_unlock_irq(&rds_tcp_conn_lock); + spin_unlock_bh(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); } @@ -574,7 +590,7 @@ static void rds_tcp_sysctl_reset(struct net *net) { struct rds_tcp_connection *tc, *_tc; - spin_lock_irq(&rds_tcp_conn_lock); + spin_lock_bh(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -584,7 +600,7 @@ static void rds_tcp_sysctl_reset(struct net *net) /* reconnect with new parameters */ rds_conn_path_drop(tc->t_cpath, false); } - spin_unlock_irq(&rds_tcp_conn_lock); + spin_unlock_bh(&rds_tcp_conn_lock); } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, @@ -607,6 +623,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, static void rds_tcp_exit(void) { + rds_tcp_set_unloading(); + synchronize_rcu(); 
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); unregister_pernet_subsys(&rds_tcp_net_ops); if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) |