aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski2021-09-23 11:19:49 -0700
committerJakub Kicinski2021-09-23 11:19:49 -0700
commit2fcd14d0f78090f57aecd7f424e2b0373cd631a7 (patch)
treef8946c307a3b5d319c17ec988d8a38c0fe7a08de /net
parent9aad3e4ede9bebda23579f674420123dacb61fda (diff)
parent9bc62afe03afdf33904f5e784e1ad68c50ff00bb (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
net/mptcp/protocol.c 977d293e23b4 ("mptcp: ensure tx skbs always have the MPTCP ext") efe686ffce01 ("mptcp: ensure tx skbs always have the MPTCP ext") same patch merged in both trees, keep net-next. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/sock.c37
-rw-r--r--net/dsa/dsa2.c114
-rw-r--r--net/dsa/tag_ocelot.c2
-rw-r--r--net/dsa/tag_ocelot_8021q.c2
-rw-r--r--net/ipv4/nexthop.c21
-rw-r--r--net/smc/smc_clc.c3
-rw-r--r--net/smc/smc_core.c2
8 files changed, 161 insertions, 36 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f930329f0dc2..62ddd7d6e00d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6925,12 +6925,16 @@ EXPORT_SYMBOL(napi_disable);
*/
void napi_enable(struct napi_struct *n)
{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
- if (n->dev->threaded && n->thread)
- set_bit(NAPI_STATE_THREADED, &n->state);
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
+
+ new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
+ if (n->dev->threaded && n->thread)
+ new |= NAPIF_STATE_THREADED;
+ } while (cmpxchg(&n->state, val, new) != val);
}
EXPORT_SYMBOL(napi_enable);
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..512e629f9780 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3179,17 +3179,15 @@ EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
@@ -3227,24 +3225,35 @@ EXPORT_SYMBOL(release_sock);
*/
bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (!sk->sk_lock.owned)
+ if (!sk->sk_lock.owned) {
/*
- * Note : We must disable BH
+ * Fast path return with bottom halves disabled and
+ * sock::sk_lock.slock held.
+ *
+ * The 'mutex' is not contended and holding
+ * sock::sk_lock.slock prevents all other lockers to
+ * proceed so the corresponding unlock_sock_fast() can
+ * avoid the slow path of release_sock() completely and
+ * just release slock.
+ *
+ * From a semantical POV this is equivalent to 'acquiring'
+ * the 'mutex', hence the corresponding lockdep
+ * mutex_release() has to happen in the fast path of
+ * unlock_sock_fast().
*/
return false;
+ }
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
__acquire(&sk->sk_lock.slock);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
EXPORT_SYMBOL(lock_sock_fast);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 96f211f52ac3..a020339e1973 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
bool dsa_port_link_registered = false;
+ struct dsa_switch *ds = dp->ds;
bool dsa_port_enabled = false;
int err = 0;
@@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp)
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);
+ if (ds->ops->port_setup) {
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
dsa_port_link_unregister_of(dp);
- if (err)
+ if (err) {
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
return err;
+ }
dp->setup = true;
@@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a, *tmp;
if (!dp->setup)
return;
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+
devlink_port_type_clear(dlp);
switch (dp->type) {
@@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp)
dp->devlink_port_setup = false;
}
+/* Destroy the current devlink port, and create a new one which has the UNUSED
+ * flavour. At this point, any call to ds->ops->port_setup has been already
+ * balanced out by a call to ds->ops->port_teardown, so we know that any
+ * devlink port regions the driver had are now unregistered. We then call its
+ * ds->ops->port_setup again, in order for the driver to re-create them on the
+ * new devlink port.
+ */
+static int dsa_port_reinit_as_unused(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ dsa_port_devlink_teardown(dp);
+ dp->type = DSA_PORT_TYPE_UNUSED;
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
+
+ if (ds->ops->port_setup) {
+ /* On error, leave the devlink port registered,
+ * dsa_switch_teardown will clean it up later.
+ */
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int dsa_devlink_info_get(struct devlink *dl,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
@@ -833,7 +877,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
devlink_params_publish(ds->devlink);
if (!ds->slave_mii_bus && ds->ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ ds->slave_mii_bus = mdiobus_alloc();
if (!ds->slave_mii_bus) {
err = -ENOMEM;
goto teardown;
@@ -843,13 +887,16 @@ static int dsa_switch_setup(struct dsa_switch *ds)
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- goto teardown;
+ goto free_slave_mii_bus;
}
ds->setup = true;
return 0;
+free_slave_mii_bus:
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->slave_mii_bus);
teardown:
if (ds->ops->teardown)
ds->ops->teardown(ds);
@@ -872,8 +919,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
if (!ds->setup)
return;
- if (ds->slave_mii_bus && ds->ops->phy_read)
+ if (ds->slave_mii_bus && ds->ops->phy_read) {
mdiobus_unregister(ds->slave_mii_bus);
+ mdiobus_free(ds->slave_mii_bus);
+ ds->slave_mii_bus = NULL;
+ }
dsa_switch_unregister_notifier(ds);
@@ -933,12 +983,9 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
err = dsa_port_setup(dp);
if (err) {
- dsa_port_devlink_teardown(dp);
- dp->type = DSA_PORT_TYPE_UNUSED;
- err = dsa_port_devlink_setup(dp);
+ err = dsa_port_reinit_as_unused(dp);
if (err)
goto teardown;
- continue;
}
}
@@ -1043,6 +1090,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
teardown_master:
dsa_tree_teardown_master(dst);
teardown_switches:
+ dsa_tree_teardown_ports(dst);
dsa_tree_teardown_switches(dst);
teardown_cpu_ports:
dsa_tree_teardown_cpu_ports(dst);
@@ -1557,3 +1605,53 @@ void dsa_unregister_switch(struct dsa_switch *ds)
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
+
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
+{
+ struct net_device *master, *slave_dev;
+ LIST_HEAD(unregister_list);
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
+ rtnl_lock();
+
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+ if (dp->ds != ds)
+ continue;
+
+ if (!dsa_port_is_user(dp))
+ continue;
+
+ master = dp->cpu_dp->master;
+ slave_dev = dp->slave;
+
+ netdev_upper_dev_unlink(master, slave_dev);
+ /* Just unlinking ourselves as uppers of the master is not
+ * sufficient. When the master net device unregisters, that will
+ * also call dev_close, which we will catch as NETDEV_GOING_DOWN
+ * and trigger a dev_close on our own devices (dsa_slave_close).
+ * In turn, that will call dev_mc_unsync on the master's net
+ * device. If the master is also a DSA switch port, this will
+ * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
+ * its own master. Lockdep will complain about the fact that
+ * all cascaded masters have the same dsa_master_addr_list_lock_key,
+ * which it normally would not do if the cascaded masters would
+ * be in a proper upper/lower relationship, which we've just
+ * destroyed.
+ * To suppress the lockdep warnings, let's actually unregister
+ * the DSA slave interfaces too, to avoid the nonsensical
+ * multicast address list synchronization on shutdown.
+ */
+ unregister_netdevice_queue(slave_dev, &unregister_list);
+ }
+ unregister_netdevice_many(&unregister_list);
+
+ rtnl_unlock();
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index d37ab98e7fe1..8025ed778d33 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
*/
#include <linux/dsa/ocelot.h>
#include <soc/mscc/ocelot.h>
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3038a257ba05..59072930cb02 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
*
* An implementation of the software-defined tag_8021q.c tagger format, which
* also preserves full functionality under a vlan_filtering bridge. It does
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 75ca4b6e484f..9e8100728d46 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
rcu_assign_pointer(old->nh_grp, newg);
if (newg->resilient) {
+ /* Make sure concurrent readers are not using 'oldg' anymore. */
+ synchronize_net();
rcu_assign_pointer(oldg->res_table, tmp_table);
rcu_assign_pointer(oldg->spare->res_table, tmp_table);
}
@@ -3565,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = {
};
static int nexthops_dump(struct net *net, struct notifier_block *nb,
+ enum nexthop_event_type event_type,
struct netlink_ext_ack *extack)
{
struct rb_root *root = &net->nexthop.rb_root;
@@ -3575,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb,
struct nexthop *nh;
nh = rb_entry(node, struct nexthop, rb_node);
- err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
- extack);
+ err = call_nexthop_notifier(nb, net, event_type, nh, extack);
if (err)
break;
}
@@ -3590,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
int err;
rtnl_lock();
- err = nexthops_dump(net, nb, extack);
+ err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack);
if (err)
goto unlock;
err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
@@ -3603,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
- return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
- nb);
+ int err;
+
+ rtnl_lock();
+ err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
+ nb);
+ if (err)
+ goto unlock;
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+unlock:
+ rtnl_unlock();
+ return err;
}
EXPORT_SYMBOL(unregister_nexthop_notifier);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 4afd9e71e5c2..1cc8a76b39f9 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -510,7 +510,8 @@ static int smc_clc_prfx_set(struct socket *clcsock,
goto out_rel;
}
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
+ goto out_rel;
/* analyze IP specific data of net_device belonging to TCP socket */
addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4d0fcd8f8eba..f57449089a16 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1468,7 +1468,9 @@ static void smc_conn_abort_work(struct work_struct *work)
abort_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ lock_sock(&smc->sk);
smc_conn_kill(conn, true);
+ release_sock(&smc->sk);
sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}