aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_dev.c15
-rw-r--r--net/8021q/vlan_netlink.c7
-rw-r--r--net/ax25/af_ax25.c28
-rw-r--r--net/ax25/ax25_dev.c28
-rw-r--r--net/ax25/ax25_route.c13
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_vlan.c9
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c8
-rw-r--r--net/can/isotp.c29
-rw-r--r--net/can/j1939/transport.c2
-rw-r--r--net/ceph/ceph_common.c7
-rw-r--r--net/ceph/messenger.c4
-rw-r--r--net/ceph/messenger_v1.c54
-rw-r--r--net/ceph/messenger_v2.c250
-rw-r--r--net/core/drop_monitor.c11
-rw-r--r--net/core/filter.c3
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net-procfs.c38
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c4
-rw-r--r--net/dsa/dsa.c1
-rw-r--r--net/dsa/dsa2.c25
-rw-r--r--net/dsa/dsa_priv.h1
-rw-r--r--net/dsa/master.c7
-rw-r--r--net/dsa/port.c29
-rw-r--r--net/dsa/tag_lan9303.c21
-rw-r--r--net/ieee802154/nl802154.c8
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_lookup.h7
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/ip_output.c28
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/ping.c11
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c31
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv6/addrconf.c33
-rw-r--r--net/ipv6/ip6_fib.c23
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/netfilter/Kconfig4
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c0
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/mac80211/mlme.c29
-rw-r--r--net/mctp/route.c11
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/mptcp/mib.c2
-rw-r--r--net/mptcp/mib.h2
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/pm_netlink.c76
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/netfilter/nf_conntrack_core.c8
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c59
-rw-r--r--net/netfilter/nf_tables_api.c21
-rw-r--r--net/netfilter/nf_tables_offload.c3
-rw-r--r--net/netfilter/nft_byteorder.c12
-rw-r--r--net/netfilter/nft_connlimit.c11
-rw-r--r--net/netfilter/nft_ct.c5
-rw-r--r--net/netfilter/nft_dup_netdev.c6
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/nft_fwd_netdev.c6
-rw-r--r--net/netfilter/nft_immediate.c12
-rw-r--r--net/netfilter/nft_limit.c18
-rw-r--r--net/netfilter/nft_payload.c9
-rw-r--r--net/netfilter/nft_synproxy.c4
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/openvswitch/actions.c46
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/rxrpc/call_event.c8
-rw-r--r--net/rxrpc/output.c2
-rw-r--r--net/sched/act_api.c15
-rw-r--r--net/sched/act_ct.c5
-rw-r--r--net/sched/cls_api.c17
-rw-r--r--net/sched/sch_api.c24
-rw-r--r--net/sched/sch_generic.c29
-rw-r--r--net/sched/sch_htb.c20
-rw-r--r--net/smc/af_smc.c196
-rw-r--r--net/smc/smc.h20
-rw-r--r--net/smc/smc_diag.c2
-rw-r--r--net/smc/smc_pnet.c50
-rw-r--r--net/smc/smc_pnet.h2
-rw-r--r--net/socket.c4
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/sysfs.c46
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c4
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c4
-rw-r--r--net/sunrpc/xprtrdma/transport.c4
-rw-r--r--net/sunrpc/xprtrdma/verbs.c26
-rw-r--r--net/sunrpc/xprtsock.c9
-rw-r--r--net/tipc/crypto.c2
-rw-r--r--net/tipc/link.c9
-rw-r--r--net/tipc/monitor.c2
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/node.c13
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/vmw_vsock/af_vsock.c1
-rw-r--r--net/wireless/core.c17
118 files changed, 1282 insertions, 549 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 1a705a4ef7fa..5eaf38875554 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -129,6 +129,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
+void vlan_dev_free_egress_priority(const struct net_device *dev);
int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
size_t size);
@@ -139,7 +140,6 @@ int vlan_check_real_dev(struct net_device *real_dev,
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
-void vlan_dev_uninit(struct net_device *dev);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 26d031a43cc1..d1902828a18a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -622,7 +622,7 @@ static int vlan_dev_init(struct net_device *dev)
}
/* Note: this function might be called multiple times for the same device. */
-void vlan_dev_uninit(struct net_device *dev)
+void vlan_dev_free_egress_priority(const struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -636,6 +636,16 @@ void vlan_dev_uninit(struct net_device *dev)
}
}
+static void vlan_dev_uninit(struct net_device *dev)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+ vlan_dev_free_egress_priority(dev);
+
+ /* Get rid of the vlan's reference to real_dev */
+ dev_put_track(vlan->real_dev, &vlan->dev_tracker);
+}
+
static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
netdev_features_t features)
{
@@ -846,9 +856,6 @@ static void vlan_dev_free(struct net_device *dev)
free_percpu(vlan->vlan_pcpu_stats);
vlan->vlan_pcpu_stats = NULL;
-
- /* Get rid of the vlan's reference to real_dev */
- dev_put_track(vlan->real_dev, &vlan->dev_tracker);
}
void vlan_setup(struct net_device *dev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0db85aeb119b..53b1955b027f 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -183,10 +183,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
err = vlan_changelink(dev, tb, data, extack);
- if (!err)
- err = register_vlan_dev(dev, extack);
if (err)
- vlan_dev_uninit(dev);
+ return err;
+ err = register_vlan_dev(dev, extack);
+ if (err)
+ vlan_dev_free_egress_priority(dev);
return err;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 02f43f3e2c56..d53cbb4e2503 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev)
{
ax25_dev *ax25_dev;
ax25_cb *s;
+ struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
return;
@@ -85,13 +86,17 @@ static void ax25_kill_by_device(struct net_device *dev)
again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
+ sk = s->sk;
+ sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
- lock_sock(s->sk);
+ lock_sock(sk);
s->ax25_dev = NULL;
- release_sock(s->sk);
+ dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
ax25_disconnect(s, ENETUNREACH);
+ release_sock(sk);
spin_lock_bh(&ax25_list_lock);
-
+ sock_put(sk);
/* The entry could have been deleted from the
* list meanwhile and thus the next pointer is
* no longer valid. Play it safe and restart
@@ -355,21 +360,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
return -EFAULT;
- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
- return -ENODEV;
-
if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
return -EINVAL;
if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
+ if (!ax25_dev)
+ return -ENODEV;
+
digi.ndigi = ax25_ctl.digi_count;
for (k = 0; k < digi.ndigi; k++)
digi.calls[k] = ax25_ctl.digi_addr[k];
- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
+ if (!ax25) {
+ ax25_dev_put(ax25_dev);
return -ENOTCONN;
+ }
switch (ax25_ctl.cmd) {
case AX25_KILL:
@@ -436,6 +445,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
}
out_put:
+ ax25_dev_put(ax25_dev);
ax25_cb_put(ax25);
return ret;
@@ -1107,8 +1117,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
}
- if (ax25_dev != NULL)
+ if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
+ }
done:
ax25_cb_add(ax25);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 256fadb94df3..d2a244e1c260 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
+ ax25_dev_hold(ax25_dev);
}
spin_unlock_bh(&ax25_dev_lock);
@@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev)
return;
}
+ refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
@@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->next = ax25_dev_list;
ax25_dev_list = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev)
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev)
}
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
@@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
switch (cmd) {
case SIOCAX25ADDFWD:
- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
+ if (!fwd_dev) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
- if (ax25_dev->forward != NULL)
+ }
+ if (ax25_dev->forward) {
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = fwd_dev->dev;
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
break;
case SIOCAX25DELFWD:
- if (ax25_dev->forward == NULL)
+ if (!ax25_dev->forward) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = NULL;
+ ax25_dev_put(ax25_dev);
break;
default:
+ ax25_dev_put(ax25_dev);
return -EINVAL;
}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d0b2e094bd55..9751207f7757 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_dev *ax25_dev;
int i;
- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
- return -EINVAL;
if (route->digi_count > AX25_MAX_DIGIS)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
+ if (!ax25_dev)
+ return -EINVAL;
+
write_lock_bh(&ax25_route_lock);
ax25_rt = ax25_route_list;
@@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if (route->digi_count != 0) {
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
ax25_rt = ax25_rt->next;
@@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
@@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
kfree(ax25_rt);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_rt->next = ax25_route_list;
ax25_route_list = ax25_rt;
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
out:
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index de2409889489..db4f2641d1cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -82,6 +82,9 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
+static int br_mc_disabled_update(struct net_device *dev, bool value,
+ struct netlink_ext_ack *extack);
+
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
struct net_bridge_port_group_sg_key *sg_p)
@@ -1156,6 +1159,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 84ba456a78cc..1402d5ca242d 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br,
!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
- return br_vlan_state_allowed(*state, true);
- } else {
- return true;
+ if (!br_vlan_state_allowed(*state, true))
+ goto drop;
}
+ return true;
}
}
v = br_vlan_find(vg, *vid);
@@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
- if (err && err != -EMSGSIZE)
+ /* if the dump completed without an error we return 0 here */
+ if (err != -EMSGSIZE)
goto out_err;
} else {
for_each_netdev_rcu(net, dev) {
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index eba0efe64d05..fbf858ddec35 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
@@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 02cbcb2ecf0d..d2a430b6a13b 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -56,6 +56,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/wait.h>
#include <linux/uio.h>
@@ -145,6 +146,7 @@ struct isotp_sock {
struct tpcon rx, tx;
struct list_head notifier;
wait_queue_head_t wait;
+ spinlock_t rx_lock; /* protect single thread state machine */
};
static LIST_HEAD(isotp_notifier_list);
@@ -615,11 +617,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
n_pci_type = cf->data[ae] & 0xF0;
+ /* Make sure the state changes and data structures stay consistent at
+ * CAN frame reception time. This locking is not needed in real world
+ * use cases but the inconsistency can be triggered with syzkaller.
+ */
+ spin_lock(&so->rx_lock);
+
if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
/* check rx/tx path half duplex expectations */
if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
(so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
- return;
+ goto out_unlock;
}
switch (n_pci_type) {
@@ -668,6 +676,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
isotp_rcv_cf(sk, cf, ae, skb);
break;
}
+
+out_unlock:
+ spin_unlock(&so->rx_lock);
}
static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
@@ -876,7 +887,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (!size || size > MAX_MSG_LENGTH) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
@@ -886,24 +897,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
(size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
err = memcpy_from_msg(so->tx.buf, msg, size);
if (err < 0)
- goto err_out;
+ goto err_out_drop;
dev = dev_get_by_index(sock_net(sk), so->ifindex);
if (!dev) {
err = -ENXIO;
- goto err_out;
+ goto err_out_drop;
}
skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb) {
dev_put(dev);
- goto err_out;
+ goto err_out_drop;
}
can_skb_reserve(skb);
@@ -965,7 +976,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
- goto err_out;
+ goto err_out_drop;
}
if (wait_tx_done) {
@@ -978,6 +989,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return size;
+err_out_drop:
+ /* drop this PDU and unlock a potential wait queue */
+ old_state = ISOTP_IDLE;
err_out:
so->tx.state = old_state;
if (so->tx.state == ISOTP_IDLE)
@@ -1444,6 +1458,7 @@ static int isotp_init(struct sock *sk)
so->txtimer.function = isotp_tx_timer_handler;
init_waitqueue_head(&so->wait);
+ spin_lock_init(&so->rx_lock);
spin_lock(&isotp_notifier_lock);
list_add_tail(&so->notifier, &isotp_notifier_list);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index a271688780a2..307ee1174a6e 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -2006,7 +2006,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
/* set the end-packet for broadcast */
session->pkt.last = session->pkt.total;
- skcb->tskey = session->sk->sk_tskey++;
+ skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
session->tskey = skcb->tskey;
return session;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ecc400a0b7bb..4c6441536d55 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -246,6 +246,7 @@ enum {
Opt_cephx_sign_messages,
Opt_tcp_nodelay,
Opt_abort_on_full,
+ Opt_rxbounce,
};
enum {
@@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
+ fsparam_flag ("rxbounce", Opt_rxbounce),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
@@ -584,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_abort_on_full:
opt->flags |= CEPH_OPT_ABORT_ON_FULL;
break;
+ case Opt_rxbounce:
+ opt->flags |= CEPH_OPT_RXBOUNCE;
+ break;
default:
BUG();
@@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "notcp_nodelay,");
if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
seq_puts(m, "abort_on_full,");
+ if (opt->flags & CEPH_OPT_RXBOUNCE)
+ seq_puts(m, "rxbounce,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,",
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 45eba2dcb67a..d3bb656308b4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
+ if (con->bounce_page) {
+ __free_page(con->bounce_page);
+ con->bounce_page = NULL;
+ }
if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_reset_protocol(con);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 2cb5ffdf071a..6b014eca3a13 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
static int read_partial_msg_data(struct ceph_connection *con)
{
- struct ceph_msg *msg = con->in_msg;
- struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0;
int ret;
- if (!msg->num_data_items)
- return -EIO;
-
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->total_resid) {
@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
return 1; /* must return > 0 to indicate success */
}
+static int read_partial_msg_data_bounce(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ struct page *page;
+ size_t off, len;
+ u32 crc;
+ int ret;
+
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ crc = con->in_data_crc;
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
+ page = ceph_msg_data_next(cursor, &off, &len, NULL);
+ ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
+ if (ret <= 0) {
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ crc = crc32c(crc, page_address(con->bounce_page), ret);
+ memcpy_to_page(page, off, page_address(con->bounce_page), ret);
+
+ ceph_msg_data_advance(cursor, ret);
+ }
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
+
/*
* read (part of) a message.
*/
@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
/* (page) data */
if (data_len) {
- ret = read_partial_msg_data(con);
+ if (!m->num_data_items)
+ return -EIO;
+
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ ret = read_partial_msg_data_bounce(con);
+ else
+ ret = read_partial_msg_data(con);
if (ret <= 0)
return ret;
}
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index c4099b641b38..c81379f93ad5 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -57,8 +57,9 @@
#define IN_S_HANDLE_CONTROL_REMAINDER 3
#define IN_S_PREPARE_READ_DATA 4
#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_HANDLE_EPILOGUE 6
-#define IN_S_FINISH_SKIP 7
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_HANDLE_EPILOGUE 7
+#define IN_S_FINISH_SKIP 8
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
-static int decrypt_message(struct ceph_connection *con)
+static int decrypt_tail(struct ceph_connection *con)
{
+ struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ int tail_len;
int ret;
+ tail_len = tail_onwire_len(con->in_msg, true);
+ ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt, 0, tail_len,
+ GFP_NOIO);
+ if (ret)
+ goto out;
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
con->v2.in_buf, true);
if (ret)
goto out;
- ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl,
- tail_onwire_len(con->in_msg, true));
+ dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
+ con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
+ ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
+ if (ret)
+ goto out;
+
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
out:
sg_free_table(&sgt);
+ sg_free_table(&enc_sgt);
return ret;
}
@@ -1733,54 +1753,157 @@ static int prepare_read_control_remainder(struct ceph_connection *con)
return 0;
}
-static void prepare_read_data(struct ceph_connection *con)
+static int prepare_read_data(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
- con->in_data_crc = -1;
+ con->in_data_crc = -1;
ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
data_len(con->in_msg));
get_bvec_at(&con->v2.in_cursor, &bv);
- set_in_bvec(con, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ set_in_bvec(con, &bv);
+ } else {
+ set_in_bvec(con, &bv);
+ }
con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
+ return 0;
}
static void prepare_read_data_cont(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+
+ get_bvec_at(&con->v2.in_cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
con->v2.in_bvec.bv_page,
con->v2.in_bvec.bv_offset,
con->v2.in_bvec.bv_len);
+ }
ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
if (con->v2.in_cursor.total_resid) {
get_bvec_at(&con->v2.in_cursor, &bv);
- set_in_bvec(con, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ set_in_bvec(con, &bv);
+ } else {
+ set_in_bvec(con, &bv);
+ }
WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
return;
}
/*
- * We've read all data. Prepare to read data padding (if any)
- * and epilogue.
+ * We've read all data. Prepare to read epilogue.
*/
reset_in_kvecs(con);
- if (con_secure(con)) {
- if (need_padding(data_len(con->in_msg)))
- add_in_kvec(con, DATA_PAD(con->v2.in_buf),
- padding_len(data_len(con->in_msg)));
- add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+}
+
+static int prepare_read_tail_plain(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ if (!front_len(msg) && !middle_len(msg)) {
+ WARN_ON(!data_len(msg));
+ return prepare_read_data(con);
+ }
+
+ reset_in_kvecs(con);
+ if (front_len(msg)) {
+ add_in_kvec(con, msg->front.iov_base, front_len(msg));
+ WARN_ON(msg->front.iov_len != front_len(msg));
+ }
+ if (middle_len(msg)) {
+ add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
+ WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
+ }
+
+ if (data_len(msg)) {
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+ return 0;
+}
+
+static void prepare_read_enc_page(struct ceph_connection *con)
+{
+ struct bio_vec bv;
+
+ dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
+ con->v2.in_enc_resid);
+ WARN_ON(!con->v2.in_enc_resid);
+
+ bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
+ bv.bv_offset = 0;
+ bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+
+ set_in_bvec(con, &bv);
+ con->v2.in_enc_i++;
+ con->v2.in_enc_resid -= bv.bv_len;
+
+ if (con->v2.in_enc_resid) {
+ con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
+ return;
+ }
+
+ /*
+ * We are set to read the last piece of ciphertext (ending
+ * with epilogue) + auth tag.
+ */
+ WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_read_tail_secure(struct ceph_connection *con)
+{
+ struct page **enc_pages;
+ int enc_page_cnt;
+ int tail_len;
+
+ tail_len = tail_onwire_len(con->in_msg, true);
+ WARN_ON(!tail_len);
+
+ enc_page_cnt = calc_pages_for(0, tail_len);
+ enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
+ if (IS_ERR(enc_pages))
+ return PTR_ERR(enc_pages);
+
+ WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = enc_pages;
+ con->v2.in_enc_page_cnt = enc_page_cnt;
+ con->v2.in_enc_resid = tail_len;
+ con->v2.in_enc_i = 0;
+
+ prepare_read_enc_page(con);
+ return 0;
+}
+
static void __finish_skip(struct ceph_connection *con)
{
con->in_seq++;
@@ -2589,47 +2712,26 @@ static int __handle_control(struct ceph_connection *con, void *p)
}
msg = con->in_msg; /* set in process_message_header() */
- if (!front_len(msg) && !middle_len(msg)) {
- if (!data_len(msg))
- return process_message(con);
-
- prepare_read_data(con);
- return 0;
- }
-
- reset_in_kvecs(con);
if (front_len(msg)) {
WARN_ON(front_len(msg) > msg->front_alloc_len);
- add_in_kvec(con, msg->front.iov_base, front_len(msg));
msg->front.iov_len = front_len(msg);
-
- if (con_secure(con) && need_padding(front_len(msg)))
- add_in_kvec(con, FRONT_PAD(con->v2.in_buf),
- padding_len(front_len(msg)));
} else {
msg->front.iov_len = 0;
}
if (middle_len(msg)) {
WARN_ON(middle_len(msg) > msg->middle->alloc_len);
- add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
msg->middle->vec.iov_len = middle_len(msg);
-
- if (con_secure(con) && need_padding(middle_len(msg)))
- add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf),
- padding_len(middle_len(msg)));
} else if (msg->middle) {
msg->middle->vec.iov_len = 0;
}
- if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
- } else {
- add_in_kvec(con, con->v2.in_buf,
- con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN :
- CEPH_EPILOGUE_PLAIN_LEN);
- con->v2.in_state = IN_S_HANDLE_EPILOGUE;
- }
- return 0;
+ if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
+ return process_message(con);
+
+ if (con_secure(con))
+ return prepare_read_tail_secure(con);
+
+ return prepare_read_tail_plain(con);
}
static int handle_preamble(struct ceph_connection *con)
@@ -2717,7 +2819,7 @@ static int handle_epilogue(struct ceph_connection *con)
int ret;
if (con_secure(con)) {
- ret = decrypt_message(con);
+ ret = decrypt_tail(con);
if (ret) {
if (ret == -EBADMSG)
con->error_msg = "integrity error, bad epilogue auth tag";
@@ -2785,13 +2887,16 @@ static int populate_in_iter(struct ceph_connection *con)
ret = handle_control_remainder(con);
break;
case IN_S_PREPARE_READ_DATA:
- prepare_read_data(con);
- ret = 0;
+ ret = prepare_read_data(con);
break;
case IN_S_PREPARE_READ_DATA_CONT:
prepare_read_data_cont(con);
ret = 0;
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ prepare_read_enc_page(con);
+ ret = 0;
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3326,20 +3431,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
static void revoke_at_prepare_read_data(struct ceph_connection *con)
{
- int remaining; /* data + [data padding] + epilogue */
+ int remaining;
int resid;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
- if (con_secure(con))
- remaining = padded_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
remaining);
con->v2.in_iter.count -= resid;
@@ -3350,8 +3451,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con)
static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
{
int recved, resid; /* current piece of data */
- int remaining; /* [data padding] + epilogue */
+ int remaining;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
@@ -3363,12 +3465,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
ceph_msg_data_advance(&con->v2.in_cursor, recved);
WARN_ON(resid > con->v2.in_cursor.total_resid);
- if (con_secure(con))
- remaining = padding_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
con->v2.in_cursor.total_resid, remaining);
con->v2.in_iter.count -= resid;
@@ -3376,11 +3473,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
+{
+ int resid; /* current enc page (not necessarily data) */
+
+ WARN_ON(!con_secure(con));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
+
+ dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
+ con->v2.in_enc_resid);
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + con->v2.in_enc_resid);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
- WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
@@ -3399,6 +3511,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_DATA_CONT:
revoke_at_prepare_read_data_cont(con);
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ revoke_at_prepare_read_enc_page(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
@@ -3432,6 +3547,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
clear_out_sign_kvecs(con);
free_conn_bufs(con);
+ if (con->v2.in_enc_pages) {
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
+ }
if (con->v2.out_enc_pages) {
WARN_ON(!con->v2.out_enc_page_cnt);
ceph_release_page_vector(con->v2.out_enc_pages,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7b288a121a41..d5dc6be2522c 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -283,13 +283,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_lock();
list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ struct net_device *dev;
+
/*
* only add a note to our monitor buffer if:
* 1) this is the dev we received on
* 2) its after the last_rx delta
* 3) our rx_dropped count has gone up
*/
- if ((new_stat->dev == napi->dev) &&
+ /* Paired with WRITE_ONCE() in dropmon_net_event() */
+ dev = READ_ONCE(new_stat->dev);
+ if ((dev == napi->dev) &&
(time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
(napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
@@ -1576,7 +1580,10 @@ static int dropmon_net_event(struct notifier_block *ev_block,
mutex_lock(&net_dm_mutex);
list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
if (new_stat->dev == dev) {
- new_stat->dev = NULL;
+
+ /* Paired with READ_ONCE() in trace_napi_poll_hit() */
+ WRITE_ONCE(new_stat->dev, NULL);
+
if (trace_state == TRACE_OFF) {
list_del_rcu(&new_stat->list);
kfree_rcu(new_stat, rcu);
diff --git a/net/core/filter.c b/net/core/filter.c
index 4603b7cd3cd1..9eb785842258 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2710,6 +2710,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6c2016f7f3d1..ec0bf737b076 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1133,7 +1133,8 @@ out:
neigh_release(neigh);
}
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok)
{
int rc;
bool immediate_probe = false;
@@ -1154,12 +1155,17 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
- next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
- HZ/100);
+ if (!immediate_ok) {
+ next = now + 1;
+ } else {
+ immediate_probe = true;
+ next = now + max(NEIGH_VAR(neigh->parms,
+ RETRANS_TIME),
+ HZ / 100);
+ }
neigh_add_timer(neigh, next);
- immediate_probe = true;
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
@@ -1571,7 +1577,7 @@ static void neigh_managed_work(struct work_struct *work)
write_lock_bh(&tbl->lock);
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
- neigh_event_send(neigh, NULL);
+ neigh_event_send_probe(neigh, NULL, false);
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
write_unlock_bh(&tbl->lock);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..88cc0ad7d386 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 53ea262ecafd..fbddf966206b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -213,7 +213,7 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev)) {
+ if (netif_running(netdev) && netif_device_present(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e476403231f0..2fb8eb6791e8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1699,6 +1699,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
+ struct Qdisc *qdisc;
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1716,6 +1717,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
+ qdisc = rtnl_dereference(dev->qdisc);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1735,8 +1737,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (dev->qdisc &&
- nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
+ (qdisc &&
+ nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -3275,8 +3277,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
unsigned char name_assign_type = NET_NAME_USER;
struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
- const struct rtnl_link_ops *m_ops = NULL;
- struct net_device *master_dev = NULL;
+ const struct rtnl_link_ops *m_ops;
+ struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
struct nlattr *tb[IFLA_MAX + 1];
@@ -3314,6 +3316,8 @@ replay:
else
dev = NULL;
+ master_dev = NULL;
+ m_ops = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
if (master_dev)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0118f0afaa4f..b8138c372535 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -681,7 +681,7 @@ exit:
* while trying to recycle fragments on __skb_frag_unref() we need
* to make one SKB responsible for triggering the recycle path.
* So disable the recycling bit if an SKB is cloned and we have
- * additional references to to the fragmented part of the SKB.
+ * additional references to the fragmented part of the SKB.
* Eventually the last SKB will have the recycling bit set and it's
* dataref set to 0, which will trigger the recycling
*/
@@ -2276,7 +2276,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Free pulled out fragments. */
while ((list = skb_shinfo(skb)->frag_list) != insp) {
skb_shinfo(skb)->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -4730,7 +4730,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
- serr->ee.ee_data -= sk->sk_tskey;
+ serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
}
err = sock_queue_err_skb(sk, skb);
@@ -6105,7 +6105,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb,
/* Free pulled out fragments. */
while ((list = shinfo->frag_list) != insp) {
shinfo->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ff806d71921..6eb174805bf0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -879,9 +879,9 @@ int sock_set_timestamping(struct sock *sk, int optname,
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
- sk->sk_tskey = tcp_sk(sk)->snd_una;
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
} else {
- sk->sk_tskey = 0;
+ atomic_set(&sk->sk_tskey, 0);
}
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d9d0d227092c..c43f7446a75d 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -349,6 +349,7 @@ void dsa_flush_workqueue(void)
{
flush_workqueue(dsa_owq);
}
+EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3d21521453fe..dcad3100b164 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1718,7 +1718,6 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch);
void dsa_switch_shutdown(struct dsa_switch *ds)
{
struct net_device *master, *slave_dev;
- LIST_HEAD(unregister_list);
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
@@ -1729,25 +1728,13 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
slave_dev = dp->slave;
netdev_upper_dev_unlink(master, slave_dev);
- /* Just unlinking ourselves as uppers of the master is not
- * sufficient. When the master net device unregisters, that will
- * also call dev_close, which we will catch as NETDEV_GOING_DOWN
- * and trigger a dev_close on our own devices (dsa_slave_close).
- * In turn, that will call dev_mc_unsync on the master's net
- * device. If the master is also a DSA switch port, this will
- * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
- * its own master. Lockdep will complain about the fact that
- * all cascaded masters have the same dsa_master_addr_list_lock_key,
- * which it normally would not do if the cascaded masters would
- * be in a proper upper/lower relationship, which we've just
- * destroyed.
- * To suppress the lockdep warnings, let's actually unregister
- * the DSA slave interfaces too, to avoid the nonsensical
- * multicast address list synchronization on shutdown.
- */
- unregister_netdevice_queue(slave_dev, &unregister_list);
}
- unregister_netdevice_many(&unregister_list);
+
+ /* Disconnect from further netdevice notifiers on the master,
+ * since netdev_uses_dsa() will now return false.
+ */
+ dsa_switch_for_each_cpu_port(dp, ds)
+ dp->master->dsa_ptr = NULL;
rtnl_unlock();
mutex_unlock(&dsa2_mutex);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 760306f0012f..23c79e91ac67 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -147,7 +147,6 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
-void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 2199104ca7df..880f910b23a9 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -260,11 +260,16 @@ static void dsa_netdev_ops_set(struct net_device *dev,
dev->dsa_ptr->netdev_ops = ops;
}
+/* Keep the master always promiscuous if the tagging protocol requires that
+ * (garbles MAC DA) or if it doesn't support unicast filtering, case in which
+ * it would revert to promiscuous mode as soon as we call dev_uc_add() on it
+ * anyway.
+ */
static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
{
const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
- if (!ops->promisc_on_master)
+ if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master)
return;
ASSERT_RTNL();
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bd78192e0e47..1a40c52f5a42 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -395,10 +395,17 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
.tree_index = dp->ds->dst->index,
.sw_index = dp->ds->index,
.port = dp->index,
- .bridge = *dp->bridge,
};
int err;
+ /* If the port could not be offloaded to begin with, then
+ * there is nothing to do.
+ */
+ if (!dp->bridge)
+ return;
+
+ info.bridge = *dp->bridge;
+
/* Here the port is already unbridged. Reflect the current configuration
* so that drivers can program their chips accordingly.
*/
@@ -781,9 +788,15 @@ int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
struct dsa_port *cpu_dp = dp->cpu_dp;
int err;
- err = dev_uc_add(cpu_dp->master, addr);
- if (err)
- return err;
+ /* Avoid a call to __dev_set_promiscuity() on the master, which
+ * requires rtnl_lock(), since we can't guarantee that is held here,
+ * and we can't take it either.
+ */
+ if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_add(cpu_dp->master, addr);
+ if (err)
+ return err;
+ }
return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
}
@@ -800,9 +813,11 @@ int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
struct dsa_port *cpu_dp = dp->cpu_dp;
int err;
- err = dev_uc_del(cpu_dp->master, addr);
- if (err)
- return err;
+ if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_del(cpu_dp->master, addr);
+ if (err)
+ return err;
+ }
return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index cb548188f813..98d7d7120bab 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -77,7 +77,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
- __be16 *lan9303_tag;
u16 lan9303_tag1;
unsigned int source_port;
@@ -87,14 +86,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- lan9303_tag = dsa_etype_header_pos_rx(skb);
-
- if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
- return NULL;
+ if (skb_vlan_tag_present(skb)) {
+ lan9303_tag1 = skb_vlan_tag_get(skb);
+ __vlan_hwaccel_clear_tag(skb);
+ } else {
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &lan9303_tag1);
+ skb_pull_rcsum(skb, ETH_HLEN);
}
- lan9303_tag1 = ntohs(lan9303_tag[1]);
source_port = lan9303_tag1 & 0x3;
skb->dev = dsa_master_find_slave(dev, 0, source_port);
@@ -103,13 +103,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* remove the special VLAN tag between the MAC addresses
- * and the current ethertype field.
- */
- skb_pull_rcsum(skb, 2 + 2);
-
- dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
-
if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
dsa_default_offload_fwd_mark(skb);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 277124f206e0..e0b072aecf0f 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c465bac1eb0..72fde2888ad2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1376,8 +1376,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
}
ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_segment))
+ if (likely(ops && ops->callbacks.gso_segment)) {
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
if (IS_ERR_OR_NULL(segs))
goto out;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..85117b45216d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -436,6 +436,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (net->ipv4.fib_has_custom_local_routes ||
fib4_has_custom_rules(net))
goto full_check;
+ /* Within the same container, it is regarded as a martian source,
+ * and the same host but different containers are not.
+ */
if (inet_lookup_ifaddr_rcu(net, src))
return -EINVAL;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e184bcb19943..78e40ea42e58 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -16,10 +16,9 @@ struct fib_alias {
u8 fa_slen;
u32 tb_id;
s16 fa_default;
- u8 offload:1,
- trap:1,
- offload_failed:1,
- unused:5;
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b4589861b84c..2dd375f7407b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -525,9 +525,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.dst_len = dst_len;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..f7f74d5c14da 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1047,19 +1047,23 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
- fa_match->offload_failed == fri->offload_failed)
+ /* These are paired with the WRITE_ONCE() happening in this function.
+ * The reason is that we are only protected by RCU at this point.
+ */
+ if (READ_ONCE(fa_match->offload) == fri->offload &&
+ READ_ONCE(fa_match->trap) == fri->trap &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload = fri->offload;
- fa_match->trap = fri->trap;
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
- fa_match->offload_failed == fri->offload_failed)
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload_failed = fri->offload_failed;
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
if (!net->ipv4.sysctl_fib_notify_on_flag_change)
goto out;
@@ -2297,9 +2301,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.dst_len = KEYLENGTH - fa->fa_slen;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 57c1d8431386..7911916a480b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
@@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
- if (iter.frag)
- ip_options_fragment(iter.frag);
-
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
+ bool first_frag = (iter.offset == 0);
+
IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
+ if (first_frag && IPCB(skb)->opt.optlen) {
+ /* ipcb->opt is not populated for frags
+ * coming from __ip_make_skb(),
+ * ip_options_fragment() needs optlen
+ */
+ IPCB(iter.frag)->opt.optlen =
+ IPCB(skb)->opt.optlen;
+ ip_options_fragment(iter.frag);
+ ip_send_check(iter.iph);
+ }
}
skb->tstamp = tstamp;
@@ -975,7 +991,7 @@ static int __ip_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 07274619b9ea..29bbe2b08ae9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -256,7 +256,9 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ipmr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 67087f95579f..aab384126f61 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -58,10 +58,6 @@ config NF_TABLES_ARP
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV4
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0e56df3a45e2..3ee947557b88 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ return NULL;
}
read_lock_bh(&ping_table.lock);
@@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
sock_hold(sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9eb5fc247868..9f97b9cbf7b3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff6f91cdb6c4..f33ad1f383b6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3395,8 +3395,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_tos == fri.tos &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
- fri.offload = fa->offload;
- fri.trap = fa->trap;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
break;
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b75836db19b..02cb275e5487 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
+ sk_defer_free_flush(sk);
if (spliced)
return spliced;
@@ -936,6 +937,22 @@ void tcp_remove_empty_skb(struct sock *sk)
}
}
+/* skb changing from pure zc to mixed, must charge zc */
+static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
+{
+ if (unlikely(skb_zcopy_pure(skb))) {
+ u32 extra = skb->truesize -
+ SKB_TRUESIZE(skb_end_offset(skb));
+
+ if (!sk_wmem_schedule(sk, extra))
+ return -ENOMEM;
+
+ sk_mem_charge(sk, extra);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+ return 0;
+}
+
static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
struct page *page, int offset, size_t *size)
{
@@ -971,7 +988,7 @@ new_segment:
tcp_mark_push(tp, skb);
goto new_segment;
}
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy))
return NULL;
if (can_coalesce) {
@@ -1319,16 +1336,8 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- /* skb changing from pure zc to mixed, must charge zc */
- if (unlikely(skb_zcopy_pure(skb))) {
- if (!sk_wmem_schedule(sk, skb->data_len))
- goto wait_for_space;
-
- sk_mem_charge(sk, skb->data_len);
- skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
- }
-
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb) ||
+ !sk_wmem_schedule(sk, copy))
goto wait_for_space;
err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc49a3d551eb..bfe4112e000c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
(mss != tcp_skb_seglen(skb)))
goto out;
+ if (!tcp_skb_can_collapse(prev, skb))
+ goto out;
len = skb->len;
pcount = tcp_skb_pcount(skb);
if (tcp_skb_shift(prev, skb, pcount, len))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b53476e78c84..fec656f5a39e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2095,7 +2095,7 @@ process:
nf_reset_ct(skb);
if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_TCP_FILTER;
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data;
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index b91003538d87..bc3a043a5d5c 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
- if (node->dev != dev)
+ if (list_entry_is_head(node, &info->shared->devices, list))
return;
list_del(&node->list);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3eee17790a82..6c8ab3e6e6fe 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1839,8 +1839,8 @@ out:
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- u32 banned_flags)
+static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ u32 banned_flags)
{
struct inet6_ifaddr *ifp;
int err = -EADDRNOTAVAIL;
@@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
__u32 valid_lft, u32 prefered_lft)
{
struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
- int create = 0;
+ int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
@@ -2633,19 +2633,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
unsigned long now;
u32 stored_lft;
- /* Update lifetime (RFC4862 5.5.3 e)
- * We deviate from RFC4862 by honoring all Valid Lifetimes to
- * improve the reaction of SLAAC to renumbering events
- * (draft-gont-6man-slaac-renum-06, Section 4.2)
- */
+ /* update lifetime (RFC2462 5.5.3 e) */
spin_lock_bh(&ifp->lock);
now = jiffies;
if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
-
if (!create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
ifp->tstamp = now;
@@ -4985,6 +4998,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
+ spin_lock_bh(&ifa->lock);
if (!((ifa->flags&IFA_F_PERMANENT) &&
(ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
@@ -5006,6 +5020,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
+ spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 463c37dea449..413f66781e50 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
fn = rcu_dereference_protected(f6i->fib6_node,
lockdep_is_held(&f6i->fib6_table->tb6_lock));
if (fn)
- fn->fn_sernum = fib6_new_sernum(net);
+ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
}
/*
@@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = READ_ONCE(w->root->fn_sernum);
}
} else {
- if (cb->args[5] != w->root->fn_sernum) {
+ int sernum = READ_ONCE(w->root->fn_sernum);
+ if (cb->args[5] != sernum) {
/* Begin at the root if the tree changed */
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = sernum;
w->state = FWS_INIT;
w->node = w->root;
w->skip = w->count;
@@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
/* paired with smp_rmb() in fib6_get_cookie_safe() */
smp_wmb();
while (fn) {
- fn->fn_sernum = sernum;
+ WRITE_ONCE(fn->fn_sernum, sernum);
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
@@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w)
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
- w->node->fn_sernum != c->sernum)
- w->node->fn_sernum = c->sernum;
+ READ_ONCE(w->node->fn_sernum) != c->sernum)
+ WRITE_ONCE(w->node->fn_sernum, c->sernum);
if (!c->func) {
WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
@@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
iter->w.args = iter;
- iter->sernum = iter->w.root->fn_sernum;
+ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
INIT_LIST_HEAD(&iter->w.lh);
fib6_walker_link(net, &iter->w);
}
@@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
{
- if (iter->sernum != iter->w.root->fn_sernum) {
- iter->sernum = iter->w.root->fn_sernum;
+ int sernum = READ_ONCE(iter->w.root->fn_sernum);
+
+ if (iter->sernum != sernum) {
+ iter->sernum = sernum;
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
WARN_ON(iter->w.skip);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index aa673a6a7e43..ceb85c67ce39 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
err = -EINVAL;
goto done;
}
- if (fl_shared_exclusive(fl) || fl->opt)
+ if (fl_shared_exclusive(fl) || fl->opt) {
+ WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
+ }
return fl;
done:
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b29e9ba5e113..5f577e21459b 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -114,6 +114,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
}
if (IS_ERR_OR_NULL(segs))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2995f8d89e7e..304a295de84f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1465,7 +1465,7 @@ static int __ip6_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fe786df4f849..97ade833f58c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
true, 0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7cf73e60e619..8a2db926b5eb 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -243,7 +243,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ip6mr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bed8155508c8..a8861db52c18 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1759,7 +1759,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
- if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 97d3d1b36dbc..0ba62f4868f9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -47,10 +47,6 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV6
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b85383606df7..b8d6dc9aeeb6 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
-# flow table support
-obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
-
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ /dev/null
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e6de94203c13..ea1cf414a92e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (from) {
fn = rcu_dereference(from->fib6_node);
if (fn && (rt->rt6i_flags & RTF_DEFAULT))
- fn->fn_sernum = -1;
+ WRITE_ONCE(fn->fn_sernum, -1);
}
}
rcu_read_unlock();
@@ -5753,11 +5753,11 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
if (!dst) {
- if (rt->offload)
+ if (READ_ONCE(rt->offload))
rtm->rtm_flags |= RTM_F_OFFLOAD;
- if (rt->trap)
+ if (READ_ONCE(rt->trap))
rtm->rtm_flags |= RTM_F_TRAP;
- if (rt->offload_failed)
+ if (READ_ONCE(rt->offload_failed))
rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
}
@@ -6215,19 +6215,20 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
struct sk_buff *skb;
int err;
- if (f6i->offload == offload && f6i->trap == trap &&
- f6i->offload_failed == offload_failed)
+ if (READ_ONCE(f6i->offload) == offload &&
+ READ_ONCE(f6i->trap) == trap &&
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload = offload;
- f6i->trap = trap;
+ WRITE_ONCE(f6i->offload, offload);
+ WRITE_ONCE(f6i->trap, trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
- f6i->offload_failed == offload_failed)
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload_failed = offload_failed;
+ WRITE_ONCE(f6i->offload_failed, offload_failed);
if (!rcu_access_pointer(f6i->fib6_node))
/* The route was removed from the tree, do not send
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1eeabdf10052..e5ccf17618ab 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -666,7 +666,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
ieee80211_ie_build_he_6ghz_cap(sdata, skb);
}
-static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -686,6 +686,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
const struct ieee80211_sband_iftype_data *iftd;
struct ieee80211_prep_tx_info info = {};
+ int ret;
/* we know it's writable, cast away the const */
if (assoc_data->ie_len)
@@ -699,7 +700,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
- return;
+ return -EINVAL;
}
chan = chanctx_conf->def.chan;
rcu_read_unlock();
@@ -750,7 +751,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
(iftd ? iftd->vendor_elems.len : 0),
GFP_KERNEL);
if (!skb)
- return;
+ return -ENOMEM;
skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -1031,15 +1032,22 @@ skip_rates:
skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
}
- if (assoc_data->fils_kek_len &&
- fils_encrypt_assoc_req(skb, assoc_data) < 0) {
- dev_kfree_skb(skb);
- return;
+ if (assoc_data->fils_kek_len) {
+ ret = fils_encrypt_assoc_req(skb, assoc_data);
+ if (ret < 0) {
+ dev_kfree_skb(skb);
+ return ret;
+ }
}
pos = skb_tail_pointer(skb);
kfree(ifmgd->assoc_req_ies);
ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
+ if (!ifmgd->assoc_req_ies) {
+ dev_kfree_skb(skb);
+ return -ENOMEM;
+ }
+
ifmgd->assoc_req_ies_len = pos - ie_start;
drv_mgd_prepare_tx(local, sdata, &info);
@@ -1049,6 +1057,8 @@ skip_rates:
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_tx_skb(sdata, skb);
+
+ return 0;
}
void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -4497,6 +4507,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
struct ieee80211_local *local = sdata->local;
+ int ret;
sdata_assert_lock(sdata);
@@ -4517,7 +4528,9 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata, "associate with %pM (try %d/%d)\n",
assoc_data->bss->bssid, assoc_data->tries,
IEEE80211_ASSOC_MAX_TRIES);
- ieee80211_send_assoc(sdata);
+ ret = ieee80211_send_assoc(sdata);
+ if (ret)
+ return ret;
if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 8d9f4ff3e285..e52cef750500 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -412,13 +412,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc)
+ if (rc) {
kfree(key);
+ } else {
+ trace_mctp_key_acquire(key);
- trace_mctp_key_acquire(key);
-
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
+ /* we don't need to release key->lock on exit */
+ mctp_key_unref(key);
+ }
key = NULL;
} else {
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 48f75a56f4ae..d6fdc5782d33 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1607,6 +1607,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mpls_dev *mdev;
unsigned int flags;
+ int err;
if (event == NETDEV_REGISTER) {
mdev = mpls_add_dev(dev);
@@ -1621,7 +1622,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
return NOTIFY_OK;
switch (event) {
- int err;
case NETDEV_DOWN:
err = mpls_ifdown(dev, event);
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3240b72271a7..7558802a1435 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -35,12 +35,14 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
+ SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
+ SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index ecd3d8b117e0..2966fcb6548b 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -28,12 +28,14 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
+ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
+ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 696b2c4613a7..7bea318ac5f2 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -213,6 +213,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->remote = *addr;
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
spin_unlock_bh(&pm->lock);
@@ -253,8 +255,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
mptcp_event_addr_removed(msk, rm_list->ids[i]);
spin_lock_bh(&pm->lock);
- mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
- pm->rm_list_rx = *rm_list;
+ if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
+ pm->rm_list_rx = *rm_list;
+ else
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
spin_unlock_bh(&pm->lock);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 75af1f701e1d..4b5d795383cd 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
+static struct mptcp_pm_addr_entry *
+__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
+ bool lookup_by_id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
+ (lookup_by_id && entry->addr.id == info->id))
+ return entry;
+ }
+ return NULL;
+}
+
static int
lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
{
@@ -532,6 +546,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet, msk);
+ /* due to racing events on both ends we can reach here while
+ * previous add address is still running: if we invoke now
+ * mptcp_pm_announce_addr(), that will fail and the
+ * corresponding id will be marked as used.
+ * Instead let the PM machinery reschedule us when the
+ * current address announce will be completed.
+ */
+ if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
+ return;
+
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
@@ -636,6 +660,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
unsigned int subflows_max;
+ bool reset_port = false;
int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
@@ -645,15 +670,19 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
- if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
+ remote = msk->pm.remote;
+ if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
goto add_addr_echo;
+ /* pick id 0 port, if none is provided the remote address */
+ if (!remote.port) {
+ reset_port = true;
+ remote.port = sk->sk_dport;
+ }
+
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- remote = msk->pm.remote;
- if (!remote.port)
- remote.port = sk->sk_dport;
nr = fill_local_addresses_vec(msk, addrs);
msk->pm.add_addr_accepted++;
@@ -666,8 +695,12 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
__mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
+ /* be sure to echo exactly the received address */
+ if (reset_port)
+ remote.port = 0;
+
add_addr_echo:
- mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
+ mptcp_pm_announce_addr(msk, &remote, true);
mptcp_pm_nl_addr_send_ack(msk);
}
@@ -777,7 +810,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed)
continue;
@@ -911,6 +944,7 @@ out:
static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
struct mptcp_pm_addr_entry *entry)
{
+ int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
struct mptcp_sock *msk;
struct socket *ssock;
@@ -935,8 +969,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
}
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
- err = kernel_bind(ssock, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in));
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (entry->addr.family == AF_INET6)
+ addrlen = sizeof(struct sockaddr_in6);
+#endif
+ err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
if (err) {
pr_warn("kernel_bind error, err=%d", err);
goto out;
@@ -1763,18 +1800,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
- (lookup_by_id && entry->addr.id == addr.addr.id)) {
- mptcp_nl_addr_backup(net, &entry->addr, bkup);
-
- if (bkup)
- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else
- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
- }
+ spin_lock_bh(&pernet->lock);
+ entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+ if (!entry) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
}
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ addr = *entry;
+ spin_unlock_bh(&pernet->lock);
+
+ mptcp_nl_addr_backup(net, &addr.addr, bkup);
return 0;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0e6b42c76ea0..85317ce38e3f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
- char reset_start[0];
+ struct_group(reset,
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
@@ -458,7 +458,7 @@ struct mptcp_subflow_context {
long delegated_status;
- char reset_end[0];
+ );
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
@@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
- memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 894a325d39f2..d6aa5b47031e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1924,15 +1924,17 @@ repeat:
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_ct_put(ct);
skb->_nfct = 0;
- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
- NF_CT_STAT_INC_ATOMIC(state->net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
*/
if (ret == -NF_REPEAT)
goto repeat;
+
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ if (ret == -NF_DROP)
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
ret = -ret;
goto out;
}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 7f19ee259609..55415f011943 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -20,13 +20,14 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#define HELPER_NAME "netbios-ns"
#define NMBD_PORT 137
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_netbios_ns");
-MODULE_ALIAS_NFCT_HELPER("netbios_ns");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
static unsigned int timeout __read_mostly = 3;
module_param(timeout, uint, 0400);
@@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
}
static struct nf_conntrack_helper helper __read_mostly = {
- .name = "netbios-ns",
+ .name = HELPER_NAME,
.tuple.src.l3num = NFPROTO_IPV4,
.tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ac438370f94a..7032402ffd33 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2311,7 +2311,8 @@ ctnetlink_create_conntrack(struct net *net,
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- /* not in hash table yet so not strictly necessary */
+ /* disable helper auto-assignment for this entry */
+ ct->status |= IPS_HELPER;
RCU_INIT_POINTER(help->helper, helper);
}
} else {
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 2394238d01c9..5a936334b517 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
pr_debug("Setting vtag %x for dir %d\n",
ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
+
+ /* don't renew timeout on init retransmit so
+ * port reuse by client or NAT middlebox cannot
+ * keep entry alive indefinitely (incl. nat info).
+ */
+ if (new_state == SCTP_CONNTRACK_CLOSED &&
+ old_state == SCTP_CONNTRACK_CLOSED &&
+ nf_ct_is_confirmed(ct))
+ ignore = true;
}
ct->proto.sctp.state = new_state;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af5115e127cf..d1582b888c0d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -446,6 +446,32 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
+static void tcp_init_sender(struct ip_ct_tcp_state *sender,
+ struct ip_ct_tcp_state *receiver,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *tcph,
+ u32 end, u32 win)
+{
+ /* SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
+ */
+ sender->td_end =
+ sender->td_maxend = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+
+ tcp_options(skb, dataoff, tcph, sender);
+ /* RFC 1323:
+ * Both sides must send the Window Scale option
+ * to enable window scaling in either direction.
+ */
+ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
+ sender->td_scale = 0;
+ receiver->td_scale = 0;
+ }
+}
+
static bool tcp_in_window(struct nf_conn *ct,
enum ip_conntrack_dir dir,
unsigned int index,
@@ -499,24 +525,9 @@ static bool tcp_in_window(struct nf_conn *ct,
* Initialize sender data.
*/
if (tcph->syn) {
- /*
- * SYN-ACK in reply to a SYN
- * or SYN from reply direction in simultaneous open.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, dataoff, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
if (!tcph->ack)
/* Simultaneous open */
return true;
@@ -560,6 +571,18 @@ static bool tcp_in_window(struct nf_conn *ct,
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(skb, dataoff, tcph, sender);
+ } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
+ state->state == TCP_CONNTRACK_SYN_SENT) {
+ /* Retransmitted syn-ack, or syn (simultaneous open).
+ *
+ * Re-init state for this direction, just like for the first
+ * syn(-ack) reply, it might differ in seq, ack or tcp options.
+ */
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
+ if (!tcph->ack)
+ return true;
}
if (!(tcph->ack)) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 77938b1042f3..9cd1d7a62804 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
prule = (struct nft_rule_dp *)ptr;
prule->is_last = 1;
- ptr += offsetof(struct nft_rule_dp, data);
/* blob size does not include the trailer rule */
}
@@ -6552,12 +6551,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
{
struct nft_object *newobj;
struct nft_trans *trans;
- int err;
+ int err = -ENOMEM;
+
+ if (!try_module_get(type->owner))
+ return -ENOENT;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
- return -ENOMEM;
+ goto err_trans;
newobj = nft_obj_init(ctx, type, attr);
if (IS_ERR(newobj)) {
@@ -6574,6 +6576,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
err_free_trans:
kfree(trans);
+err_trans:
+ module_put(type->owner);
return err;
}
@@ -8186,7 +8190,7 @@ static void nft_obj_commit_update(struct nft_trans *trans)
if (obj->ops->update)
obj->ops->update(obj, newobj);
- kfree(newobj);
+ nft_obj_destroy(&trans->ctx, newobj);
}
static void nft_commit_release(struct nft_trans *trans)
@@ -8264,14 +8268,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
void *data, *data_boundary;
struct nft_rule_dp *prule;
struct nft_rule *rule;
- int i;
/* already handled or inactive chain? */
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
- i = 0;
data_size = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
@@ -8301,7 +8303,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
return -ENOMEM;
size = 0;
- track.last = last;
+ track.last = nft_expr_last(rule);
nft_rule_for_each_expr(expr, last, rule) {
track.cur = expr;
@@ -8979,7 +8981,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- kfree(nft_trans_obj_newobj(trans));
+ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
@@ -9639,10 +9641,13 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain);
static void __nft_release_hook(struct net *net, struct nft_table *table)
{
+ struct nft_flowtable *flowtable;
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list)
nf_tables_unregister_hook(net, table, chain);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list);
}
static void __nft_release_hooks(struct net *net)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 9656c1646222..2d36952b1392 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_first(rule);
while (nft_expr_more(rule, expr)) {
- if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
+ if (expr->ops->offload_action &&
+ expr->ops->offload_action(expr))
num_actions++;
expr = nft_expr_next(expr);
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 9d5947ab8d4e..e646e9ee4a98 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -167,12 +167,24 @@ nla_put_failure:
return -1;
}
+static bool nft_byteorder_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_byteorder *priv = nft_expr_priv(expr);
+
+ track->regs[priv->dreg].selector = NULL;
+ track->regs[priv->dreg].bitwise = NULL;
+
+ return false;
+}
+
static const struct nft_expr_ops nft_byteorder_ops = {
.type = &nft_byteorder_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
.eval = nft_byteorder_eval,
.init = nft_byteorder_init,
.dump = nft_byteorder_dump,
+ .reduce = nft_byteorder_reduce,
};
struct nft_expr_type nft_byteorder_type __read_mostly = {
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 7d00a1452b1d..3362417ebfdb 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
{
bool invert = false;
u32 flags, limit;
+ int err;
if (!tb[NFTA_CONNLIMIT_COUNT])
return -EINVAL;
@@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
priv->limit = limit;
priv->invert = invert;
- return nf_ct_netns_get(ctx->net, ctx->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err_netns;
+
+ return 0;
+err_netns:
+ kfree(priv->list);
+
+ return err;
}
static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 518d96c8c247..5adf8bb628a8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
if (likely(refcount_read(&ct->ct_general.use) == 1)) {
+ refcount_inc(&ct->ct_general.use);
nf_ct_zone_add(ct, &zone);
} else {
- /* previous skb got queued to userspace */
+ /* previous skb got queued to userspace, allocate temporary
+ * one until percpu template can be reused.
+ */
ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
if (!ct) {
regs->verdict.code = NF_DROP;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index bbf3fcba3df4..5b5c607fbf83 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
}
+static bool nft_dup_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
static struct nft_expr_type nft_dup_netdev_type;
static const struct nft_expr_ops nft_dup_netdev_ops = {
.type = &nft_dup_netdev_type,
@@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
.init = nft_dup_netdev_init,
.dump = nft_dup_netdev_dump,
.offload = nft_dup_netdev_offload,
+ .offload_action = nft_dup_netdev_offload_action,
};
static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index dbe1f2e7dd9e..9e927ab4df15 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
{
struct tcphdr *tcph;
- if (pkt->tprot != IPPROTO_TCP)
+ if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
return NULL;
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index fa9301ca6033..619e394a91de 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -79,6 +79,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
}
+static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
struct nft_fwd_neigh {
u8 sreg_dev;
u8 sreg_addr;
@@ -222,6 +227,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.dump = nft_fwd_netdev_dump,
.validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
+ .offload_action = nft_fwd_netdev_offload_action,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 90c64d27ae53..d0f67d325bdf 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -213,6 +213,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_immediate_offload_action(const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return true;
+
+ return false;
+}
+
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -224,7 +234,7 @@ static const struct nft_expr_ops nft_imm_ops = {
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
.offload = nft_immediate_offload,
- .offload_flags = NFT_OFFLOAD_F_ACTION,
+ .offload_action = nft_immediate_offload_action,
};
struct nft_expr_type nft_imm_type __read_mostly = {
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c4f308460dd1..a726b623963d 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -340,11 +340,20 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
+static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_pkts_ops = {
.type = &nft_limit_obj_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
.init = nft_limit_obj_pkts_init,
+ .destroy = nft_limit_obj_pkts_destroy,
.eval = nft_limit_obj_pkts_eval,
.dump = nft_limit_obj_pkts_dump,
};
@@ -378,11 +387,20 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
+static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, priv);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_bytes_ops = {
.type = &nft_limit_obj_type,
.size = sizeof(struct nft_limit_priv),
.init = nft_limit_obj_bytes_init,
+ .destroy = nft_limit_obj_bytes_destroy,
.eval = nft_limit_obj_bytes_eval,
.dump = nft_limit_obj_bytes_dump,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 940fed9a760b..5cc06aef4345 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -83,7 +83,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
{
unsigned int thoff = nft_thoff(pkt);
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
return -1;
switch (pkt->tprot) {
@@ -147,7 +147,7 @@ void nft_payload_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
goto err;
offset = nft_thoff(pkt);
break;
@@ -688,7 +688,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
goto err;
offset = nft_thoff(pkt);
break;
@@ -728,7 +728,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
pkt->tprot == IPPROTO_SCTP &&
skb->ip_summed != CHECKSUM_PARTIAL) {
- if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
+ if (pkt->fragoff == 0 &&
+ nft_payload_csum_sctp(skb, nft_thoff(pkt)))
goto err;
}
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index a0109fa1e92d..1133e06f3c40 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
if (err)
goto nf_ct_failure;
err = nf_synproxy_ipv6_init(snet, ctx->net);
- if (err)
+ if (err) {
+ nf_synproxy_ipv4_fini(snet, ctx->net);
goto nf_ct_failure;
+ }
break;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5e6459e11605..7013f55f05d1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par)
{
if (par->family == NFPROTO_IPV4)
nf_defrag_ipv4_disable(par->net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
- nf_defrag_ipv4_disable(par->net);
+ nf_defrag_ipv6_disable(par->net);
+#endif
}
static struct xt_match socket_mt_reg[] __read_mostly = {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..780d9e2246f3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
+ u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+ ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
+ (__force __wsum)(ipv6_tclass << 12));
+
+ ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
+{
+ u32 ofl;
+
+ ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
+ fl = OVS_MASKED(ofl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ nh->flow_lbl[0] = (u8)(fl >> 16);
+ nh->flow_lbl[1] = (u8)(fl >> 8);
+ nh->flow_lbl[2] = (u8)fl;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+ new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
+ (__force __wsum)(new_ttl << 8));
+ nh->hop_limit = new_ttl;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +577,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
flow_key->ip.tos = ipv6_get_dsfield(nh);
}
if (mask->ipv6_label) {
- set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
flow_key->ipv6.label =
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
- mask->ipv6_hlimit);
+ set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5bd409ab4cc2..ab87f22cc7ec 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
@@ -1788,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
__dev_remove_pack(&po->prot_hook);
- po->fanout = match;
+
+ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
+ WRITE_ONCE(po->fanout, match);
+
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
@@ -3353,6 +3357,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3932,7 +3937,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}
case PACKET_FANOUT_DATA:
{
- if (!po->fanout)
+ /* Paired with the WRITE_ONCE() in fanout_add() */
+ if (!READ_ONCE(po->fanout))
return -EINVAL;
return fanout_set_data(po, optval, optlen);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 6be2672a65ea..df864e692267 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct sk_buff *skb;
- unsigned long resend_at, rto_j;
+ unsigned long resend_at;
rxrpc_seq_t cursor, seq, top;
ktime_t now, max_age, oldest, ack_ts;
int ix;
@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- rto_j = call->peer->rto_j;
-
now = ktime_get_real();
- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
+ max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
spin_lock_bh(&call->lock);
@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rto_j;
+ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 10f2bf2e9068..a45c83f22236 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -468,7 +468,7 @@ done:
if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
ack_lost_at += nowj;
WRITE_ONCE(call->ack_lost_at, ack_lost_at);
rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 32563cef85bf..ca03e7284254 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -274,7 +274,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
err = tc_setup_action(&fl_action->action, actions);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "Failed to setup tc actions for offload\n");
+ "Failed to setup tc actions for offload");
goto fl_err;
}
@@ -1037,6 +1037,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ int repeat_ttl;
if (jmp_prgcnt > 0) {
jmp_prgcnt -= 1;
@@ -1045,11 +1046,17 @@ restart_act_graph:
if (tc_act_skip_sw(a->tcfa_flags))
continue;
+
+ repeat_ttl = 32;
repeat:
ret = a->ops->act(skb, a, res);
- if (ret == TC_ACT_REPEAT)
- goto repeat; /* we need a ttl - JHS */
-
+ if (unlikely(ret == TC_ACT_REPEAT)) {
+ if (--repeat_ttl != 0)
+ goto repeat;
+ /* suspicious opcode, stop pipeline */
+ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n");
+ return TC_ACT_OK;
+ }
if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..33e70d60f0bf 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -533,11 +533,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct nf_conn *ct;
u8 dir;
- /* Previously seen or loopback */
- ct = nf_ct_get(skb, &ctinfo);
- if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
- return false;
-
switch (family) {
case NFPROTO_IPV4:
if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d4e27c679123..5ce1208a6ea3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1044,7 +1044,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
/* Find qdisc */
if (!*parent) {
- *q = dev->qdisc;
+ *q = rcu_dereference(dev->qdisc);
*parent = (*q)->handle;
} else {
*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
@@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool prio_allocate;
u32 parent;
u32 chain_index;
- struct Qdisc *q = NULL;
+ struct Qdisc *q;
struct tcf_chain_info chain_info;
- struct tcf_chain *chain = NULL;
+ struct tcf_chain *chain;
struct tcf_block *block;
struct tcf_proto *tp;
unsigned long cl;
@@ -1976,6 +1976,8 @@ replay:
tp = NULL;
cl = 0;
block = NULL;
+ q = NULL;
+ chain = NULL;
flags = 0;
if (prio == 0) {
@@ -2585,7 +2587,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
parent = tcm->tcm_parent;
if (!parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
@@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
struct tcmsg *t;
u32 parent;
u32 chain_index;
- struct Qdisc *q = NULL;
- struct tcf_chain *chain = NULL;
+ struct Qdisc *q;
+ struct tcf_chain *chain;
struct tcf_block *block;
unsigned long cl;
int err;
@@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
return -EPERM;
replay:
+ q = NULL;
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2959,7 +2962,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
if (!tcm->tcm_parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2cb496c84878..e3c0e8ea2dbb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -1082,10 +1082,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
skip:
if (!ingress) {
notify_and_destroy(net, skb, n, classid,
- dev->qdisc, new);
+ rtnl_dereference(dev->qdisc), new);
if (new && !new->ops->attach)
qdisc_refcount_inc(new);
- dev->qdisc = new ? : &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
if (new && new->ops->attach)
new->ops->attach(new);
@@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
err = -ENOENT;
if (!ops) {
- NL_SET_ERR_MSG(extack, "Specified qdisc not found");
+ NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
@@ -1451,7 +1451,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
if (!q) {
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
@@ -1540,7 +1540,7 @@ replay:
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
/* It may be default qdisc, ignore it */
@@ -1762,7 +1762,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
+ if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
+ skb, cb, &q_idx, s_q_idx,
true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
@@ -2033,7 +2034,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
} else if (qid1) {
qid = qid1;
} else if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
/* Now qid is genuine qdisc handle consistent
* both with parent and child.
@@ -2044,7 +2045,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
portid = TC_H_MAKE(qid, portid);
} else {
if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
}
/* OK. Locate qdisc */
@@ -2205,7 +2206,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
+ if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
+ skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f893d9a81b01..5bab9f8b8f45 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1164,30 +1164,33 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) {
- dev->qdisc = qdisc;
+ rcu_assign_pointer(dev->qdisc, qdisc);
qdisc->ops->attach(qdisc);
}
}
+ qdisc = rtnl_dereference(dev->qdisc);
/* Detect default qdisc setup/init failed and fallback to "noqueue" */
- if (dev->qdisc == &noop_qdisc) {
+ if (qdisc == &noop_qdisc) {
netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
default_qdisc_ops->id, noqueue_qdisc_ops.id);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE;
}
#ifdef CONFIG_NET_SCHED
- if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc, false);
+ if (qdisc != &noop_qdisc)
+ qdisc_hash_add(qdisc, false);
#endif
}
@@ -1217,7 +1220,7 @@ void dev_activate(struct net_device *dev)
* and noqueue_qdisc for virtual interfaces
*/
- if (dev->qdisc == &noop_qdisc)
+ if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev))
@@ -1383,7 +1386,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
void dev_qdisc_change_real_num_tx(struct net_device *dev,
unsigned int new_real_tx)
{
- struct Qdisc *qdisc = dev->qdisc;
+ struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
if (qdisc->ops->change_real_num_tx)
qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
@@ -1447,7 +1450,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev)
{
- dev->qdisc = &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
@@ -1475,8 +1478,8 @@ void dev_shutdown(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_put(dev->qdisc);
- dev->qdisc = &noop_qdisc;
+ qdisc_put(rtnl_dereference(dev->qdisc));
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
WARN_ON(timer_pending(&dev->watchdog_timer));
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9267922ea9c3..23a9d6242429 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!hopt->rate.rate || !hopt->ceil.rate)
goto failure;
+ if (q->offload) {
+ /* Options not supported by the offload. */
+ if (hopt->rate.overhead || hopt->ceil.overhead) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
+ goto failure;
+ }
+ if (hopt->rate.mpu || hopt->ceil.mpu) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
+ goto failure;
+ }
+ if (hopt->quantum) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
+ goto failure;
+ }
+ if (hopt->prio) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
+ goto failure;
+ }
+ }
+
/* Keeping backward compatible with rate_table based iproute2 tc */
if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 961854e56736..306d9e8cd1dd 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -566,12 +566,118 @@ static void smc_stat_fallback(struct smc_sock *smc)
mutex_unlock(&net->smc.mutex_fback_rsn);
}
-static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+/* must be called under rcu read lock */
+static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
{
- wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
- wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
- unsigned long flags;
+ struct socket_wq *wq;
+ __poll_t flags;
+ wq = rcu_dereference(smc->sk.sk_wq);
+ if (!skwq_has_sleeper(wq))
+ return;
+
+ /* wake up smc sk->sk_wq */
+ if (!key) {
+ /* sk_state_change */
+ wake_up_interruptible_all(&wq->wait);
+ } else {
+ flags = key_to_poll(key);
+ if (flags & (EPOLLIN | EPOLLOUT))
+ /* sk_data_ready or sk_write_space */
+ wake_up_interruptible_sync_poll(&wq->wait, flags);
+ else if (flags & EPOLLERR)
+ /* sk_error_report */
+ wake_up_interruptible_poll(&wq->wait, flags);
+ }
+}
+
+static int smc_fback_mark_woken(wait_queue_entry_t *wait,
+ unsigned int mode, int sync, void *key)
+{
+ struct smc_mark_woken *mark =
+ container_of(wait, struct smc_mark_woken, wait_entry);
+
+ mark->woken = true;
+ mark->key = key;
+ return 0;
+}
+
+static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
+ void (*clcsock_callback)(struct sock *sk))
+{
+ struct smc_mark_woken mark = { .woken = false };
+ struct socket_wq *wq;
+
+ init_waitqueue_func_entry(&mark.wait_entry,
+ smc_fback_mark_woken);
+ rcu_read_lock();
+ wq = rcu_dereference(clcsk->sk_wq);
+ if (!wq)
+ goto out;
+ add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+ clcsock_callback(clcsk);
+ remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+
+ if (mark.woken)
+ smc_fback_wakeup_waitqueue(smc, mark.key);
+out:
+ rcu_read_unlock();
+}
+
+static void smc_fback_state_change(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+}
+
+static void smc_fback_data_ready(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+}
+
+static void smc_fback_write_space(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+}
+
+static void smc_fback_error_report(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+}
+
+static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+{
+ struct sock *clcsk;
+ int rc = 0;
+
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ rc = -EBADF;
+ goto out;
+ }
+ clcsk = smc->clcsock->sk;
+
+ if (smc->use_fallback)
+ goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -582,22 +688,41 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
- /* There may be some entries remaining in
- * smc socket->wq, which should be removed
- * to clcsocket->wq during the fallback.
+ /* There might be some wait entries remaining
+ * in smc sk->sk_wq and they should be woken up
+ * as clcsock's wait queue is woken up.
*/
- spin_lock_irqsave(&smc_wait->lock, flags);
- spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
- list_splice_init(&smc_wait->head, &clc_wait->head);
- spin_unlock(&clc_wait->lock);
- spin_unlock_irqrestore(&smc_wait->lock, flags);
+ smc->clcsk_state_change = clcsk->sk_state_change;
+ smc->clcsk_data_ready = clcsk->sk_data_ready;
+ smc->clcsk_write_space = clcsk->sk_write_space;
+ smc->clcsk_error_report = clcsk->sk_error_report;
+
+ clcsk->sk_state_change = smc_fback_state_change;
+ clcsk->sk_data_ready = smc_fback_data_ready;
+ clcsk->sk_write_space = smc_fback_write_space;
+ clcsk->sk_error_report = smc_fback_error_report;
+
+ smc->clcsock->sk->sk_user_data =
+ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
}
+out:
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc_switch_to_fallback(smc, reason_code);
+ struct net *net = sock_net(&smc->sk);
+ int rc = 0;
+
+ rc = smc_switch_to_fallback(smc, reason_code);
+ if (rc) { /* fallback fails */
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
+ return rc;
+ }
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
@@ -1518,11 +1643,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
smc_conn_abort(new_smc, local_first);
- if (reason_code < 0) { /* error, no fallback possible */
+ if (reason_code < 0 ||
+ smc_switch_to_fallback(new_smc, reason_code)) {
+ /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_switch_to_fallback(new_smc, reason_code);
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
@@ -1964,8 +2090,11 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
- smc_listen_out_connected(new_smc);
+ rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
+ if (rc)
+ smc_listen_out_err(new_smc);
+ else
+ smc_listen_out_connected(new_smc);
return;
}
@@ -2094,10 +2223,9 @@ out:
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
- struct smc_sock *lsmc;
+ struct smc_sock *lsmc =
+ smc_clcsock_user_data(listen_clcsock);
- lsmc = (struct smc_sock *)
- ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
if (!lsmc)
return;
lsmc->clcsk_data_ready(listen_clcsock);
@@ -2254,7 +2382,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ if (rc)
+ goto out;
} else {
rc = -EINVAL;
goto out;
@@ -2447,6 +2577,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
if (unlikely(!smc->clcsock->ops->setsockopt))
rc = -EOPNOTSUPP;
else
@@ -2456,6 +2591,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk_error_report(sk);
}
+ mutex_unlock(&smc->clcsock_release_lock);
if (optlen < sizeof(int))
return -EINVAL;
@@ -2472,7 +2608,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
}
@@ -2515,13 +2651,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct smc_sock *smc;
+ int rc;
smc = smc_sk(sock->sk);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
/* socket options apply to the CLC socket */
- if (unlikely(!smc->clcsock->ops->getsockopt))
+ if (unlikely(!smc->clcsock->ops->getsockopt)) {
+ mutex_unlock(&smc->clcsock_release_lock);
return -EOPNOTSUPP;
- return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ }
+ rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
+ optval, optlen);
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
static int smc_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 3d0b8e300deb..37b2001a0255 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -139,6 +139,12 @@ enum smc_urg_state {
SMC_URG_READ = 3, /* data was already read */
};
+struct smc_mark_woken {
+ bool woken;
+ void *key;
+ wait_queue_entry_t wait_entry;
+};
+
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
@@ -228,8 +234,14 @@ struct smc_connection {
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
+ void (*clcsk_state_change)(struct sock *sk);
+ /* original stat_change fct. */
void (*clcsk_data_ready)(struct sock *sk);
- /* original data_ready fct. **/
+ /* original data_ready fct. */
+ void (*clcsk_write_space)(struct sock *sk);
+ /* original write_space fct. */
+ void (*clcsk_error_report)(struct sock *sk);
+ /* original error_report fct. */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct connect_work; /* handle non-blocking connect*/
@@ -264,6 +276,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk)
+{
+ return (struct smc_sock *)
+ ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
+}
+
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index b8898c787d23..1fca2f90a9c7 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_link *link = smc->conn.lnk;
- struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net);
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = link->ibport,
.lnk[0].link_id = link->link_id,
- .lnk[0].net_cookie = net->net_cookie,
};
memcpy(linfo.lnk[0].ibname,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 291f1484a1b7..29f0a559d884 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -113,7 +113,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
pnettable = &sn->pnettable;
/* remove table entry */
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
@@ -131,7 +131,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
rc = 0;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
/* if this is not the initial namespace, stop here */
if (net != &init_net)
@@ -192,7 +192,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
!strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
@@ -206,7 +206,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -224,7 +224,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
@@ -237,7 +237,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -368,12 +368,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
- new_pe->ndev = ndev;
- if (ndev)
- netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_ETH &&
!strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
@@ -382,10 +379,15 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
}
}
if (new_netdev) {
+ if (ndev) {
+ new_pe->ndev = ndev;
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
+ GFP_ATOMIC);
+ }
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
goto out_put;
}
@@ -446,7 +448,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
new_pe->ib_port = ib_port;
new_ibdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -456,9 +458,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
}
if (new_ibdev) {
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
}
return (new_ibdev) ? 0 : -EEXIST;
@@ -603,7 +605,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
pnettable = &sn->pnettable;
/* dump pnettable entries */
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
@@ -618,7 +620,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return idx;
}
@@ -862,7 +864,7 @@ int smc_pnet_net_init(struct net *net)
struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
INIT_LIST_HEAD(&pnettable->pnetlist);
- rwlock_init(&pnettable->lock);
+ mutex_init(&pnettable->lock);
INIT_LIST_HEAD(&pnetids_ndev->list);
rwlock_init(&pnetids_ndev->lock);
@@ -942,7 +944,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
@@ -951,7 +953,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1154,7 +1156,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
@@ -1164,7 +1166,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1183,7 +1185,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -1192,7 +1194,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 14039272f7e4..80a88eea4949 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -29,7 +29,7 @@ struct smc_link_group;
* @pnetlist: List of PNETIDs
*/
struct smc_pnettable {
- rwlock_t lock;
+ struct mutex lock;
struct list_head pnetlist;
};
diff --git a/net/socket.c b/net/socket.c
index 50cf75730fd7..982eecad464c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3448,7 +3448,7 @@ EXPORT_SYMBOL(kernel_connect);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is bound.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
@@ -3463,7 +3463,7 @@ EXPORT_SYMBOL(kernel_getsockname);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is connected.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index fe97f3106536..4a4082bb22ad 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
if (ret)
return ret;
- if (!ret) {
- *buf_in = buf;
- *body_size = toksize;
- }
+ *buf_in = buf;
+ *body_size = toksize;
return ret;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a312ea2bc440..c83fe618767c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2900,7 +2900,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport, reuseport;
- int ret = 0;
+ int ret = 0, ident;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
@@ -2914,8 +2914,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
+ ident = xprt->xprt_class->ident;
rcu_read_unlock();
+ if (!xprtargs->ident)
+ xprtargs->ident = ident;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ee5336d73fdd..35588f0afa86 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_unlink(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_unlink(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index b1aea3419218..05c758da6a92 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -115,11 +115,14 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
}
sock = container_of(xprt, struct sock_xprt, xprt);
- if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock == NULL ||
+ kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
goto out;
ret = sprintf(buf, "%pISc\n", &saddr);
out:
+ mutex_unlock(&sock->recv_mutex);
xprt_put(xprt);
return ret + 1;
}
@@ -295,8 +298,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
online = 1;
else if (!strncmp(buf, "remove", 6))
remove = 1;
- else
- return -EINVAL;
+ else {
+ count = -EINVAL;
+ goto out_put;
+ }
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
count = -EINTR;
@@ -307,25 +312,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
goto release_tasks;
}
if (offline) {
- set_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive--;
- spin_unlock(&xps->xps_lock);
+ if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive--;
+ spin_unlock(&xps->xps_lock);
+ }
} else if (online) {
- clear_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive++;
- spin_unlock(&xps->xps_lock);
+ if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive++;
+ spin_unlock(&xps->xps_lock);
+ }
} else if (remove) {
if (test_bit(XPRT_OFFLINE, &xprt->state)) {
- set_bit(XPRT_REMOVE, &xprt->state);
- xprt_force_disconnect(xprt);
- if (test_bit(XPRT_CONNECTED, &xprt->state)) {
- if (!xprt->sending.qlen &&
- !xprt->pending.qlen &&
- !xprt->backlog.qlen &&
- !atomic_long_read(&xprt->queuelen))
- rpc_xprt_switch_remove_xprt(xps, xprt);
+ if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) {
+ xprt_force_disconnect(xprt);
+ if (test_bit(XPRT_CONNECTED, &xprt->state)) {
+ if (!xprt->sending.qlen &&
+ !xprt->pending.qlen &&
+ !xprt->backlog.qlen &&
+ !atomic_long_read(&xprt->queuelen))
+ rpc_xprt_switch_remove_xprt(xps, xprt);
+ }
}
} else {
count = -EINVAL;
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 17f174d6ea3b..faba7136dd9a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -13,10 +13,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
#undef RPCRDMA_BACKCHANNEL_DEBUG
/**
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ff699307e820..515dd7a66a04 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -45,10 +45,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
static void frwr_cid_init(struct rpcrdma_ep *ep,
struct rpcrdma_mr *mr)
{
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8035a983c8ce..281ddb87ac8d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -54,10 +54,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 16e5696314a4..42e375dbdadb 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -60,10 +60,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/*
* tunables
*/
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3d3673ba9e1e..7b5fce2faa10 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -63,17 +63,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-/*
- * Globals/Macros
- */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
-/*
- * internal functions
- */
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
@@ -274,8 +263,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_connect_status = -ENETUNREACH;
goto wake_connect_worker;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
- sap, rdma_reject_msg(id, event->status));
ep->re_connect_status = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
ep->re_connect_status = -ENOTCONN;
@@ -291,8 +278,6 @@ disconnected:
break;
}
- dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap,
- ep->re_id->device->name, rdma_event_msg(event->event));
return 0;
}
@@ -419,14 +404,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_attr.qp_type = IB_QPT_RC;
ep->re_attr.port_num = ~0;
- dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
- "iovs: send %d recv %d\n",
- __func__,
- ep->re_attr.cap.max_send_wr,
- ep->re_attr.cap.max_recv_wr,
- ep->re_attr.cap.max_send_sge,
- ep->re_attr.cap.max_recv_sge);
-
ep->re_send_batch = ep->re_max_requests >> 3;
ep->re_send_count = ep->re_send_batch;
init_waitqueue_head(&ep->re_connect_wait);
@@ -436,6 +413,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.send_cq)) {
rc = PTR_ERR(ep->re_attr.send_cq);
+ ep->re_attr.send_cq = NULL;
goto out_destroy;
}
@@ -444,6 +422,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.recv_cq)) {
rc = PTR_ERR(ep->re_attr.recv_cq);
+ ep->re_attr.recv_cq = NULL;
goto out_destroy;
}
ep->re_receive_count = 0;
@@ -482,6 +461,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_pd = ib_alloc_pd(device, 0);
if (IS_ERR(ep->re_pd)) {
rc = PTR_ERR(ep->re_pd);
+ ep->re_pd = NULL;
goto out_destroy;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d8ee06a9650a..0f39e08ee580 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1641,7 +1641,12 @@ static int xs_get_srcport(struct sock_xprt *transport)
unsigned short get_srcport(struct rpc_xprt *xprt)
{
struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
- return xs_sock_getport(sock->sock);
+ unsigned short ret = 0;
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock)
+ ret = xs_sock_getport(sock->sock);
+ mutex_unlock(&sock->recv_mutex);
+ return ret;
}
EXPORT_SYMBOL(get_srcport);
@@ -1910,7 +1915,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
- if (RPC_IS_ASYNC(task)) {
+ if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
* filesystem namespace of the process making the rpc
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 9325479295b8..f09316a9035f 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -2276,7 +2276,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
struct tipc_aead_key *skey = NULL;
u16 key_gen = msg_key_gen(hdr);
- u16 size = msg_data_sz(hdr);
+ u32 size = msg_data_sz(hdr);
u8 *data = msg_data(hdr);
unsigned int keylen;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 8d9e09f48f4c..1e14d7f8f28f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
bool reply = msg_probe(hdr), retransmitted = false;
- u16 dlen = msg_data_sz(hdr), glen = 0;
+ u32 dlen = msg_data_sz(hdr), glen = 0;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
@@ -2214,6 +2214,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
void *data;
trace_tipc_proto_rcv(skb, false, l->name);
+
+ if (dlen > U16_MAX)
+ goto exit;
+
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
@@ -2309,7 +2313,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Receive Gap ACK blocks from peer if any */
glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
-
+ if(glen > dlen)
+ break;
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 407619697292..2f4d23238a7e 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
state->probing = false;
/* Sanity check received domain record */
+ if (new_member_cnt > MAX_MON_DOMAIN)
+ return;
if (dlen < dom_rec_len(arrv_dom, 0))
return;
if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index bda902caa814..8267b751a526 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -313,7 +313,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
ua.sr.type, ua.sr.lower, node);
} else {
- pr_warn("Unrecognized name table message received\n");
+ pr_warn_ratelimited("Unknown name table message received\n");
}
return false;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 01396dd1c899..1d8ba233d047 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9947b7dfe1d2..6ef95ce565bd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -403,7 +403,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
u32 flags = n->action_flags;
struct list_head *publ_list;
struct tipc_uaddr ua;
- u32 bearer_id;
+ u32 bearer_id, node;
if (likely(!flags)) {
write_unlock_bh(&n->lock);
@@ -413,7 +413,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
TIPC_LINK_STATE, n->addr, n->addr);
sk.ref = n->link_id;
- sk.node = n->addr;
+ sk.node = tipc_own_addr(net);
+ node = n->addr;
bearer_id = n->link_id & 0xffff;
publ_list = &n->publ_list;
@@ -423,17 +424,17 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
+ tipc_publ_notify(net, publ_list, node, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, sk.node, n->capabilities);
+ tipc_named_node_up(net, node, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
- tipc_mon_peer_up(net, sk.node, bearer_id);
+ tipc_mon_peer_up(net, node, bearer_id);
tipc_nametbl_publish(net, &ua, &sk, sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
- tipc_mon_peer_down(net, sk.node, bearer_id);
+ tipc_mon_peer_down(net, node, bearer_id);
tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3e63c83e641c..7545321c3440 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3749,7 +3749,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistence check to fail in the netlink callback
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3235261f138d..38baeb189d4e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1401,6 +1401,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
+ vsock_remove_connected(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3a54c8e6b6c6..f08d4b3bb148 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -332,29 +332,20 @@ static void cfg80211_event_work(struct work_struct *work)
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev, *tmp;
- bool found = false;
ASSERT_RTNL();
- list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
if (wdev->nl_owner_dead) {
if (wdev->netdev)
dev_close(wdev->netdev);
- found = true;
- }
- }
-
- if (!found)
- return;
- wiphy_lock(&rdev->wiphy);
- list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
- if (wdev->nl_owner_dead) {
+ wiphy_lock(&rdev->wiphy);
cfg80211_leave(rdev, wdev);
rdev_del_virtual_intf(rdev, wdev);
+ wiphy_unlock(&rdev->wiphy);
}
}
- wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_destroy_iface_wk(struct work_struct *work)