aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/9p/trans_fd.c1
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/tp_meter.c4
-rw-r--r--net/bridge/br_netlink.c11
-rw-r--r--net/caif/caif_dev.c5
-rw-r--r--net/caif/caif_usb.c4
-rw-r--r--net/caif/cfcnfg.c10
-rw-r--r--net/caif/cfctrl.c4
-rw-r--r--net/core/dev.c18
-rw-r--r--net/core/ethtool.c15
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c10
-rw-r--r--net/core/skbuff.c17
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/dccp/minisocks.c6
-rw-r--r--net/dccp/proto.c5
-rw-r--r--net/dsa/dsa2.c25
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/igmp.c44
-rw-r--r--net/ipv4/inet_timewait_sock.c6
-rw-r--r--net/ipv4/ip_gre.c3
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/raw.c17
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_bbr.c12
-rw-r--r--net/ipv4/tcp_input.c22
-rw-r--r--net/ipv4/tcp_ipv4.c61
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_rate.c10
-rw-r--r--net/ipv4/tcp_recovery.c28
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/xfrm4_input.c12
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/exthdrs.c9
-rw-r--r--net/ipv6/ip6_fib.c73
-rw-r--r--net/ipv6/ip6_gre.c58
-rw-r--r--net/ipv6/ip6_output.c17
-rw-r--r--net/ipv6/ip6_tunnel.c26
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c25
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/route.c20
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/xfrm6_input.c10
-rw-r--r--net/kcm/kcmsock.c68
-rw-r--r--net/mac80211/ht.c7
-rw-r--r--net/mac80211/mesh_hwmp.c15
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/tx.c29
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c128
-rw-r--r--net/netfilter/nf_conntrack_netlink.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c3
-rw-r--r--net/netfilter/nf_tables_api.c15
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c5
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/x_tables.c9
-rw-r--r--net/netfilter/xt_bpf.c6
-rw-r--r--net/netfilter/xt_osf.c7
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/flow.c15
-rw-r--r--net/openvswitch/flow_netlink.c16
-rw-r--r--net/packet/af_packet.c37
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/rds/rdma.c6
-rw-r--r--net/rds/send.c3
-rw-r--r--net/rxrpc/af_rxrpc.c24
-rw-r--r--net/rxrpc/ar-internal.h103
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/call_event.c229
-rw-r--r--net/rxrpc/call_object.c62
-rw-r--r--net/rxrpc/conn_client.c54
-rw-r--r--net/rxrpc/conn_event.c124
-rw-r--r--net/rxrpc/conn_object.c76
-rw-r--r--net/rxrpc/input.c76
-rw-r--r--net/rxrpc/misc.c19
-rw-r--r--net/rxrpc/net_ns.c33
-rw-r--r--net/rxrpc/output.c43
-rw-r--r--net/rxrpc/recvmsg.c12
-rw-r--r--net/rxrpc/sendmsg.c126
-rw-r--r--net/rxrpc/sysctl.c60
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_meta_mark.c1
-rw-r--r--net/sched/act_meta_skbtcindex.c1
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_sample.c14
-rw-r--r--net/sched/cls_api.c20
-rw-r--r--net/sched/cls_bpf.c116
-rw-r--r--net/sched/cls_u32.c1
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_cbq.c9
-rw-r--r--net/sched/sch_choke.c3
-rw-r--r--net/sched/sch_generic.c7
-rw-r--r--net/sched/sch_gred.c3
-rw-r--r--net/sched/sch_ingress.c15
-rw-r--r--net/sched/sch_red.c33
-rw-r--r--net/sched/sch_sfq.c4
-rw-r--r--net/sctp/chunk.c11
-rw-r--r--net/sctp/debug.c3
-rw-r--r--net/sctp/input.c28
-rw-r--r--net/sctp/outqueue.c19
-rw-r--r--net/sctp/protocol.c1
-rw-r--r--net/sctp/socket.c45
-rw-r--r--net/sctp/stream.c101
-rw-r--r--net/sctp/stream_sched.c25
-rw-r--r--net/sctp/stream_sched_prio.c7
-rw-r--r--net/sctp/stream_sched_rr.c7
-rw-r--r--net/sctp/transport.c29
-rw-r--r--net/sctp/ulpqueue.c24
-rw-r--r--net/socket.c119
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c5
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/svcauth_unix.c6
-rw-r--r--net/sunrpc/xprt.c28
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c6
-rw-r--r--net/sunrpc/xprtrdma/transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/tipc/bearer.c5
-rw-r--r--net/tipc/group.c71
-rw-r--r--net/tipc/monitor.c6
-rw-r--r--net/tipc/server.c3
-rw-r--r--net/tipc/socket.c4
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/vmw_vsock/hyperv_transport.c2
-rw-r--r--net/vmw_vsock/vmci_transport.c14
-rw-r--r--net/wireless/Kconfig7
-rw-r--r--net/wireless/Makefile39
-rw-r--r--net/wireless/certs/sforshee.hex86
-rw-r--r--net/wireless/certs/sforshee.x509bin680 -> 0 bytes
-rw-r--r--net/wireless/nl80211.c9
-rw-r--r--net/xfrm/xfrm_input.c69
-rw-r--r--net/xfrm/xfrm_policy.c9
-rw-r--r--net/xfrm/xfrm_state.c1
-rw-r--r--net/xfrm/xfrm_user.c26
156 files changed, 2158 insertions, 1046 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430f..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 985046ae4231..80f5c79053a4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
if (IS_ERR(file)) {
pr_err("%s (%d): failed to map fd\n",
__func__, task_pid_nr(current));
- sock_release(csocket);
kfree(p);
return PTR_ERR(file);
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..bbe8414b6ee7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
orig_node->last_seen = jiffies;
/* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
if_num = if_incoming->if_num;
orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
} else {
neigh_rq_count = 0;
}
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..e0e2bfcd6b3e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
}
orig_gw = batadv_gw_node_get(bat_priv, orig_node);
- if (!orig_node)
+ if (!orig_gw)
goto out;
if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..ebe6e38934e4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
*/
if (skb->priority >= 256 && skb->priority <= 263)
frag_header.priority = skb->priority - 256;
+ else
+ frag_header.priority = 0;
ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..ebc4e2241c77 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
/**
* batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
*
* If fired it means that there was packet loss.
* Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
/**
* batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
* reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
*/
static void batadv_tp_receiver_shutdown(struct timer_list *t)
{
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
struct net_bridge *br = netdev_priv(dev);
int err;
+ err = register_netdevice(dev);
+ if (err)
+ return err;
+
if (tb[IFLA_ADDRESS]) {
spin_lock_bh(&br->lock);
br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
spin_unlock_bh(&br->lock);
}
- err = register_netdevice(dev);
- if (err)
- return err;
-
err = br_changelink(dev, tb, data, extack);
if (err)
- unregister_netdevice(dev);
+ br_dev_delete(dev, NULL);
+
return err;
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
- strncpy(caifd->layer.name, dev->name,
- sizeof(caifd->layer.name) - 1);
- caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+ strlcpy(caifd->layer.name, dev->name,
+ sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
cfcnfg_add_phy_layer(cfg,
dev,
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..1a082a946045 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
dev_add_pack(&caif_usb_type);
pack_added = true;
- strncpy(layer->name, dev->name,
- sizeof(layer->name) - 1);
- layer->name[sizeof(layer->name) - 1] = 0;
+ strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d8..8f00bea093b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
- strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
- sizeof(l->u.rfm.volume)-1);
- l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+ strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+ sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
- strncpy(l->u.utility.name, s->sockaddr.u.util.service,
- sizeof(l->u.utility.name)-1);
- l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+ strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+ sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76..655ed7032150 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
- strncpy(utility_name, param->u.utility.name,
- UTILITY_NAME_LENGTH - 1);
+ strlcpy(utility_name, param->u.utility.name,
+ UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
cfpkt_add_body(pkt, &tmp8, 1);
diff --git a/net/core/dev.c b/net/core/dev.c
index 07ed21d64f92..0e0ba36eeac9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
- return p ? -ENFILE : -EEXIST;
+ return -ENFILE;
}
static int dev_alloc_name_ns(struct net *net,
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- return dev_alloc_name_ns(net, dev, name);
+ BUG_ON(!net);
+
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ if (strchr(name, '%'))
+ return dev_alloc_name_ns(net, dev, name);
+ else if (__dev_get_by_name(net, name))
+ return -EEXIST;
+ else if (dev->name != name)
+ strlcpy(dev->name, name, IFNAMSIZ);
+
+ return 0;
}
EXPORT_SYMBOL(dev_get_valid_name);
@@ -3904,7 +3916,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
goto do_drop;
- if (troom > 0 && __skb_linearize(skb))
+ if (skb_linearize(skb))
goto do_drop;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e..8225416911ae 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
- char name[sizeof(current->comm)];
-
- pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
- get_task_comm(name, current), details);
-}
-
/* Query device for its ethtool_cmd settings.
*
* Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
&link_ksettings);
if (err < 0)
return err;
- if (!convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings))
- warn_incomplete_ethtool_legacy_settings_conversion(
- "link modes are only partially reported");
+ convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings);
/* send a sensible cmd tag back to user */
cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..d339ef170df6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;
- /* We are guaranteed to never error here with cBPF to eBPF
- * transitions, since there's no issue with type compatibility
- * checks on program arrays.
- */
fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err_free;
kfree(old_prog);
return fp;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
- get_net(peer);
+ peer = maybe_get_net(peer);
spin_unlock_bh(&net->nsid_lock);
rcu_read_unlock();
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..778d7f03404a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
{
struct net *net;
- net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ net = get_net_ns_by_id(sock_net(sk), netnsid);
if (!net)
return ERR_PTR(-EINVAL);
/* For now, the caller is required to have CAP_NET_ADMIN in
* the user namespace owning the target net ns.
*/
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
put_net(net);
return ERR_PTR(-EACCES);
}
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
ifla_policy, NULL) >= 0) {
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
tgt_net = net;
netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..08f574081315 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
int i, new_frags;
u32 d_off;
- if (!num_frags)
- return 0;
-
if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
return -EINVAL;
+ if (!num_frags)
+ goto release;
+
new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < new_frags; i++) {
page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
skb_shinfo(skb)->nr_frags = new_frags;
+release:
skb_zcopy_clear(skb, false);
return 0;
}
@@ -3654,8 +3655,6 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
- if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
- goto err;
while (pos < offset + len) {
if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
goto err;
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ goto err;
*nskb_frag = *frag;
__skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk = skb->sk;
if (!skb_may_tx_timestamp(sk, false))
- return;
+ goto err;
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
+ return;
}
+
+err:
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6..146b50e30659 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..a47ad6cd41c0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec
+#else
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &one,
+#endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219..178bb9833311 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
+ /* tw_timer is pinned, so we need to make sure BH are disabled
+ * in following section, otherwise timer handler could run before
+ * we complete the initialization.
+ */
+ local_bh_disable();
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
inet_twsk_put(tw);
+ local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..9d43c1f40274 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
int err = 0;
const int old_state = sk->sk_state;
@@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags)
sk->sk_err = ECONNRESET;
dccp_clear_xmit_timers(sk);
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = NULL;
+ dp->dccps_hc_tx_ccid = NULL;
__skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_write_queue);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 44e3fb7dec8c..1e287420ff49 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
INIT_LIST_HEAD(&dst->list);
list_add_tail(&dsa_tree_list, &dst->list);
- /* Initialize the reference counter to the number of switches, not 1 */
kref_init(&dst->refcount);
- refcount_set(&dst->refcount.refcount, 0);
return dst;
}
@@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst)
kfree(dst);
}
-static struct dsa_switch_tree *dsa_tree_touch(int index)
+static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
{
- struct dsa_switch_tree *dst;
-
- dst = dsa_tree_find(index);
- if (!dst)
- dst = dsa_tree_alloc(index);
+ if (dst)
+ kref_get(&dst->refcount);
return dst;
}
-static void dsa_tree_get(struct dsa_switch_tree *dst)
+static struct dsa_switch_tree *dsa_tree_touch(int index)
{
- kref_get(&dst->refcount);
+ struct dsa_switch_tree *dst;
+
+ dst = dsa_tree_find(index);
+ if (dst)
+ return dsa_tree_get(dst);
+ else
+ return dsa_tree_alloc(index);
}
static void dsa_tree_release(struct kref *ref)
@@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref)
static void dsa_tree_put(struct dsa_switch_tree *dst)
{
- kref_put(&dst->refcount, dsa_tree_release);
+ if (dst)
+ kref_put(&dst->refcount, dsa_tree_release);
}
static bool dsa_port_is_dsa(struct dsa_port *port)
@@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds)
mutex_lock(&dsa2_mutex);
err = dsa_switch_probe(ds);
+ dsa_tree_put(ds->dst);
mutex_unlock(&dsa2_mutex);
return err;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b..a95a55f79137 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
#include <linux/of_net.h>
#include <linux/of_mdio.h>
#include <linux/mdio.h>
-#include <linux/list.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da..7a93359fbc72 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1428,7 +1428,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
- u32 val;
+ u32 fi_val, val;
if (!type)
continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
val = nla_get_u32(nla);
}
- if (fi->fib_metrics->metrics[type - 1] != val)
+ fi_val = fi->fib_metrics->metrics[type - 1];
+ if (type == RTAX_FEATURES)
+ fi_val &= ~DST_FEATURE_ECN_CA;
+
+ if (fi_val != val)
return false;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3..726f6b608274 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return htonl(INADDR_ANY);
+
+ for_ifa(in_dev) {
+ if (inet_ifa_match(fl4->saddr, ifa))
+ return fl4->saddr;
+ } endfor_ifa(in_dev);
+
+ return htonl(INADDR_ANY);
+}
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
- pip->saddr = fl4.saddr;
+ pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -538,7 +562,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c690cd0d9b3f..b563e0c46bac 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
}
/*
- * Enter the time wait state.
+ * Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
* from the SK, and mess with hash chains and list linkage.
*/
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
- spin_lock_bh(&bhead->lock);
+ spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- spin_unlock_bh(lock);
+ spin_unlock(lock);
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1a..45ffd3d045d2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
len = gre_hdr_len + sizeof(*ershdr);
if (unlikely(!pskb_may_pull(skb, len)))
- return -ENOMEM;
+ return PACKET_REJECT;
iph = ip_hdr(skb);
ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &gre_tap_netdev_ops;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..5ddb1cb52bd4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d..0c3c944a7b72 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -373,7 +373,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf..2e0d339028bb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -439,7 +439,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944..69060e3abe85 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
-#ifdef CONFIG_PROC_FS
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+#ifdef CONFIG_PROC_FS
proc_remove(cn->procdir);
cn->procdir = NULL;
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
+ WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122..5e570aa9e43b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
+ */
+ hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
/*
* Check the flags.
*/
@@ -593,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -615,12 +622,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
- if (!inet->hdrincl) {
+ if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -645,7 +652,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
&rt, msg->msg_flags, &ipc.sockc);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf97317e6c97..f08eebe60446 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 69ee877574d0..8322f26e770e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -110,7 +110,8 @@ struct bbr {
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
- full_bw_cnt:3, /* number of rounds without large bw gains */
+ full_bw_reached:1, /* reached full bw in Startup? */
+ full_bw_cnt:2, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
has_seen_rtt:1, /* have we seen an RTT sample yet? */
unused_b:5;
@@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk)
{
const struct bbr *bbr = inet_csk_ca(sk);
- return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+ return bbr->full_bw_reached;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk,
return;
}
++bbr->full_bw_cnt;
+ bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk)
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
+ bbr->full_bw_reached = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp = 0;
@@ -871,6 +874,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
*/
static u32 bbr_undo_cwnd(struct sock *sk)
{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
+ bbr->full_bw_cnt = 0;
+ bbr_reset_lt_bw_sampling(sk);
return tcp_sk(sk)->snd_cwnd;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 734cfc8ff76e..45f750e85714 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
- if (m == 0)
- m = 1;
-
if (new_sample != 0) {
/* If we sample in larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
+ if (!delta_us)
+ delta_us = 1;
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ u32 delta_us;
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
tcp_rcv_rtt_update(tp, delta_us, 0);
}
}
@@ -579,6 +581,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
+ tcp_mstamp_refresh(tp);
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
@@ -1941,6 +1944,8 @@ void tcp_enter_loss(struct sock *sk)
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
+ /* Mark SACK reneging until we recover from this loss event. */
+ tp->is_sack_reneg = 1;
}
tcp_clear_all_retrans_hints(tp);
@@ -2326,6 +2331,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
}
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->undo_marker = 0;
+ tp->rack.advanced = 1; /* Force RACK to re-exam losses */
}
static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2364,6 +2370,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
return true;
}
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
return false;
}
@@ -2397,8 +2404,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
- if (frto_undo || tcp_is_sack(tp))
+ if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
+ }
return true;
}
return false;
@@ -3495,6 +3504,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
struct tcp_sacktag_state sack_state;
struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una;
+ bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
@@ -3611,7 +3621,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, sack_state.rate);
+ tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6bc0c4d19c6..94e28350f420 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_filter);
+static void tcp_v4_restore_cb(struct sk_buff *skb)
+{
+ memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
+ sizeof(struct inet_skb_parm));
+}
+
+static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+ const struct tcphdr *th)
+{
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+ sizeof(struct inet_skb_parm));
+ barrier();
+
+ TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+ TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+ skb->len - th->doff * 4);
+ TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+ TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+ TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->has_rxtstamp =
+ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+}
+
/*
* From tcp_input.c
*/
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
- /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
- * barrier() makes sure compiler wont play fool^Waliasing games.
- */
- memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
- sizeof(struct inet_skb_parm));
- barrier();
-
- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff * 4);
- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
- TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
- TCP_SKB_CB(skb)->sacked = 0;
- TCP_SKB_CB(skb)->has_rxtstamp =
- skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
-
lookup:
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
th->dest, sdif, &refcounted);
@@ -1679,14 +1689,19 @@ process:
sock_hold(sk);
refcounted = true;
nsk = NULL;
- if (!tcp_filter(sk, skb))
+ if (!tcp_filter(sk, skb)) {
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
+ tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false);
+ }
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
}
if (nsk == sk) {
reqsk_put(req);
+ tcp_v4_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
goto discard_and_relse;
@@ -1712,6 +1727,7 @@ process:
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
+ tcp_v4_fill_cb(skb, iph, th);
skb->dev = NULL;
@@ -1742,6 +1758,8 @@ no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
+ tcp_v4_fill_cb(skb, iph, th);
+
if (tcp_checksum_complete(skb)) {
csum_error:
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@@ -1768,6 +1786,8 @@ do_time_wait:
goto discard_it;
}
+ tcp_v4_fill_cb(skb, iph, th);
+
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
@@ -1784,6 +1804,7 @@ do_time_wait:
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
+ tcp_v4_restore_cb(skb);
refcounted = false;
goto process;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e36eff0403f4..b079b619b60c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;
+ /* tw_timer is pinned, so we need to make sure BH are disabled
+ * in following section, otherwise timer handler could run before
+ * we complete the initialization.
+ */
+ local_bh_disable();
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
inet_twsk_put(tw);
+ local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 3330a370d306..c61240e43923 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
/* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct rate_sample *rs)
+ bool is_sack_reneg, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us;
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
- /* Return an invalid sample if no timing information is available. */
- if (!rs->prior_mstamp) {
+ /* Return an invalid sample if no timing information is available or
+ * in recovery from loss with SACK reneging. Rate samples taken during
+ * a SACK reneging event may overestimate bw by including packets that
+ * were SACKed before the reneg.
+ */
+ if (!rs->prior_mstamp || is_sack_reneg) {
rs->delivered = -1;
rs->interval_us = -1;
return;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d3ea89020c69..3a81720ac0c4 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
* to queuing or delayed ACKs.
*/
reo_wnd = 1000;
- if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+ if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) &&
+ min_rtt != ~0U) {
reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
}
@@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
*/
remaining = tp->rack.rtt_us + reo_wnd -
tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
- if (remaining < 0) {
+ if (remaining <= 0) {
tcp_rack_mark_skb_lost(sk, skb);
list_del_init(&skb->tcp_tsorted_anchor);
} else {
- /* Record maximum wait time (+1 to avoid 0) */
- *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
+ /* Record maximum wait time */
+ *reo_timeout = max_t(u32, *reo_timeout, remaining);
}
}
}
@@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
{
u32 rtt_us;
- if (tp->rack.mstamp &&
- !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
- end_seq, tp->rack.end_seq))
- return;
-
rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
- if (sacked & TCPCB_RETRANS) {
+ if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
* retransmission) was sacked.
@@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
* so it's at least one RTT (i.e., retransmission is at least
* an RTT later).
*/
- if (rtt_us < tcp_min_rtt(tp))
- return;
+ return;
}
- tp->rack.rtt_us = rtt_us;
- tp->rack.mstamp = xmit_time;
- tp->rack.end_seq = end_seq;
tp->rack.advanced = 1;
+ tp->rack.rtt_us = rtt_us;
+ if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
+ end_seq, tp->rack.end_seq)) {
+ tp->rack.mstamp = xmit_time;
+ tp->rack.end_seq = end_seq;
+ }
}
/* We have waited long enough to accommodate reordering. Mark the expired
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b98..968fda198376 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -264,6 +264,7 @@ void tcp_delack_timer_handler(struct sock *sk)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
+ tcp_mstamp_refresh(tcp_sk(sk));
tcp_send_ack(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
@@ -632,6 +633,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
+ tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
if (tp->linger2 >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm4_extract_header(skb);
}
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
iph->tos, skb->dev))
goto drop;
}
- return dst_input(skb);
+
+ if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+ goto drop;
+
+ return 0;
drop:
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(net);
np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 83bd75713535..bc68eb661970 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
sr_phdr->segments[0] = **addr_p;
*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
+ if (sr_ihdr->hdrlen > hops * 2) {
+ int tlvs_offset, tlvs_length;
+
+ tlvs_offset = (1 + hops * 2) << 3;
+ tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
+ memcpy((char *)sr_phdr + tlvs_offset,
+ (char *)sr_ihdr + tlvs_offset, tlvs_length);
+ }
+
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
struct net *net = NULL;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..9dcc3924a975 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
if (!(fn->fn_flags & RTN_RTINFO)) {
RCU_INIT_POINTER(fn->leaf, NULL);
rt6_release(leaf);
+ /* remove null_entry in the root node */
+ } else if (fn->fn_flags & RTN_TL_ROOT &&
+ rcu_access_pointer(fn->leaf) ==
+ net->ipv6.ip6_null_entry) {
+ RCU_INIT_POINTER(fn->leaf, NULL);
}
return fn;
@@ -1241,23 +1246,28 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
- lockdep_is_held(&table->tb6_lock));
- if (pn != fn && pn_leaf == rt) {
- pn_leaf = NULL;
- RCU_INIT_POINTER(pn->leaf, NULL);
- atomic_dec(&rt->rt6i_ref);
- }
- if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
- pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (pn != fn) {
+ struct rt6_info *pn_leaf =
+ rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ atomic_dec(&rt->rt6i_ref);
}
+ if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table,
+ pn);
+#if RT6_DEBUG >= 2
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf =
+ info->nl_net->ipv6.ip6_null_entry;
+ }
#endif
- atomic_inc(&pn_leaf->rt6i_ref);
- rcu_assign_pointer(pn->leaf, pn_leaf);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
+ }
}
#endif
goto failure;
@@ -1265,13 +1275,17 @@ out:
return err;
failure:
- /* fn->leaf could be NULL if fn is an intermediate node and we
- * failed to add the new route to it in both subtree creation
- * failure and fib6_add_rt2node() failure case.
- * In both cases, fib6_repair_tree() should be called to fix
- * fn->leaf.
+ /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+ * 1. fn is an intermediate node and we failed to add the new
+ * route to it in both subtree creation failure and fib6_add_rt2node()
+ * failure case.
+ * 2. fn is the root node in the table and we fail to add the first
+ * default route to it.
*/
- if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+ if (fn &&
+ (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+ (fn->fn_flags & RTN_TL_ROOT &&
+ !rcu_access_pointer(fn->leaf))))
fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
@@ -1526,6 +1540,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
struct fib6_walker *w;
int iter = 0;
+ /* Set fn->leaf to null_entry for root node. */
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+ return fn;
+ }
+
for (;;) {
struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
lockdep_is_held(&table->tb6_lock));
@@ -1680,10 +1700,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- /* If it was last route, expunge its radix tree node */
+ /* If it was last route, call fib6_repair_tree() to:
+ * 1. For root node, put back null_entry as how the table was created.
+ * 2. For other nodes, expunge its radix tree node.
+ */
if (!rcu_access_pointer(fn->leaf)) {
- fn->fn_flags &= ~RTN_RTINFO;
- net->ipv6.rt6_stats->fib_route_nodes--;
+ if (!(fn->fn_flags & RTN_TL_ROOT)) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ net->ipv6.rt6_stats->fib_route_nodes--;
+ }
fn = fib6_repair_tree(net, table, fn);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..772695960890 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
eth_random_addr(dev->perm_addr);
}
+#define GRE6_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+ struct ip6_tnl *nt = netdev_priv(dev);
+
+ dev->features |= GRE6_FEATURES;
+ dev->hw_features |= GRE6_FEATURES;
+
+ if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+ nt->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
+ /* Can use a lockless transmit, unless we generate
+ * output sequences
+ */
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
@@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ ip6gre_tnl_init_features(dev);
+
return 0;
}
@@ -1298,16 +1330,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_get_iflink = ip6_tnl_get_iflink,
};
-#define GRE6_FEATURES (NETIF_F_SG | \
- NETIF_F_FRAGLIST | \
- NETIF_F_HIGHDMA | \
- NETIF_F_HW_CSUM)
-
static void ip6gre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6gre_tap_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
@@ -1382,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
- dev->features |= GRE6_FEATURES;
- dev->hw_features |= GRE6_FEATURES;
-
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- (nt->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
-
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
-
err = register_netdevice(dev);
if (err)
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..688ba5f7516b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+ if (!np->autoflowlabel_set)
+ return ip6_default_np_autolabel(net);
+ else
+ return np->autoflowlabel;
+}
+
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1727,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.opt = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
-
+ }
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3d3092adf1d2..9a7cf355bc8c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -904,7 +904,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
if (t->parms.collect_md) {
tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
if (!tun_dst)
- return 0;
+ goto drop;
}
ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
log_ecn_error);
@@ -1074,10 +1074,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
@@ -1123,8 +1124,13 @@ route_lookup:
max_headroom += 8;
mtu -= 8;
}
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ } else if (mtu < 576) {
+ mtu = 576;
+ }
+
if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
@@ -1671,11 +1677,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
- if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < ETH_MIN_MTU)
+ if (tnl->parms.proto == IPPROTO_IPV6) {
+ if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
} else {
- if (new_mtu < IPV6_MIN_MTU)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
if (new_mtu > 0xFFF8 - dev->hard_header_len)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..2d4680e0376f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
+ np->autoflowlabel_set = 1;
retv = 0;
break;
case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c..844642682b83 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, struct mld2_grec **ppgr)
+ int type, struct mld2_grec **ppgr, unsigned int mtu)
{
- struct net_device *dev = pmc->idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr;
- if (!skb)
- skb = mld_newpack(pmc->idev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = mld_newpack(pmc->idev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct mld2_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV6_MIN_MTU)
+ return skb;
+
isquery = type == MLD2_MODE_IS_INCLUDE ||
type == MLD2_MODE_IS_EXCLUDE;
truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
}
}
first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -1814,7 +1819,7 @@ empty_source:
mld_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a34..1d7ae9366335 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -458,7 +458,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9a..92c0047e7e33 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target masquerade_tg6_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV6,
.checkentry = masquerade_tg6_checkentry,
+ .destroy = masquerade_tg6_destroy,
.target = masquerade_tg6,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..0458b761f3c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
}
rt->dst.flags |= DST_HOST;
+ rt->dst.input = ip6_input;
rt->dst.output = ip6_output;
rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- if (!fibmatch)
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
- else
- dst = ip6_route_lookup(net, &fl6, 0);
+ dst = ip6_route_input_lookup(net, dev, &fl6, flags);
rcu_read_unlock();
} else {
fl6.flowi6_oif = oif;
- if (!fibmatch)
- dst = ip6_route_output(net, NULL, &fl6);
- else
- dst = ip6_route_lookup(net, &fl6, 0);
+ dst = ip6_route_output(net, NULL, &fl6);
}
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (fibmatch && rt->dst.from) {
+ struct rt6_info *ort = container_of(rt->dst.from,
+ struct rt6_info, dst);
+
+ dst_hold(&ort->dst);
+ ip6_rt_put(rt);
+ rt = ort;
+ }
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
ip6_rt_put(rt);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d60ddcb0bfe2..d7dc23c1b2ca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
t->parms.iph.tos = p->iph.tos;
+ t->parms.iph.frag_off = p->iph.frag_off;
if (t->parms.link != p->link || t->fwmark != fwmark) {
t->parms.link = p->link;
t->fwmark = fwmark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6bb98c93edfe..7178476b3d2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
@@ -1454,7 +1454,6 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- tcp_v6_fill_cb(skb, hdr, th);
if (tcp_v6_inbound_md5_hash(sk, skb)) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -1467,8 +1466,12 @@ process:
sock_hold(sk);
refcounted = true;
nsk = NULL;
- if (!tcp_filter(sk, skb))
+ if (!tcp_filter(sk, skb)) {
+ th = (const struct tcphdr *)skb->data;
+ hdr = ipv6_hdr(skb);
+ tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false);
+ }
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
@@ -1492,8 +1495,6 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- tcp_v6_fill_cb(skb, hdr, th);
-
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
@@ -1501,6 +1502,7 @@ process:
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
+ tcp_v6_fill_cb(skb, hdr, th);
skb->dev = NULL;
@@ -1590,7 +1592,6 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_restore_cb(skb);
tcp_v6_send_reset(sk, skb);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
}
EXPORT_SYMBOL(xfrm6_rcv_spi);
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (xfrm_trans_queue(skb, ip6_rcv_finish))
+ __kfree_skb(skb);
+ return -1;
+}
+
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- ip6_rcv_finish);
+ xfrm6_transport_finish2);
return -1;
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b750a22c4b9..d4e98f20fc2a 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1625,60 +1625,30 @@ static struct proto kcm_proto = {
};
/* Clone a kcm socket. */
-static int kcm_clone(struct socket *osock, struct kcm_clone *info,
- struct socket **newsockp)
+static struct file *kcm_clone(struct socket *osock)
{
struct socket *newsock;
struct sock *newsk;
- struct file *newfile;
- int err, newfd;
- err = -ENFILE;
newsock = sock_alloc();
if (!newsock)
- goto out;
+ return ERR_PTR(-ENFILE);
newsock->type = osock->type;
newsock->ops = osock->ops;
__module_get(newsock->ops->owner);
- newfd = get_unused_fd_flags(0);
- if (unlikely(newfd < 0)) {
- err = newfd;
- goto out_fd_fail;
- }
-
- newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
- if (IS_ERR(newfile)) {
- err = PTR_ERR(newfile);
- goto out_sock_alloc_fail;
- }
-
newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
&kcm_proto, true);
if (!newsk) {
- err = -ENOMEM;
- goto out_sk_alloc_fail;
+ sock_release(newsock);
+ return ERR_PTR(-ENOMEM);
}
-
sock_init_data(newsock, newsk);
init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
- fd_install(newfd, newfile);
- *newsockp = newsock;
- info->fd = newfd;
-
- return 0;
-
-out_sk_alloc_fail:
- fput(newfile);
-out_sock_alloc_fail:
- put_unused_fd(newfd);
-out_fd_fail:
- sock_release(newsock);
-out:
- return err;
+ return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
}
static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1708,17 +1678,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
case SIOCKCMCLONE: {
struct kcm_clone info;
- struct socket *newsock = NULL;
-
- err = kcm_clone(sock, &info, &newsock);
- if (!err) {
- if (copy_to_user((void __user *)arg, &info,
- sizeof(info))) {
- err = -EFAULT;
- sys_close(info.fd);
- }
- }
+ struct file *file;
+
+ info.fd = get_unused_fd_flags(0);
+ if (unlikely(info.fd < 0))
+ return info.fd;
+ file = kcm_clone(sock);
+ if (IS_ERR(file)) {
+ put_unused_fd(info.fd);
+ return PTR_ERR(file);
+ }
+ if (copy_to_user((void __user *)arg, &info,
+ sizeof(info))) {
+ put_unused_fd(info.fd);
+ fput(file);
+ return -EFAULT;
+ }
+ fd_install(info.fd, file);
+ err = 0;
break;
}
default:
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 41f5e48f8021..1621b6ab17ba 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,13 +291,14 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
int i;
mutex_lock(&sta->ampdu_mlme.mtx);
- for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
- ___ieee80211_stop_tx_ba_session(sta, i, reason);
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
WLAN_REASON_QSTA_LEAVE_QBSS,
reason != AGG_STOP_DESTROY_STA &&
reason != AGG_STOP_PEER_REQUEST);
- }
+
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
+ ___ieee80211_stop_tx_ba_session(sta, i, reason);
mutex_unlock(&sta->ampdu_mlme.mtx);
/* stopping might queue the work again - so cancel only afterwards */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 4f7826d7b47c..4394463a0c2e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
struct mesh_path *mpath;
u8 ttl, flags, hopcount;
const u8 *orig_addr;
- u32 orig_sn, metric, metric_txsta, interval;
+ u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval;
bool root_is_gate;
ttl = rann->rann_ttl;
@@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
interval = le32_to_cpu(rann->rann_interval);
hopcount = rann->rann_hopcount;
hopcount++;
- metric = le32_to_cpu(rann->rann_metric);
+ orig_metric = le32_to_cpu(rann->rann_metric);
/* Ignore our own RANNs */
if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
return;
}
- metric_txsta = airtime_link_metric_get(local, sta);
+ last_hop_metric = airtime_link_metric_get(local, sta);
+ new_metric = orig_metric + last_hop_metric;
+ if (new_metric < orig_metric)
+ new_metric = MAX_METRIC;
mpath = mesh_path_lookup(sdata, orig_addr);
if (!mpath) {
@@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
}
if (!(SN_LT(mpath->sn, orig_sn)) &&
- !(mpath->sn == orig_sn && metric < mpath->rann_metric)) {
+ !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) {
rcu_read_unlock();
return;
}
@@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
}
mpath->sn = orig_sn;
- mpath->rann_metric = metric + metric_txsta;
+ mpath->rann_metric = new_metric;
mpath->is_root = true;
/* Recording RANNs sender address to send individually
* addressed PREQs destined for root mesh STA */
@@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
orig_sn, 0, NULL, 0, broadcast_addr,
hopcount, ttl, interval,
- metric + metric_txsta, 0, sdata);
+ new_metric, 0, sdata);
}
rcu_read_unlock();
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 04460440d731..c244691deab9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif);
+ skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
if (!skb)
return;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 70e9d2ca8bbe..4daafb07602f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
}
return true;
case NL80211_IFTYPE_MESH_POINT:
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+ return false;
if (multicast)
return true;
return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7b8154474b9e..3160954fc406 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
EXPORT_SYMBOL(ieee80211_pspoll_get);
struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+ struct ieee80211_vif *vif,
+ bool qos_ok)
{
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_if_managed *ifmgd;
struct ieee80211_local *local;
struct sk_buff *skb;
+ bool qos = false;
if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
return NULL;
@@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
ifmgd = &sdata->u.mgd;
local = sdata->local;
- skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc));
+ if (qos_ok) {
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ sta = sta_info_get(sdata, ifmgd->bssid);
+ qos = sta && sta->sta.wme;
+ rcu_read_unlock();
+ }
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ sizeof(*nullfunc) + 2);
if (!skb)
return NULL;
@@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
IEEE80211_STYPE_NULLFUNC |
IEEE80211_FCTL_TODS);
+ if (qos) {
+ __le16 qos = cpu_to_le16(7);
+
+ BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC |
+ IEEE80211_STYPE_NULLFUNC) !=
+ IEEE80211_STYPE_QOS_NULLFUNC);
+ nullfunc->frame_control |=
+ cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC);
+ skb->priority = 7;
+ skb_set_queue_mapping(skb, IEEE80211_AC_VO);
+ skb_put_data(skb, &qos, sizeof(qos));
+ }
+
memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c10..dc6347342e34 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -103,7 +103,6 @@ struct bitstr {
#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
-#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
static unsigned int get_len(struct bitstr *bs);
static unsigned int get_bit(struct bitstr *bs);
static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
return v;
}
+static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
+{
+ bits += bs->bit;
+ bytes += bits / BITS_PER_BYTE;
+ if (bits % BITS_PER_BYTE > 0)
+ bytes++;
+
+ if (bs->cur + bytes > bs->end) /* compare buffer positions, not the bytes stored there */
+ return 1;
+
+ return 0;
+}
+
/****************************************************************************/
static unsigned int get_bit(struct bitstr *bs)
{
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
INC_BIT(bs);
-
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+
len = *bs->cur++;
bs->cur += len;
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
- CHECK_BOUND(bs, 0);
return H323_ERROR_NONE;
}
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
bs->cur += 2;
break;
case CONS: /* 64K < Range < 4G */
+ if (nf_h323_error_boundary(bs, 0, 2))
+ return H323_ERROR_BOUND;
len = get_bits(bs, 2) + 1;
BYTE_ALIGN(bs);
if (base && (f->attr & DECODE)) { /* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
break;
case UNCO:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
bs->cur += len;
break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
PRINT("\n");
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
INC_BITS(bs, f->sz);
}
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
len = f->lb;
break;
case WORD: /* 2-byte length */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) << 8;
len += (*bs->cur++) + f->lb;
break;
case SEMI:
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
break;
default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
bs->cur += len >> 3;
bs->bit = len & 7;
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
/* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
INC_BITS(bs, (len << 2));
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
break;
case BYTE: /* Range == 256 */
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) + f->lb;
break;
case SEMI:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs) + f->lb;
break;
default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
PRINT("\n");
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
switch (f->sz) {
case BYTE: /* Range == 256 */
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) + f->lb;
break;
default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
bs->cur += len << 1;
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
/* Extensible? */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
ext = (f->attr & EXT) ? get_bit(bs) : 0;
/* Get fields bitmap */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Decode */
if (son->attr & OPEN) { /* Open field */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
" ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
/* Get the extension bitmap */
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
bmp2_len = get_bits(bs, 7) + 1;
- CHECK_BOUND(bs, (bmp2_len + 7) >> 3);
+ if (nf_h323_error_boundary(bs, 0, bmp2_len))
+ return H323_ERROR_BOUND;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
/* Check Range */
if (i >= f->ub) { /* Newer Version? */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
bs->cur += len;
continue;
}
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
if (!((0x80000000 >> opt) & bmp2)) /* Not present */
continue;
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
switch (f->sz) {
case BYTE:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
count = *bs->cur++;
break;
case WORD:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
count = *bs->cur++;
count <<= 8;
count += *bs->cur++;
break;
case SEMI:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
count = get_len(bs);
break;
default:
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
count = get_bits(bs, f->sz);
break;
}
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
for (i = 0; i < count; i++) {
if (son->attr & OPEN) {
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
" ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
/* Decode the choice index number */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
if ((f->attr & EXT) && get_bit(bs)) {
ext = 1;
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
type = get_bits(bs, 7) + f->lb;
} else {
ext = 0;
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
type = get_bits(bs, f->sz);
if (type >= f->lb)
return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
/* Check Range */
if (type >= f->ub) { /* Newer version? */
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
bs->cur += len;
return H323_ERROR_NONE;
}
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
if (ext || (son->attr & OPEN)) {
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0)) /* same pre-get_len() check as the other call sites; len is not set yet */
+ return H323_ERROR_BOUND;
 len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
son->name);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdf..382d49792f42 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_labels.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
static int ctnetlink_change_timeout(struct nf_conn *ct,
const struct nlattr * const cda[])
{
- u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+ u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
- ct->timeout = nfct_time_stamp + timeout * HZ;
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ ct->timeout = nfct_time_stamp + (u32)timeout;
if (test_bit(IPS_DYING_BIT, &ct->status))
return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
int err = -EINVAL;
struct nf_conntrack_helper *helper;
struct nf_conn_tstamp *tstamp;
+ u64 timeout;
ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ ct->timeout = (u32)timeout + nfct_time_stamp;
rcu_read_lock();
if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0..37ef35b861f2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
timeout = timeouts[TCP_CONNTRACK_UNACK];
+ else if (ct->proto.tcp.last_win == 0 &&
+ timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+ timeout = timeouts[TCP_CONNTRACK_RETRANS];
else
timeout = timeouts[new_state];
spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dc..07bd4138c84e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain[0] &&
+ if (ctx && ctx->chain &&
strcmp(ctx->chain, chain->name) != 0)
continue;
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
struct nft_obj_filter *filter = cb->data;
- kfree(filter->table);
- kfree(filter);
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
return 0;
}
@@ -5847,6 +5849,12 @@ static int __net_init nf_tables_init_net(struct net *net)
return 0;
}
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->nft.af_info));
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
@@ -5917,6 +5925,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
+ .exit = nf_tables_exit_net,
};
static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b393673..d33ce6d5ebce 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/errno.h>
+#include <linux/capability.h>
#include <net/netlink.h>
#include <net/sock.h>
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth;
int ret = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth;
bool tuple_set = false;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth, *n;
int j = 0, ret;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (tb[NFCTH_NAME])
helper_name = nla_data(tb[NFCTH_NAME]);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381c..e955bec0acc6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
static void __net_exit nfnl_log_net_exit(struct net *net)
{
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ unsigned int i;
+
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
#endif
nf_log_unset(net, &nfulnl_logger);
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
}
static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3..c09b36755ed7 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
static void __net_exit nfnl_queue_net_exit(struct net *net)
{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int i;
+
nf_unregister_queue_handler(net);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
#endif
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
}
static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3b..47ec1046ad11 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
[NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
[NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
};
static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c..55802e97f906 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
return 0;
}
+static void __net_exit xt_net_exit(struct net *net)
+{
+ int i;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+}
+
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
+ .exit = xt_net_exit,
};
static int __init xt_init(void)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index fa2ca0a13619..06b090d8e901 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
{
struct sock_fprog_kern program;
+ if (len > XT_BPF_MAX_NUM_INSTR)
+ return -EINVAL;
+
program.len = len;
program.filter = insns;
@@ -52,6 +55,9 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
{
+ if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+ return -EINVAL;
+
*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
return PTR_ERR_OR_ZERO(*ret);
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061d..a34f314a8c23 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/capability.h>
#include <linux/if.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
struct xt_osf_finger *kf = NULL, *sf;
int err = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
struct xt_osf_finger *sf;
int err = -ENOENT;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..79cc1bf36e4a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
struct sock *sk = skb->sk;
int ret = -ENOMEM;
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return 0;
+
dev_hold(dev);
if (is_vmalloc_addr(skb->head))
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 99cfafc2a139..ef38e5aecd28 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info,
uint32_t cutlen)
{
- unsigned short gso_type = skb_shinfo(skb)->gso_type;
+ unsigned int gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
int err;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -EINVAL;
skb_reset_network_header(skb);
+ key->eth.type = skb->protocol;
} else {
eth = eth_hdr(skb);
ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
- skb->protocol = parse_ethertype(skb);
- if (unlikely(skb->protocol == htons(0)))
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
+ /* Multiple tagged packets need to retain TPID to satisfy
+ * skb_vlan_pop(), which will later shift the ethertype into
+ * skb->protocol.
+ */
+ if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+ skb->protocol = key->eth.cvlan.tpid;
+ else
+ skb->protocol = key->eth.type;
+
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
}
skb_reset_mac_len(skb);
- key->eth.type = skb->protocol;
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index dc424798ba6f..624ea74353dd 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2241,14 +2241,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- if (size > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
- return ERR_PTR(-EINVAL);
- }
+ WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
@@ -2321,12 +2318,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = ksize(*sfa) * 2;
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+ if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ OVS_NLERR(log, "Flow action size exceeds max %u",
+ MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
+ }
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
- acts = nla_alloc_flow_actions(new_acts_size, log);
+ acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return (void *)acts;
@@ -3059,7 +3059,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
{
int err;
- *sfa = nla_alloc_flow_actions(nla_len(attr), log);
+ *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 737092ca9b4e..da215e5c1399 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
atomic_long_set(&rollover->num, 0);
atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
- po->rollover = rollover;
}
if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
@@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
po->fanout = match;
+ po->rollover = rollover;
+ rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
__fanout_link(sk, po);
err = 0;
@@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
out:
- if (err && rollover) {
- kfree_rcu(rollover, rcu);
- po->rollover = NULL;
- }
+ kfree(rollover);
mutex_unlock(&fanout_mutex);
return err;
}
@@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk)
list_del(&f->list);
else
f = NULL;
-
- if (po->rollover) {
- kfree_rcu(po->rollover, rcu);
- po->rollover = NULL;
- }
}
mutex_unlock(&fanout_mutex);
@@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock)
synchronize_net();
+ kfree(po->rollover); /* per-socket state: free it even when the fanout group lives on (f == NULL) */
if (f) {
fanout_release_data(f);
kfree(f);
}
@@ -3097,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
if (need_rehook) {
if (po->running) {
rcu_read_unlock();
+ /* prevents packet_notifier() from calling
+ * register_prot_hook()
+ */
+ po->num = 0;
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3105,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
dev->ifindex);
}
+ BUG_ON(po->running);
po->num = proto;
po->prot_hook.type = proto;
@@ -3843,7 +3842,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
- struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3922,18 +3920,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- rcu_read_lock();
- rollover = rcu_dereference(po->rollover);
- if (rollover) {
- rstats.tp_all = atomic_long_read(&rollover->num);
- rstats.tp_huge = atomic_long_read(&rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
- }
- rcu_read_unlock();
- if (!rollover)
+ if (!po->rollover)
return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 562fbc155006..a1d2b2319ae9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -95,7 +95,6 @@ struct packet_fanout {
struct packet_rollover {
int sock;
- struct rcu_head rcu;
atomic_long_t num;
atomic_long_t num_huge;
atomic_long_t num_failed;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8886f15abe90..634cfcb7bba6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
long i;
int ret;
- if (rs->rs_bound_addr == 0) {
+ if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428..f72466c63f0c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
continue;
if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+ if (cmsg->cmsg_len <
+ CMSG_LEN(sizeof(struct rds_rdma_args)))
+ return -EINVAL;
args = CMSG_DATA(cmsg);
*rdma_bytes += args->remote_vec.bytes;
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9b5c46b052fd..dcd818fa837e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
bool upgrade)
{
struct rxrpc_conn_parameters cp;
+ struct rxrpc_call_params p;
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
@@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
if (key && !key->payload.data[0])
key = NULL; /* a no-security key */
+ memset(&p, 0, sizeof(p));
+ p.user_call_ID = user_call_ID;
+ p.tx_total_len = tx_total_len;
+
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
cp.key = key;
@@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.exclusive = false;
cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
- gfp);
+ call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp);
/* The socket has been unlocked. */
if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
@@ -856,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
static int rxrpc_release_sock(struct sock *sk)
{
struct rxrpc_sock *rx = rxrpc_sk(sk);
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
@@ -863,6 +868,19 @@ static int rxrpc_release_sock(struct sock *sk)
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
+ /* We want to kill off all connections from a service socket
+ * as fast as possible because we can't share these; client
+ * sockets, on the other hand, can share an endpoint.
+ */
+ switch (sk->sk_state) {
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_BOUND2:
+ case RXRPC_SERVER_LISTENING:
+ case RXRPC_SERVER_LISTEN_DISABLED:
+ rx->local->service_closed = true;
+ break;
+ }
+
spin_lock_bh(&sk->sk_receive_queue.lock);
sk->sk_state = RXRPC_CLOSE;
spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -878,6 +896,8 @@ static int rxrpc_release_sock(struct sock *sk)
rxrpc_release_calls_on_socket(rx);
flush_workqueue(rxrpc_workqueue);
rxrpc_purge_queue(&sk->sk_receive_queue);
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
+ rxrpc_queue_work(&rxnet->client_conn_reaper);
rxrpc_put_local(rx->local);
rx->local = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index b2151993d384..416688381eb7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -79,17 +79,20 @@ struct rxrpc_net {
struct list_head conn_proc_list; /* List of conns in this namespace for proc */
struct list_head service_conns; /* Service conns in this namespace */
rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
- struct delayed_work service_conn_reaper;
+ struct work_struct service_conn_reaper;
+ struct timer_list service_conn_reap_timer;
unsigned int nr_client_conns;
unsigned int nr_active_client_conns;
bool kill_all_client_conns;
+ bool live;
spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
struct list_head waiting_client_conns;
struct list_head active_client_conns;
struct list_head idle_client_conns;
- struct delayed_work client_conn_reaper;
+ struct work_struct client_conn_reaper;
+ struct timer_list client_conn_reap_timer;
struct list_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
@@ -265,6 +268,7 @@ struct rxrpc_local {
rwlock_t services_lock; /* lock for services list */
int debug_id; /* debug ID for printks */
bool dead;
+ bool service_closed; /* Service socket closed */
struct sockaddr_rxrpc srx; /* local address */
};
@@ -338,8 +342,17 @@ enum rxrpc_conn_flag {
RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */
RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
+ RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */
+ RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */
+ RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */
+ RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */
};
+#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_1) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_2) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_3))
+
/*
* Events that can be raised upon a connection.
*/
@@ -393,6 +406,7 @@ struct rxrpc_connection {
#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1)
struct list_head waiting_calls; /* Calls waiting for channels */
struct rxrpc_channel {
+ unsigned long final_ack_at; /* Time at which to issue final ACK */
struct rxrpc_call __rcu *call; /* Active call */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
@@ -404,6 +418,7 @@ struct rxrpc_connection {
};
} channels[RXRPC_MAXCALLS];
+ struct timer_list timer; /* Conn event timer */
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
@@ -457,9 +472,10 @@ enum rxrpc_call_flag {
enum rxrpc_call_event {
RXRPC_CALL_EV_ACK, /* need to generate ACK */
RXRPC_CALL_EV_ABORT, /* need to generate abort */
- RXRPC_CALL_EV_TIMER, /* Timer expired */
RXRPC_CALL_EV_RESEND, /* Tx resend required */
RXRPC_CALL_EV_PING, /* Ping send required */
+ RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */
+ RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
};
/*
@@ -503,10 +519,16 @@ struct rxrpc_call {
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
struct mutex user_mutex; /* User access mutex */
- ktime_t ack_at; /* When deferred ACK needs to happen */
- ktime_t resend_at; /* When next resend needs to happen */
- ktime_t ping_at; /* When next to send a ping */
- ktime_t expire_at; /* When the call times out */
+ unsigned long ack_at; /* When deferred ACK needs to happen */
+ unsigned long ack_lost_at; /* When ACK is figured as lost */
+ unsigned long resend_at; /* When next resend needs to happen */
+ unsigned long ping_at; /* When next to send a ping */
+ unsigned long keepalive_at; /* When next to send a keepalive ping */
+ unsigned long expect_rx_by; /* When we expect to get a packet by */
+ unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
+ unsigned long expect_term_by; /* When we expect call termination by */
+ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
+ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -609,6 +631,8 @@ struct rxrpc_call {
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
+ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
+ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
};
/*
@@ -632,6 +656,35 @@ struct rxrpc_ack_summary {
u8 cumulative_acks;
};
+/*
+ * sendmsg() cmsg-specified parameters.
+ */
+enum rxrpc_command {
+ RXRPC_CMD_SEND_DATA, /* send data message */
+ RXRPC_CMD_SEND_ABORT, /* request abort generation */
+ RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
+ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
+};
+
+struct rxrpc_call_params {
+ s64 tx_total_len; /* Total Tx data length (if send data) */
+ unsigned long user_call_ID; /* User's call ID */
+ struct {
+ u32 hard; /* Maximum lifetime (sec) */
+ u32 idle; /* Max time since last data packet (msec) */
+ u32 normal; /* Max time since last call packet (msec) */
+ } timeouts;
+ u8 nr_timeouts; /* Number of timeouts specified */
+};
+
+struct rxrpc_send_params {
+ struct rxrpc_call_params call;
+ u32 abort_code; /* Abort code to Tx (if abort) */
+ enum rxrpc_command command : 8; /* The command to implement */
+ bool exclusive; /* Shared or exclusive call */
+ bool upgrade; /* If the connection is upgradeable */
+};
+
#include <trace/events/rxrpc.h>
/*
@@ -657,12 +710,19 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
-void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
-void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
+static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at, /* requested expiry; callers in this patch pass jiffies-based values */
+ unsigned long now, /* timestamp handed to the tracepoint only */
+ enum rxrpc_timer_trace why) /* trace reason code */
+{ /* Emit the rxrpc_timer trace event, then pass expire_at to timer_reduce() on call->timer. */
+ trace_rxrpc_timer(call, why, now);
+ timer_reduce(&call->timer, expire_at); /* NOTE(review): presumably only ever brings the expiry earlier - confirm against timer_reduce() docs */
+}
+
/*
* call_object.c
*/
@@ -672,11 +732,11 @@ extern unsigned int rxrpc_max_call_lifetime;
extern struct kmem_cache *rxrpc_call_jar;
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
-struct rxrpc_call *rxrpc_alloc_call(gfp_t);
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
- unsigned long, s64, gfp_t);
+ struct rxrpc_call_params *, gfp_t);
int rxrpc_retry_client_call(struct rxrpc_sock *,
struct rxrpc_call *,
struct rxrpc_conn_parameters *,
@@ -803,8 +863,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
*/
extern unsigned int rxrpc_max_client_connections;
extern unsigned int rxrpc_reap_client_connections;
-extern unsigned int rxrpc_conn_idle_client_expiry;
-extern unsigned int rxrpc_conn_idle_client_fast_expiry;
+extern unsigned long rxrpc_conn_idle_client_expiry;
+extern unsigned long rxrpc_conn_idle_client_fast_expiry;
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
@@ -825,6 +885,7 @@ void rxrpc_process_connection(struct work_struct *);
* conn_object.c
*/
extern unsigned int rxrpc_connection_expiry;
+extern unsigned int rxrpc_closed_conn_expiry;
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
@@ -861,6 +922,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
rxrpc_put_service_conn(conn);
}
+static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
+ unsigned long expire_at) /* requested expiry for the connection event timer */
+{ /* Thin wrapper: pass expire_at to timer_reduce() on conn->timer; no tracing, unlike the call variant. */
+ timer_reduce(&conn->timer, expire_at);
+}
+
/*
* conn_service.c
*/
@@ -930,13 +997,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local)
* misc.c
*/
extern unsigned int rxrpc_max_backlog __read_mostly;
-extern unsigned int rxrpc_requested_ack_delay;
-extern unsigned int rxrpc_soft_ack_delay;
-extern unsigned int rxrpc_idle_ack_delay;
+extern unsigned long rxrpc_requested_ack_delay;
+extern unsigned long rxrpc_soft_ack_delay;
+extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
-extern unsigned int rxrpc_resend_timeout;
+extern unsigned long rxrpc_resend_timeout;
extern const s8 rxrpc_ack_priority[];
@@ -954,7 +1021,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
+int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
void rxrpc_reject_packets(struct rxrpc_local *);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index cbd1701e813a..3028298ca561 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
/* Now it gets complicated, because calls get registered with the
* socket here, particularly if a user ID is preassigned by the user.
*/
- call = rxrpc_alloc_call(gfp);
+ call = rxrpc_alloc_call(rx, gfp);
if (!call)
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 3574508baf9a..ad2ab1103189 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -22,80 +22,6 @@
#include "ar-internal.h"
/*
- * Set the timer
- */
-void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
- ktime_t now)
-{
- unsigned long t_j, now_j = jiffies;
- ktime_t t;
- bool queue = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- t = call->expire_at;
- if (!ktime_after(t, now)) {
- trace_rxrpc_timer(call, why, now, now_j);
- queue = true;
- goto out;
- }
-
- if (!ktime_after(call->resend_at, now)) {
- call->resend_at = call->expire_at;
- if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- queue = true;
- } else if (ktime_before(call->resend_at, t)) {
- t = call->resend_at;
- }
-
- if (!ktime_after(call->ack_at, now)) {
- call->ack_at = call->expire_at;
- if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
- queue = true;
- } else if (ktime_before(call->ack_at, t)) {
- t = call->ack_at;
- }
-
- if (!ktime_after(call->ping_at, now)) {
- call->ping_at = call->expire_at;
- if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
- queue = true;
- } else if (ktime_before(call->ping_at, t)) {
- t = call->ping_at;
- }
-
- t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
- t_j += jiffies;
-
- /* We have to make sure that the calculated jiffies value falls
- * at or after the nsec value, or we may loop ceaselessly
- * because the timer times out, but we haven't reached the nsec
- * timeout yet.
- */
- t_j++;
-
- if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
- mod_timer(&call->timer, t_j);
- trace_rxrpc_timer(call, why, now, now_j);
- }
- }
-
-out:
- if (queue)
- rxrpc_queue_call(call);
-}
-
-/*
- * Set the timer
- */
-void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
- ktime_t now)
-{
- read_lock_bh(&call->state_lock);
- __rxrpc_set_timer(call, why, now);
- read_unlock_bh(&call->state_lock);
-}
-
-/*
* Propose a PING ACK be sent.
*/
static void rxrpc_propose_ping(struct rxrpc_call *call,
@@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call,
!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
rxrpc_queue_call(call);
} else {
- ktime_t now = ktime_get_real();
- ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay);
+ unsigned long now = jiffies;
+ unsigned long ping_at = now + rxrpc_idle_ack_delay;
- if (ktime_before(ping_at, call->ping_at)) {
- call->ping_at = ping_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now);
+ if (time_before(ping_at, call->ping_at)) {
+ WRITE_ONCE(call->ping_at, ping_at);
+ rxrpc_reduce_call_timer(call, ping_at, now,
+ rxrpc_timer_set_for_ping);
}
}
}
@@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
enum rxrpc_propose_ack_trace why)
{
enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
- unsigned int expiry = rxrpc_soft_ack_delay;
- ktime_t now, ack_at;
+ unsigned long expiry = rxrpc_soft_ack_delay;
s8 prior = rxrpc_ack_priority[ack_reason];
/* Pings are handled specially because we don't want to accidentally
@@ -190,11 +116,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
background)
rxrpc_queue_call(call);
} else {
- now = ktime_get_real();
- ack_at = ktime_add_ms(now, expiry);
- if (ktime_before(ack_at, call->ack_at)) {
- call->ack_at = ack_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now);
+ unsigned long now = jiffies, ack_at;
+
+ if (call->peer->rtt_usage > 0)
+ ack_at = nsecs_to_jiffies(call->peer->rtt);
+ else
+ ack_at = expiry;
+
+ ack_at += now;
+ if (time_before(ack_at, call->ack_at)) {
+ WRITE_ONCE(call->ack_at, ack_at);
+ rxrpc_reduce_call_timer(call, ack_at, now,
+ rxrpc_timer_set_for_ack);
}
}
@@ -227,18 +160,28 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
+static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
+ unsigned long resend_at;
rxrpc_seq_t cursor, seq, top;
- ktime_t max_age, oldest, ack_ts;
+ ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
int ix;
u8 annotation, anno_type, retrans = 0, unacked = 0;
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- max_age = ktime_sub_ms(now, rxrpc_resend_timeout);
+ if (call->peer->rtt_usage > 1)
+ timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
+ else
+ timeout = ms_to_ktime(rxrpc_resend_timeout);
+ min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
+ if (ktime_before(timeout, min_timeo))
+ timeout = min_timeo;
+
+ now = ktime_get_real();
+ max_age = ktime_sub(now, timeout);
spin_lock_bh(&call->lock);
@@ -282,7 +225,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
}
- call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now)));
+ resend_at += jiffies + rxrpc_resend_timeout;
+ WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
rxrpc_congestion_timeout(call);
@@ -292,14 +237,15 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
* retransmitting data.
*/
if (!retrans) {
- rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+ rxrpc_reduce_call_timer(call, resend_at, now_j,
+ rxrpc_timer_set_for_resend); /* helper takes jiffies (now_j), not the ktime 'now' used for skb ageing */
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_ns(ack_ts) < call->peer->rtt)
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
- rxrpc_send_ack_packet(call, true);
+ rxrpc_send_ack_packet(call, true, NULL);
goto out;
}
@@ -364,7 +310,8 @@ void rxrpc_process_call(struct work_struct *work)
{
struct rxrpc_call *call =
container_of(work, struct rxrpc_call, processor);
- ktime_t now;
+ rxrpc_serial_t *send_ack;
+ unsigned long now, next, t;
rxrpc_see_call(call);
@@ -384,22 +331,89 @@ recheck_state:
goto out_put;
}
- now = ktime_get_real();
- if (ktime_before(call->expire_at, now)) {
+ /* Work out if any timeouts tripped */
+ now = jiffies;
+ t = READ_ONCE(call->expect_rx_by);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
+ set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ }
+
+ t = READ_ONCE(call->expect_req_by);
+ if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
+ time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
+ set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ }
+
+ t = READ_ONCE(call->expect_term_by);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
+ set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ }
+
+ t = READ_ONCE(call->ack_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
+ cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_ACK, &call->events);
+ }
+
+ t = READ_ONCE(call->ack_lost_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
+ cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
+ }
+
+ t = READ_ONCE(call->keepalive_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
+ cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true,
+ rxrpc_propose_ack_ping_for_keepalive);
+ set_bit(RXRPC_CALL_EV_PING, &call->events);
+ }
+
+ t = READ_ONCE(call->ping_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
+ cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_PING, &call->events);
+ }
+
+ t = READ_ONCE(call->resend_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
+ cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_RESEND, &call->events);
+ }
+
+ /* Process events */
+ if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+ send_ack = NULL;
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
+ call->acks_lost_top = call->tx_top;
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ack_ping_for_lost_ack);
+ send_ack = &call->acks_lost_ping;
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
+ send_ack) {
if (call->ackr_reason) {
- rxrpc_send_ack_packet(call, false);
+ rxrpc_send_ack_packet(call, false, send_ack);
goto recheck_state;
}
}
if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
- rxrpc_send_ack_packet(call, true);
+ rxrpc_send_ack_packet(call, true, NULL);
goto recheck_state;
}
@@ -408,7 +422,24 @@ recheck_state:
goto recheck_state;
}
- rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+ /* Make sure the timer is restarted */
+ next = call->expect_rx_by;
+
+#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+
+ set(call->expect_req_by);
+ set(call->expect_term_by);
+ set(call->ack_at);
+ set(call->ack_lost_at);
+ set(call->resend_at);
+ set(call->keepalive_at);
+ set(call->ping_at);
+
+ now = jiffies;
+ if (time_after_eq(now, next))
+ goto recheck_state;
+
+ rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
/* other events may have been raised since we started checking */
if (call->events && call->state < RXRPC_CALL_COMPLETE) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 994dc2df57e4..0b2db38dd32d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -51,10 +51,14 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
- if (call->state < RXRPC_CALL_COMPLETE)
- rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real());
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
+ rxrpc_queue_call(call);
+ }
}
+static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -95,7 +99,7 @@ found_extant_call:
/*
* allocate a new call
*/
-struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
{
struct rxrpc_call *call;
@@ -114,6 +118,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
goto nomem_2;
mutex_init(&call->user_mutex);
+
+ /* Prevent lockdep reporting a deadlock false positive between the afs
+ * filesystem and sys_sendmsg() via the mmap sem.
+ */
+ if (rx->sk.sk_kern_sock)
+ lockdep_set_class(&call->user_mutex,
+ &rxrpc_call_user_mutex_lock_class_key);
+
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
INIT_WORK(&call->processor, &rxrpc_process_call);
INIT_LIST_HEAD(&call->link);
@@ -128,6 +140,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
atomic_set(&call->usage, 1);
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+ call->next_req_timo = 1 * HZ;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -150,7 +164,8 @@ nomem:
/*
* Allocate a new client call.
*/
-static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
+static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
+ struct sockaddr_rxrpc *srx,
gfp_t gfp)
{
struct rxrpc_call *call;
@@ -158,7 +173,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
_enter("");
- call = rxrpc_alloc_call(gfp);
+ call = rxrpc_alloc_call(rx, gfp);
if (!call)
return ERR_PTR(-ENOMEM);
call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
@@ -177,15 +192,18 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
*/
static void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- ktime_t now = ktime_get_real(), expire_at;
-
- expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
- call->expire_at = expire_at;
- call->ack_at = expire_at;
- call->ping_at = expire_at;
- call->resend_at = expire_at;
- call->timer.expires = jiffies + LONG_MAX / 2;
- rxrpc_set_timer(call, rxrpc_timer_begin, now);
+ unsigned long now = jiffies;
+ unsigned long j = now + MAX_JIFFY_OFFSET; /* "far future": parks every timeout until it is armed */
+
+ call->ack_at = j;
+ call->ack_lost_at = j;
+ call->resend_at = j;
+ call->ping_at = j;
+ call->keepalive_at = j; /* also polled by rxrpc_process_call(); park it like the others */
+ call->expect_rx_by = j;
+ call->expect_req_by = j;
+ call->expect_term_by = j;
+ call->timer.expires = now;
}
/*
@@ -196,8 +213,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
struct sockaddr_rxrpc *srx,
- unsigned long user_call_ID,
- s64 tx_total_len,
+ struct rxrpc_call_params *p,
gfp_t gfp)
__releases(&rx->sk.sk_lock.slock)
{
@@ -207,18 +223,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
const void *here = __builtin_return_address(0);
int ret;
- _enter("%p,%lx", rx, user_call_ID);
+ _enter("%p,%lx", rx, p->user_call_ID);
- call = rxrpc_alloc_client_call(srx, gfp);
+ call = rxrpc_alloc_client_call(rx, srx, gfp);
if (IS_ERR(call)) {
release_sock(&rx->sk);
_leave(" = %ld", PTR_ERR(call));
return call;
}
- call->tx_total_len = tx_total_len;
+ call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
- here, (const void *)user_call_ID);
+ here, (const void *)p->user_call_ID);
/* We need to protect a partially set up call against the user as we
* will be acting outside the socket lock.
@@ -234,16 +250,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
parent = *pp;
xcall = rb_entry(parent, struct rxrpc_call, sock_node);
- if (user_call_ID < xcall->user_call_ID)
+ if (p->user_call_ID < xcall->user_call_ID)
pp = &(*pp)->rb_left;
- else if (user_call_ID > xcall->user_call_ID)
+ else if (p->user_call_ID > xcall->user_call_ID)
pp = &(*pp)->rb_right;
else
goto error_dup_user_ID;
}
rcu_assign_pointer(call->socket, rx);
- call->user_call_ID = user_call_ID;
+ call->user_call_ID = p->user_call_ID;
__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
rxrpc_get_call(call, rxrpc_call_got_userid);
rb_link_node(&call->sock_node, parent, pp);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 5f9624bd311c..7f74ca3059f8 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -85,8 +85,8 @@
__read_mostly unsigned int rxrpc_max_client_connections = 1000;
__read_mostly unsigned int rxrpc_reap_client_connections = 900;
-__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
-__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
/*
* We use machine-unique IDs for our client connections.
@@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
+ /* Cancel the final ACK on the previous call if it hasn't been sent yet
+ * as the DATA packet will implicitly ACK it.
+ */
+ clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+
write_lock_bh(&call->state_lock);
if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
@@ -686,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call,
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
- rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work);
+ rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
rxrpc_cull_active_client_conns(rxnet);
ret = rxrpc_get_client_conn(call, cp, srx, gfp);
@@ -752,6 +757,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
}
/*
+ * Set the reap timer.
+ */
+static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
+{
+ unsigned long now = jiffies;
+ unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
+
+ if (rxnet->live)
+ timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
+}
+
+/*
* Disconnect a client call.
*/
void rxrpc_disconnect_client_call(struct rxrpc_call *call)
@@ -813,6 +830,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
goto out_2;
}
+ /* Schedule the final ACK to be transmitted in a short while so that it
+ * can be skipped if we find a follow-on call. The first DATA packet
+ * of the follow-on call will implicitly ACK this call.
+ */
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ unsigned long final_ack_at = jiffies + 2;
+
+ WRITE_ONCE(chan->final_ack_at, final_ack_at);
+ smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
+ set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+ rxrpc_reduce_conn_timer(conn, final_ack_at);
+ }
+
/* Things are more complex and we need the cache lock. We might be
* able to simply idle the conn or it might now be lurking on the wait
* list. It might even get moved back to the active list whilst we're
@@ -878,9 +908,7 @@ idle_connection:
list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
if (rxnet->idle_client_conns.next == &conn->cache_link &&
!rxnet->kill_all_client_conns)
- queue_delayed_work(rxrpc_workqueue,
- &rxnet->client_conn_reaper,
- rxrpc_conn_idle_client_expiry);
+ rxrpc_set_client_reap_timer(rxnet);
} else {
trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
@@ -1018,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
{
struct rxrpc_connection *conn;
struct rxrpc_net *rxnet =
- container_of(to_delayed_work(work),
- struct rxrpc_net, client_conn_reaper);
+ container_of(work, struct rxrpc_net, client_conn_reaper);
unsigned long expiry, conn_expires_at, now;
unsigned int nr_conns;
bool did_discard = false;
@@ -1061,6 +1088,8 @@ next:
expiry = rxrpc_conn_idle_client_expiry;
if (nr_conns > rxrpc_reap_client_connections)
expiry = rxrpc_conn_idle_client_fast_expiry;
+ if (conn->params.local->service_closed)
+ expiry = rxrpc_closed_conn_expiry * HZ;
conn_expires_at = conn->idle_timestamp + expiry;
@@ -1096,9 +1125,8 @@ not_yet_expired:
*/
_debug("not yet");
if (!rxnet->kill_all_client_conns)
- queue_delayed_work(rxrpc_workqueue,
- &rxnet->client_conn_reaper,
- conn_expires_at - now);
+ timer_reduce(&rxnet->client_conn_reap_timer,
+ conn_expires_at);
out:
spin_unlock(&rxnet->client_conn_cache_lock);
@@ -1118,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
rxnet->kill_all_client_conns = true;
spin_unlock(&rxnet->client_conn_cache_lock);
- cancel_delayed_work(&rxnet->client_conn_reaper);
+ del_timer_sync(&rxnet->client_conn_reap_timer);
- if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0))
+ if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
_debug("destroy: queue failed");
_leave("");
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 59a51a56e7c8..4ca11be6be3c 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -24,31 +24,28 @@
* Retransmit terminal ACK or ABORT of the previous call.
*/
static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int channel)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
struct rxrpc_channel *chan;
struct msghdr msg;
- struct kvec iov;
+ struct kvec iov[3];
struct {
struct rxrpc_wire_header whdr;
union {
- struct {
- __be32 code;
- } abort;
- struct {
- struct rxrpc_ackpacket ack;
- u8 padding[3];
- struct rxrpc_ackinfo info;
- };
+ __be32 abort_code;
+ struct rxrpc_ackpacket ack;
};
} __attribute__((packed)) pkt;
+ struct rxrpc_ackinfo ack_info;
size_t len;
- u32 serial, mtu, call_id;
+ int ioc;
+ u32 serial, mtu, call_id, padding;
_enter("%d", conn->debug_id);
- chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+ chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
* ignore this packet.
@@ -56,7 +53,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
call_id = READ_ONCE(chan->last_call);
/* Sync with __rxrpc_disconnect_call() */
smp_rmb();
- if (call_id != sp->hdr.callNumber)
+ if (skb && call_id != sp->hdr.callNumber)
return;
msg.msg_name = &conn->params.peer->srx.transport;
@@ -65,9 +62,16 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
msg.msg_controllen = 0;
msg.msg_flags = 0;
- pkt.whdr.epoch = htonl(sp->hdr.epoch);
- pkt.whdr.cid = htonl(sp->hdr.cid);
- pkt.whdr.callNumber = htonl(sp->hdr.callNumber);
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt.whdr);
+ iov[1].iov_base = &padding;
+ iov[1].iov_len = 3;
+ iov[2].iov_base = &ack_info;
+ iov[2].iov_len = sizeof(ack_info);
+
+ pkt.whdr.epoch = htonl(conn->proto.epoch);
+ pkt.whdr.cid = htonl(conn->proto.cid);
+ pkt.whdr.callNumber = htonl(call_id);
pkt.whdr.seq = 0;
pkt.whdr.type = chan->last_type;
pkt.whdr.flags = conn->out_clientflag;
@@ -79,27 +83,35 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
len = sizeof(pkt.whdr);
switch (chan->last_type) {
case RXRPC_PACKET_TYPE_ABORT:
- pkt.abort.code = htonl(chan->last_abort);
- len += sizeof(pkt.abort);
+ pkt.abort_code = htonl(chan->last_abort);
+ iov[0].iov_len += sizeof(pkt.abort_code);
+ len += sizeof(pkt.abort_code);
+ ioc = 1;
break;
case RXRPC_PACKET_TYPE_ACK:
mtu = conn->params.peer->if_mtu;
mtu -= conn->params.peer->hdrsize;
pkt.ack.bufferSpace = 0;
- pkt.ack.maxSkew = htons(skb->priority);
- pkt.ack.firstPacket = htonl(chan->last_seq);
- pkt.ack.previousPacket = htonl(chan->last_seq - 1);
- pkt.ack.serial = htonl(sp->hdr.serial);
- pkt.ack.reason = RXRPC_ACK_DUPLICATE;
+ pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
+ pkt.ack.firstPacket = htonl(chan->last_seq + 1);
+ pkt.ack.previousPacket = htonl(chan->last_seq);
+ pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
+ pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- pkt.info.rxMTU = htonl(rxrpc_rx_mtu);
- pkt.info.maxMTU = htonl(mtu);
- pkt.info.rwind = htonl(rxrpc_rx_window_size);
- pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ ack_info.rxMTU = htonl(rxrpc_rx_mtu);
+ ack_info.maxMTU = htonl(mtu);
+ ack_info.rwind = htonl(rxrpc_rx_window_size);
+ ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
- len += sizeof(pkt.ack) + sizeof(pkt.info);
+ padding = 0;
+ iov[0].iov_len += sizeof(pkt.ack);
+ len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+ ioc = 3;
break;
+
+ default:
+ return;
}
/* Resync with __rxrpc_disconnect_call() and check that the last call
@@ -109,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
if (READ_ONCE(chan->last_call) != call_id)
return;
- iov.iov_base = &pkt;
- iov.iov_len = len;
-
serial = atomic_inc_return(&conn->serial);
pkt.whdr.serial = htonl(serial);
@@ -126,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
break;
}
- kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+ kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
_leave("");
return;
}
@@ -272,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb);
+ rxrpc_conn_retransmit_call(conn, skb,
+ sp->hdr.cid & RXRPC_CHANNELMASK);
return 0;
case RXRPC_PACKET_TYPE_BUSY:
@@ -379,6 +389,48 @@ abort:
}
/*
+ * Process delayed final ACKs that we haven't subsumed into a subsequent call.
+ */
+static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
+{
+ unsigned long j = jiffies, next_j;
+ unsigned int channel;
+ bool set;
+
+again:
+ next_j = j + LONG_MAX;
+ set = false;
+ for (channel = 0; channel < RXRPC_MAXCALLS; channel++) {
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ unsigned long ack_at;
+
+ if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
+ continue;
+
+ smp_rmb(); /* vs rxrpc_disconnect_client_call */
+ ack_at = READ_ONCE(chan->final_ack_at);
+
+ if (time_before(j, ack_at)) {
+ if (time_before(ack_at, next_j)) {
+ next_j = ack_at;
+ set = true;
+ }
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel,
+ &conn->flags))
+ rxrpc_conn_retransmit_call(conn, NULL, channel);
+ }
+
+ j = jiffies;
+ if (time_before_eq(next_j, j))
+ goto again;
+ if (set)
+ rxrpc_reduce_conn_timer(conn, next_j);
+}
+
+/*
* connection-level event processor
*/
void rxrpc_process_connection(struct work_struct *work)
@@ -394,6 +446,10 @@ void rxrpc_process_connection(struct work_struct *work)
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
+ /* Process delayed ACKs whose time has come. */
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn);
+
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index fe575798592f..c628351eb900 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -20,10 +20,19 @@
/*
* Time till a connection expires after last use (in seconds).
*/
-unsigned int rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
static void rxrpc_destroy_connection(struct rcu_head *);
+static void rxrpc_connection_timer(struct timer_list *timer)
+{
+ struct rxrpc_connection *conn =
+ container_of(timer, struct rxrpc_connection, timer);
+
+ rxrpc_queue_conn(conn);
+}
+
/*
* allocate a new connection
*/
@@ -38,6 +47,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
INIT_LIST_HEAD(&conn->cache_link);
spin_lock_init(&conn->channel_lock);
INIT_LIST_HEAD(&conn->waiting_calls);
+ timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
@@ -301,21 +311,29 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
}
/*
+ * Set the service connection reap timer.
+ */
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+ unsigned long reap_at)
+{
+ if (rxnet->live)
+ timer_reduce(&rxnet->service_conn_reap_timer, reap_at);
+}
+
+/*
* Release a service connection
*/
void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet;
const void *here = __builtin_return_address(0);
int n;
n = atomic_dec_return(&conn->usage);
trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
ASSERTCMP(n, >=, 0);
- if (n == 0) {
- rxnet = conn->params.local->rxnet;
- rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0);
- }
+ if (n == 1)
+ rxrpc_set_service_reap_timer(conn->params.local->rxnet,
+ jiffies + rxrpc_connection_expiry);
}
/*
@@ -332,6 +350,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
_net("DESTROY CONN %d", conn->debug_id);
+ del_timer_sync(&conn->timer);
rxrpc_purge_queue(&conn->rx_queue);
conn->security->clear(conn);
@@ -351,17 +370,15 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
struct rxrpc_net *rxnet =
- container_of(to_delayed_work(work),
- struct rxrpc_net, service_conn_reaper);
- unsigned long reap_older_than, earliest, idle_timestamp, now;
+ container_of(work, struct rxrpc_net, service_conn_reaper);
+ unsigned long expire_at, earliest, idle_timestamp, now;
LIST_HEAD(graveyard);
_enter("");
now = jiffies;
- reap_older_than = now - rxrpc_connection_expiry * HZ;
- earliest = ULONG_MAX;
+ earliest = now + MAX_JIFFY_OFFSET;
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
@@ -371,15 +388,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
- idle_timestamp = READ_ONCE(conn->idle_timestamp);
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
- (long)reap_older_than - (long)idle_timestamp);
-
- if (time_after(idle_timestamp, reap_older_than)) {
- if (time_before(idle_timestamp, earliest))
- earliest = idle_timestamp;
- continue;
+ if (rxnet->live) {
+ idle_timestamp = READ_ONCE(conn->idle_timestamp);
+ expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
+ if (conn->params.local->service_closed)
+ expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
+
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long)expire_at - (long)now);
+
+ if (time_before(now, expire_at)) {
+ if (time_before(expire_at, earliest))
+ earliest = expire_at;
+ continue;
+ }
}
/* The usage count sits at 1 whilst the object is unused on the
@@ -387,6 +410,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
*/
if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
continue;
+ trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0);
if (rxrpc_conn_is_client(conn))
BUG();
@@ -397,11 +421,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
}
write_unlock(&rxnet->conn_lock);
- if (earliest != ULONG_MAX) {
- _debug("reschedule reaper %ld", (long) earliest - now);
+ if (earliest != now + MAX_JIFFY_OFFSET) {
+ _debug("reschedule reaper %ld", (long)earliest - (long)now);
ASSERT(time_after(earliest, now));
- rxrpc_queue_delayed_work(&rxnet->client_conn_reaper,
- earliest - now);
+ rxrpc_set_service_reap_timer(rxnet, earliest);
}
while (!list_empty(&graveyard)) {
@@ -429,9 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
rxrpc_destroy_all_client_connections(rxnet);
- rxrpc_connection_expiry = 0;
- cancel_delayed_work(&rxnet->client_conn_reaper);
- rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0);
+ del_timer_sync(&rxnet->service_conn_reap_timer);
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
flush_workqueue(rxrpc_workqueue);
write_lock(&rxnet->conn_lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1b592073ec96..6fc61400337f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -318,16 +318,18 @@ bad_state:
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
struct rxrpc_ack_summary summary = { 0 };
+ unsigned long now, timo;
rxrpc_seq_t top = READ_ONCE(call->tx_top);
if (call->ackr_reason) {
spin_lock_bh(&call->lock);
call->ackr_reason = 0;
- call->resend_at = call->expire_at;
- call->ack_at = call->expire_at;
spin_unlock_bh(&call->lock);
- rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
- ktime_get_real());
+ now = jiffies;
+ timo = now + MAX_JIFFY_OFFSET;
+ WRITE_ONCE(call->resend_at, timo);
+ WRITE_ONCE(call->ack_at, timo);
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
@@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
if (state >= RXRPC_CALL_COMPLETE)
return;
+ if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ unsigned long timo = READ_ONCE(call->next_req_timo);
+ unsigned long now, expect_req_by;
+
+ if (timo) {
+ now = jiffies;
+ expect_req_by = now + timo;
+ WRITE_ONCE(call->expect_req_by, expect_req_by);
+ rxrpc_reduce_call_timer(call, expect_req_by, now,
+ rxrpc_timer_set_for_idle);
+ }
+ }
+
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
@@ -616,6 +631,43 @@ found:
}
/*
+ * Process the response to a ping that we sent to find out if we lost an ACK.
+ *
+ * If we got back a ping response that indicates a lower tx_top than what we
+ * had at the time of the ping transmission, we adjudge all the DATA packets
+ * sent between the response tx_top and the ping-time tx_top to have been lost.
+ */
+static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
+{
+ rxrpc_seq_t top, bottom, seq;
+ bool resend = false;
+
+ spin_lock_bh(&call->lock);
+
+ bottom = call->tx_hard_ack + 1;
+ top = call->acks_lost_top;
+ if (before(bottom, top)) {
+ for (seq = bottom; before_eq(seq, top); seq++) {
+ int ix = seq & RXRPC_RXTX_BUFF_MASK;
+ u8 annotation = call->rxtx_annotations[ix];
+ u8 anno_type = annotation & RXRPC_TX_ANNO_MASK;
+
+ if (anno_type != RXRPC_TX_ANNO_UNACK)
+ continue;
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ annotation |= RXRPC_TX_ANNO_RETRANS;
+ call->rxtx_annotations[ix] = annotation;
+ resend = true;
+ }
+ }
+
+ spin_unlock_bh(&call->lock);
+
+ if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ rxrpc_queue_call(call);
+}
+
+/*
* Process a ping response.
*/
static void rxrpc_input_ping_response(struct rxrpc_call *call,
@@ -630,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call,
smp_rmb();
ping_serial = call->ping_serial;
+ if (orig_serial == call->acks_lost_ping)
+ rxrpc_input_check_for_lost_ack(call);
+
if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
before(orig_serial, ping_serial))
return;
@@ -908,9 +963,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
struct sk_buff *skb, u16 skew)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned long timo;
_enter("%p,%p", call, skb);
+ timo = READ_ONCE(call->next_rx_timo);
+ if (timo) {
+ unsigned long now = jiffies, expect_rx_by;
+
+ expect_rx_by = jiffies + timo;
+ WRITE_ONCE(call->expect_rx_by, expect_rx_by);
+ rxrpc_reduce_call_timer(call, expect_rx_by, now,
+ rxrpc_timer_set_for_normal);
+ }
+
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
rxrpc_input_data(call, skb, skew);
@@ -1147,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
goto reupgrade;
conn->service_id = sp->hdr.serviceId;
}
-
+
if (sp->hdr.callNumber == 0) {
/* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 1a2d4b112064..c1d9e7fd7448 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,33 +21,28 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
- * Maximum lifetime of a call (in mx).
- */
-unsigned int rxrpc_max_call_lifetime = 60 * 1000;
-
-/*
* How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in ms).
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
*/
-unsigned int rxrpc_requested_ack_delay = 1;
+unsigned long rxrpc_requested_ack_delay = 1;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in ms).
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned int rxrpc_soft_ack_delay = 1 * 1000;
+unsigned long rxrpc_soft_ack_delay = HZ;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in ms).
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned int rxrpc_idle_ack_delay = 0.5 * 1000;
+unsigned long rxrpc_idle_ack_delay = HZ / 2;
/*
* Receive window size in packets. This indicates the maximum number of
@@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4;
/*
- * Time till packet resend (in milliseconds).
+ * Time till packet resend (in jiffies).
*/
-unsigned int rxrpc_resend_timeout = 4 * 1000;
+unsigned long rxrpc_resend_timeout = 4 * HZ;
const s8 rxrpc_ack_priority[] = {
[0] = 0,
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 7edceb8522f5..f18c9248e0d4 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -14,6 +14,24 @@
unsigned int rxrpc_net_id;
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, client_conn_reap_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->client_conn_reaper);
+}
+
+static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, service_conn_reap_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
+}
+
/*
* Initialise a per-network namespace record.
*/
@@ -22,6 +40,7 @@ static __net_init int rxrpc_init_net(struct net *net)
struct rxrpc_net *rxnet = rxrpc_net(net);
int ret;
+ rxnet->live = true;
get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
rxnet->epoch |= RXRPC_RANDOM_EPOCH;
@@ -31,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net)
INIT_LIST_HEAD(&rxnet->conn_proc_list);
INIT_LIST_HEAD(&rxnet->service_conns);
rwlock_init(&rxnet->conn_lock);
- INIT_DELAYED_WORK(&rxnet->service_conn_reaper,
- rxrpc_service_connection_reaper);
+ INIT_WORK(&rxnet->service_conn_reaper,
+ rxrpc_service_connection_reaper);
+ timer_setup(&rxnet->service_conn_reap_timer,
+ rxrpc_service_conn_reap_timeout, 0);
rxnet->nr_client_conns = 0;
rxnet->nr_active_client_conns = 0;
@@ -42,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net)
INIT_LIST_HEAD(&rxnet->waiting_client_conns);
INIT_LIST_HEAD(&rxnet->active_client_conns);
INIT_LIST_HEAD(&rxnet->idle_client_conns);
- INIT_DELAYED_WORK(&rxnet->client_conn_reaper,
- rxrpc_discard_expired_client_conns);
+ INIT_WORK(&rxnet->client_conn_reaper,
+ rxrpc_discard_expired_client_conns);
+ timer_setup(&rxnet->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
INIT_LIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
@@ -60,6 +83,7 @@ static __net_init int rxrpc_init_net(struct net *net)
return 0;
err_proc:
+ rxnet->live = false;
return ret;
}
@@ -70,6 +94,7 @@ static __net_exit void rxrpc_exit_net(struct net *net)
{
struct rxrpc_net *rxnet = rxrpc_net(net);
+ rxnet->live = false;
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
rxrpc_destroy_all_locals(rxnet);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f47659c7b224..42410e910aff 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -33,6 +33,24 @@ struct rxrpc_abort_buffer {
};
/*
+ * Arrange for a keepalive ping a certain time after we last transmitted. This
+ * lets the far side know we're still interested in this call and helps keep
+ * the route through any intervening firewall open.
+ *
+ * Receiving a response to the ping will prevent the ->expect_rx_by timer from
+ * expiring.
+ */
+static void rxrpc_set_keepalive(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+
+ keepalive_at += now;
+ WRITE_ONCE(call->keepalive_at, keepalive_at);
+ rxrpc_reduce_call_timer(call, keepalive_at, now,
+ rxrpc_timer_set_for_keepalive);
+}
+
+/*
* Fill out an ACK packet.
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
@@ -95,7 +113,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
/*
* Send an ACK call packet.
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
+ rxrpc_serial_t *_serial)
{
struct rxrpc_connection *conn = NULL;
struct rxrpc_ack_buffer *pkt;
@@ -165,6 +184,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
ntohl(pkt->ack.firstPacket),
ntohl(pkt->ack.serial),
pkt->ack.reason, pkt->ack.nAcks);
+ if (_serial)
+ *_serial = serial;
if (ping) {
call->ping_serial = serial;
@@ -202,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
call->ackr_seen = top;
spin_unlock_bh(&call->lock);
}
+
+ rxrpc_set_keepalive(call);
}
out:
@@ -323,7 +346,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
* ACKs if a DATA packet appears to have been lost.
*/
if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
- (retrans ||
+ (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
+ retrans ||
call->cong_mode == RXRPC_CALL_SLOW_START ||
(call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
@@ -370,8 +394,23 @@ done:
if (whdr.flags & RXRPC_REQUEST_ACK) {
call->peer->rtt_last_req = now;
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
+ if (call->peer->rtt_usage > 1) {
+ unsigned long nowj = jiffies, ack_lost_at;
+
+ ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
+ if (ack_lost_at < 1)
+ ack_lost_at = 1;
+
+ ack_lost_at += nowj;
+ WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+ rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+ rxrpc_timer_set_for_lost_ack);
+ }
}
}
+
+ rxrpc_set_keepalive(call);
+
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8510a98b87e1..cc21e8db25b0 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
+#if 0 // TODO: May want to transmit final ACK under some circumstances anyway
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
rxrpc_propose_ack_terminal_ack);
- rxrpc_send_ack_packet(call, false);
+ rxrpc_send_ack_packet(call, false, NULL);
}
+#endif
write_lock_bh(&call->state_lock);
@@ -161,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
case RXRPC_CALL_SERVER_RECV_REQUEST:
call->tx_phase = true;
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
- call->ack_at = call->expire_at;
+ call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
write_unlock_bh(&call->state_lock);
rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
rxrpc_propose_ack_processing_op);
@@ -217,10 +219,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
after_eq(top, call->ackr_seen + 2) ||
(hard_ack == top && after(hard_ack, call->ackr_consumed)))
rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
- true, false,
+ true, true,
rxrpc_propose_ack_rotate_rx);
- if (call->ackr_reason)
- rxrpc_send_ack_packet(call, false);
+ if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
+ rxrpc_send_ack_packet(call, false, NULL);
}
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 7d2595582c09..09f2a3e05221 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -21,22 +21,6 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-enum rxrpc_command {
- RXRPC_CMD_SEND_DATA, /* send data message */
- RXRPC_CMD_SEND_ABORT, /* request abort generation */
- RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
- RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
-};
-
-struct rxrpc_send_params {
- s64 tx_total_len; /* Total Tx data length (if send data) */
- unsigned long user_call_ID; /* User's call ID */
- u32 abort_code; /* Abort code to Tx (if abort) */
- enum rxrpc_command command : 8; /* The command to implement */
- bool exclusive; /* Shared or exclusive call */
- bool upgrade; /* If the connection is upgradeable */
-};
-
/*
* Wait for space to appear in the Tx queue or a signal to occur.
*/
@@ -174,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned long now;
rxrpc_seq_t seq = sp->hdr.seq;
int ret, ix;
u8 annotation = RXRPC_TX_ANNO_UNACK;
@@ -213,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
break;
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
- call->ack_at = call->expire_at;
+ now = jiffies;
+ WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET);
if (call->ackr_reason == RXRPC_ACK_DELAY)
call->ackr_reason = 0;
- __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply,
- ktime_get_real());
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
if (!last)
break;
/* Fall through */
@@ -239,14 +224,19 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
_debug("need instant resend %d", ret);
rxrpc_instant_resend(call, ix);
} else {
- ktime_t now = ktime_get_real(), resend_at;
-
- resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
-
- if (ktime_before(resend_at, call->resend_at)) {
- call->resend_at = resend_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
- }
+ unsigned long now = jiffies, resend_at;
+
+ if (call->peer->rtt_usage > 1)
+ resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2);
+ else
+ resend_at = rxrpc_resend_timeout;
+ if (resend_at < 1)
+ resend_at = 1;
+
+ resend_at += now;
+ WRITE_ONCE(call->resend_at, resend_at);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_send);
}
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
@@ -295,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
- rxrpc_send_ack_packet(call, false);
+ rxrpc_send_ack_packet(call, false, NULL);
if (!skb) {
size_t size, chunk, max, space;
@@ -480,11 +470,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
if (msg->msg_flags & MSG_CMSG_COMPAT) {
if (len != sizeof(u32))
return -EINVAL;
- p->user_call_ID = *(u32 *)CMSG_DATA(cmsg);
+ p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg);
} else {
if (len != sizeof(unsigned long))
return -EINVAL;
- p->user_call_ID = *(unsigned long *)
+ p->call.user_call_ID = *(unsigned long *)
CMSG_DATA(cmsg);
}
got_user_ID = true;
@@ -522,11 +512,24 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
break;
case RXRPC_TX_LENGTH:
- if (p->tx_total_len != -1 || len != sizeof(__s64))
+ if (p->call.tx_total_len != -1 || len != sizeof(__s64))
+ return -EINVAL;
+ p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
+ if (p->call.tx_total_len < 0)
return -EINVAL;
- p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
- if (p->tx_total_len < 0)
+ break;
+
+ case RXRPC_SET_CALL_TIMEOUT:
+ if (len & 3 || len < 4 || len > 12)
return -EINVAL;
+ memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len);
+ p->call.nr_timeouts = len / 4;
+ if (p->call.timeouts.hard > INT_MAX / HZ)
+ return -ERANGE;
+ if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000)
+ return -ERANGE;
+ if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000)
+ return -ERANGE;
break;
default:
@@ -536,7 +539,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
if (!got_user_ID)
return -EINVAL;
- if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
+ if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
return -EINVAL;
_leave(" = 0");
return 0;
@@ -576,8 +579,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
cp.exclusive = rx->exclusive | p->exclusive;
cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID,
- p->tx_total_len, GFP_KERNEL);
+ call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL);
/* The socket is now unlocked */
_leave(" = %p\n", call);
@@ -594,15 +596,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
{
enum rxrpc_call_state state;
struct rxrpc_call *call;
+ unsigned long now, j;
int ret;
struct rxrpc_send_params p = {
- .tx_total_len = -1,
- .user_call_ID = 0,
- .abort_code = 0,
- .command = RXRPC_CMD_SEND_DATA,
- .exclusive = false,
- .upgrade = true,
+ .call.tx_total_len = -1,
+ .call.user_call_ID = 0,
+ .call.nr_timeouts = 0,
+ .abort_code = 0,
+ .command = RXRPC_CMD_SEND_DATA,
+ .exclusive = false,
+ .upgrade = false,
};
_enter("");
@@ -615,15 +619,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = -EINVAL;
if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
goto error_release_sock;
- call = rxrpc_accept_call(rx, p.user_call_ID, NULL);
+ call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL);
/* The socket is now unlocked. */
if (IS_ERR(call))
return PTR_ERR(call);
- rxrpc_put_call(call, rxrpc_call_put);
- return 0;
+ ret = 0;
+ goto out_put_unlock;
}
- call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID);
+ call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID);
if (!call) {
ret = -EBADSLT;
if (p.command != RXRPC_CMD_SEND_DATA)
@@ -653,14 +657,39 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
goto error_put;
}
- if (p.tx_total_len != -1) {
+ if (p.call.tx_total_len != -1) {
ret = -EINVAL;
if (call->tx_total_len != -1 ||
call->tx_pending ||
call->tx_top != 0)
goto error_put;
- call->tx_total_len = p.tx_total_len;
+ call->tx_total_len = p.call.tx_total_len;
+ }
+ }
+
+ switch (p.call.nr_timeouts) {
+ case 3:
+ j = msecs_to_jiffies(p.call.timeouts.normal);
+ if (p.call.timeouts.normal > 0 && j == 0)
+ j = 1;
+ WRITE_ONCE(call->next_rx_timo, j);
+ /* Fall through */
+ case 2:
+ j = msecs_to_jiffies(p.call.timeouts.idle);
+ if (p.call.timeouts.idle > 0 && j == 0)
+ j = 1;
+ WRITE_ONCE(call->next_req_timo, j);
+ /* Fall through */
+ case 1:
+ if (p.call.timeouts.hard > 0) {
+ j = msecs_to_jiffies(p.call.timeouts.hard);
+ now = jiffies;
+ j += now;
+ WRITE_ONCE(call->expect_term_by, j);
+ rxrpc_reduce_call_timer(call, j, now,
+ rxrpc_timer_set_for_hard);
}
+ break;
}
state = READ_ONCE(call->state);
@@ -689,6 +718,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_data(rx, call, msg, len, NULL);
}
+out_put_unlock:
mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 34c706d2f79c..4a7af7aff37d 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -21,6 +21,8 @@ static const unsigned int four = 4;
static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
+static const unsigned long one_jiffy = 1;
+static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
/*
* RxRPC operating parameters.
@@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
* information on the individual parameters.
*/
static struct ctl_table rxrpc_sysctl_table[] = {
- /* Values measured in milliseconds */
+ /* Values measured in milliseconds but used in jiffies */
{
.procname = "req_ack_delay",
.data = &rxrpc_requested_ack_delay,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&zero,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "soft_ack_delay",
.data = &rxrpc_soft_ack_delay,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "idle_ack_delay",
.data = &rxrpc_idle_ack_delay,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
- },
- {
- .procname = "resend_timeout",
- .data = &rxrpc_resend_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "idle_conn_expiry",
.data = &rxrpc_conn_idle_client_expiry,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "idle_conn_fast_expiry",
.data = &rxrpc_conn_idle_client_fast_expiry,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
-
- /* Values measured in seconds but used in jiffies */
{
- .procname = "max_call_lifetime",
- .data = &rxrpc_max_call_lifetime,
- .maxlen = sizeof(unsigned int),
+ .procname = "resend_timeout",
+ .data = &rxrpc_resend_timeout,
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
/* Non-time values */
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc3..a0ac42b3ed06 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da99..6445184b2759 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
#include <net/pkt_sched.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
static int skbmark_encode(struct sk_buff *skb, void *skbdata,
struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e96..7221437ca3a6 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
#include <net/pkt_sched.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
struct tcf_meta_info *e)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e59388480..08b61849c2a2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 8b5abcd2f32f..9438969290a6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+static void tcf_sample_cleanup(struct tc_action *a, int bind)
{
- struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu);
+ struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
- psample_group = rcu_dereference_protected(s->psample_group, 1);
+ psample_group = rtnl_dereference(s->psample_group);
RCU_INIT_POINTER(s->psample_group, NULL);
psample_group_put(psample_group);
}
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_sample *s = to_sample(a);
-
- call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
-}
-
static bool tcf_sample_dev_ok_push(struct net_device *dev)
{
switch (dev->type) {
@@ -264,7 +257,6 @@ static int __init sample_init_module(void)
static void __exit sample_cleanup_module(void)
{
- rcu_barrier();
tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7d97f612c9b9..b9d63d2246e6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,7 +23,6 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
-#include <linux/err.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -336,7 +335,8 @@ static void tcf_block_put_final(struct work_struct *work)
struct tcf_chain *chain, *tmp;
rtnl_lock();
- /* Only chain 0 should be still here. */
+
+ /* At this point, all the chains should have refcnt == 1. */
list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
tcf_chain_put(chain);
rtnl_unlock();
@@ -344,15 +344,23 @@ static void tcf_block_put_final(struct work_struct *work)
}
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
- * actions should be all removed after flushing. However, filters are now
- * destroyed in tc filter workqueue with RTNL lock, they can not race here.
+ * actions should be all removed after flushing.
*/
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
- struct tcf_chain *chain, *tmp;
+ struct tcf_chain *chain;
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ if (!block)
+ return;
+ /* Hold a refcnt for all chains, except 0, so that they don't disappear
+ * while we are iterating.
+ */
+ list_for_each_entry(chain, &block->chain_list, list)
+ if (chain->index)
+ tcf_chain_hold(chain);
+
+ list_for_each_entry(chain, &block->chain_list, list)
tcf_chain_flush(chain);
tcf_block_offload_unbind(block, q, ei);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a9f3e317055c..8d78e7f4ecc3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
- bool offloaded;
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- enum tc_clsbpf_command cmd)
+ struct cls_bpf_prog *oldprog)
{
- bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
struct tcf_block *block = tp->chain->block;
- bool skip_sw = tc_skip_sw(prog->gen_flags);
struct tc_cls_bpf_offload cls_bpf = {};
+ struct cls_bpf_prog *obj;
+ bool skip_sw;
int err;
+ skip_sw = prog && tc_skip_sw(prog->gen_flags);
+ obj = prog ?: oldprog;
+
tc_cls_common_offload_init(&cls_bpf.common, tp);
- cls_bpf.command = cmd;
- cls_bpf.exts = &prog->exts;
- cls_bpf.prog = prog->filter;
- cls_bpf.name = prog->bpf_name;
- cls_bpf.exts_integrated = prog->exts_integrated;
- cls_bpf.gen_flags = prog->gen_flags;
+ cls_bpf.command = TC_CLSBPF_OFFLOAD;
+ cls_bpf.exts = &obj->exts;
+ cls_bpf.prog = prog ? prog->filter : NULL;
+ cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+ cls_bpf.name = obj->bpf_name;
+ cls_bpf.exts_integrated = obj->exts_integrated;
+ cls_bpf.gen_flags = obj->gen_flags;
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
- if (addorrep) {
+ if (prog) {
if (err < 0) {
- cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ cls_bpf_offload_cmd(tp, oldprog, prog);
return err;
} else if (err > 0) {
prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
}
}
- if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+ if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
return -EINVAL;
return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
- struct cls_bpf_prog *obj = prog;
- enum tc_clsbpf_command cmd;
- bool skip_sw;
- int ret;
-
- skip_sw = tc_skip_sw(prog->gen_flags) ||
- (oldprog && tc_skip_sw(oldprog->gen_flags));
-
- if (oldprog && oldprog->offloaded) {
- if (!tc_skip_hw(prog->gen_flags)) {
- cmd = TC_CLSBPF_REPLACE;
- } else if (!tc_skip_sw(prog->gen_flags)) {
- obj = oldprog;
- cmd = TC_CLSBPF_DESTROY;
- } else {
- return -EINVAL;
- }
- } else {
- if (tc_skip_hw(prog->gen_flags))
- return skip_sw ? -EINVAL : 0;
- cmd = TC_CLSBPF_ADD;
- }
-
- ret = cls_bpf_offload_cmd(tp, obj, cmd);
- if (ret)
- return ret;
+ if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+ return -EINVAL;
- obj->offloaded = true;
- if (oldprog)
- oldprog->offloaded = false;
+ if (prog && tc_skip_hw(prog->gen_flags))
+ prog = NULL;
+ if (oldprog && tc_skip_hw(oldprog->gen_flags))
+ oldprog = NULL;
+ if (!prog && !oldprog)
+ return 0;
- return 0;
+ return cls_bpf_offload_cmd(tp, prog, oldprog);
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
{
int err;
- if (!prog->offloaded)
- return;
-
- err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
- if (err) {
+ err = cls_bpf_offload_cmd(tp, NULL, prog);
+ if (err)
pr_err("Stopping hardware offload failed: %d\n", err);
- return;
- }
-
- prog->offloaded = false;
}
static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
- if (!prog->offloaded)
- return;
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_bpf_offload cls_bpf = {};
+
+ tc_cls_common_offload_init(&cls_bpf.common, tp);
+ cls_bpf.command = TC_CLSBPF_STATS;
+ cls_bpf.exts = &prog->exts;
+ cls_bpf.prog = prog->filter;
+ cls_bpf.name = prog->bpf_name;
+ cls_bpf.exts_integrated = prog->exts_integrated;
+ cls_bpf.gen_flags = prog->gen_flags;
- cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
}
static int cls_bpf_init(struct tcf_proto *tp)
@@ -258,11 +241,8 @@ static int cls_bpf_init(struct tcf_proto *tp)
return 0;
}
-static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
+static void cls_bpf_free_parms(struct cls_bpf_prog *prog)
{
- tcf_exts_destroy(&prog->exts);
- tcf_exts_put_net(&prog->exts);
-
if (cls_bpf_is_ebpf(prog))
bpf_prog_put(prog->filter);
else
@@ -270,6 +250,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
kfree(prog->bpf_name);
kfree(prog->bpf_ops);
+}
+
+static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
+{
+ tcf_exts_destroy(&prog->exts);
+ tcf_exts_put_net(&prog->exts);
+
+ cls_bpf_free_parms(prog);
kfree(prog);
}
@@ -514,12 +502,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout_idr;
ret = cls_bpf_offload(tp, prog, oldprog);
- if (ret) {
- if (!oldprog)
- idr_remove_ext(&head->handle_idr, prog->handle);
- __cls_bpf_delete_prog(prog);
- return ret;
- }
+ if (ret)
+ goto errout_parms;
if (!tc_in_hw(prog->gen_flags))
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -537,6 +521,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
*arg = prog;
return 0;
+errout_parms:
+ cls_bpf_free_parms(prog);
errout_idr:
if (!oldprog)
idr_remove_ext(&head->handle_idr, prog->handle);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247..507859cdd1cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#include <linux/netdevice.h>
#include <linux/idr.h>
struct tc_u_knode {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b..0f1eab99ff4e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
tcm->tcm_info = refcount_read(&q->refcnt);
if (nla_put_string(skb, TCA_KIND, q->ops->id))
goto nla_put_failure;
+ if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
+ goto nla_put_failure;
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto nla_put_failure;
qlen = q->q.qlen;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6361be7881f1..525eb3a6d625 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
return -EINVAL;
+ err = tcf_block_get(&q->link.block, &q->link.filter_list, sch);
+ if (err)
+ goto put_rtab;
+
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
- goto put_rtab;
+ goto put_block;
q->link.sibling = &q->link;
q->link.common.classid = sch->handle;
@@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
cbq_addprio(q, &q->link);
return 0;
+put_block:
+ tcf_block_put(q->link.block);
+
put_rtab:
qdisc_put_rtab(q->link.R_tab);
return err;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index b30a2c70bd48..531250fceb9e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ return -EINVAL;
+
if (ctl->limit > CHOKE_MAX_QUEUE)
return -EINVAL;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3839cbbdc32b..661c7144b53a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
@@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
+ else if (netif_is_macvlan(dev))
+ dev = macvlan_dev_real_dev(dev);
res = netdev_get_tx_queue(dev, 0)->trans_start;
for (i = 1; i < dev->num_tx_queues; i++) {
val = netdev_get_tx_queue(dev, i)->trans_start;
@@ -1037,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
if (!tp_head) {
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+ /* Wait for flying RCU callback before it is freed. */
+ rcu_barrier_bh();
return;
}
@@ -1052,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
rcu_assign_pointer(*miniqp->p_miniq, miniq);
if (miniq_old)
- /* This is counterpart of the rcu barrier above. We need to
+ /* This is counterpart of the rcu barriers above. We need to
* block potential new user of miniq_old until all readers
* are not seeing it.
*/
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 17c7130454bd..bc30f9186ac6 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ return -EINVAL;
+
if (!q) {
table->tab[dp] = q = *prealloc;
*prealloc = NULL;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d47..fc1286f499c1 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -68,6 +68,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
+ net_inc_ingress_queue();
+
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -78,7 +80,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
if (err)
return err;
- net_inc_ingress_queue();
sch->flags |= TCQ_F_CPUSTATS;
return 0;
@@ -172,6 +173,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
+ net_inc_ingress_queue();
+ net_inc_egress_queue();
+
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -190,18 +194,11 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
if (err)
- goto err_egress_block_get;
-
- net_inc_ingress_queue();
- net_inc_egress_queue();
+ return err;
sch->flags |= TCQ_F_CPUSTATS;
return 0;
-
-err_egress_block_get:
- tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
- return err;
}
static void clsact_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7f8ea9e297c3..f0747eb87dc4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
.handle = sch->handle,
.parent = sch->parent,
};
+ int err;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
opt.command = TC_RED_DESTROY;
}
- return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+
+ if (!err && enable)
+ sch->flags |= TCQ_F_OFFLOADED;
+ else
+ sch->flags &= ~TCQ_F_OFFLOADED;
+
+ return err;
}
static void red_destroy(struct Qdisc *sch)
@@ -212,6 +220,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
ctl = nla_data(tb[TCA_RED_PARMS]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ return -EINVAL;
if (ctl->limit > 0) {
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
@@ -272,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
return red_change(sch, opt);
}
-static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_red_qopt_offload hw_stats = {
@@ -284,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
.stats.qstats = &sch->qstats,
},
};
- int err;
- opt->flags &= ~TC_RED_OFFLOADED;
- if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
- return 0;
-
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
- &hw_stats);
- if (err == -EOPNOTSUPP)
+ if (!(sch->flags & TCQ_F_OFFLOADED))
return 0;
- if (!err)
- opt->flags |= TC_RED_OFFLOADED;
-
- return err;
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats);
}
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -317,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
int err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
- err = red_dump_offload(sch, &opt);
+ err = red_dump_offload_stats(sch, &opt);
if (err)
goto nla_put_failure;
@@ -345,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.marked = q->stats.prob_mark + q->stats.forced_mark,
};
- if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+ if (sch->flags & TCQ_F_OFFLOADED) {
struct red_stats hw_stats = {0};
struct tc_red_qopt_offload hw_stats_request = {
.command = TC_RED_XSTATS,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 890f4a4564e7..930e5bd26d3d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+ if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+ ctl_v1->Wlog))
+ return -EINVAL;
if (ctl_v1 && ctl_v1->qth_min) {
p = kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
@@ -724,6 +727,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
int i;
int err;
+ q->sch = sch;
timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
err = tcf_block_get(&q->block, &q->filter_list, sch);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7b261afc47b9..7f8baa48e7c2 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
msg->send_failed = 0;
msg->send_error = 0;
msg->can_delay = 1;
+ msg->abandoned = 0;
msg->expires_at = 0;
INIT_LIST_HEAD(&msg->chunks);
}
@@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (!chunk->asoc->peer.prsctp_capable)
return 0;
+ if (chunk->msg->abandoned)
+ return 1;
+
+ if (!chunk->has_tsn &&
+ !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG))
+ return 0;
+
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
struct sctp_stream_out *streamout =
@@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
}
+ chunk->msg->abandoned = 1;
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
@@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ chunk->msg->abandoned = 1;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
time_after(jiffies, chunk->msg->expires_at)) {
+ chunk->msg->abandoned = 1;
return 1;
}
/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
case SCTP_CID_AUTH:
return "AUTH";
+ case SCTP_CID_RECONF:
+ return "RECONF";
+
default:
break;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c..141c9c466ec1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
return;
}
- if (t->param_flags & SPP_PMTUD_ENABLE) {
- /* Update transports view of the MTU */
- sctp_transport_update_pmtu(t, pmtu);
-
- /* Update association pmtu. */
- sctp_assoc_sync_pmtu(asoc);
- }
+ if (!(t->param_flags & SPP_PMTUD_ENABLE))
+ /* We can't allow retransmitting in such case, as the
+ * retransmission would be sized just as before, and thus we
+ * would get another icmp, and retransmit again.
+ */
+ return;
- /* Retransmit with the new pmtu setting.
- * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
- * Needed will never be sent, but if a message was sent before
- * PMTU discovery was disabled that was larger than the PMTU, it
- * would not be fragmented, so it must be re-transmitted fragmented.
+ /* Update transports view of the MTU. Return if no update was needed.
+ * If an update wasn't needed/possible, it also doesn't make sense to
+ * try to retransmit now.
*/
+ if (!sctp_transport_update_pmtu(t, pmtu))
+ return;
+
+ /* Update association pmtu. */
+ sctp_assoc_sync_pmtu(asoc);
+
+ /* Retransmit with the new pmtu setting. */
sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4db012aa25f7..7d67feeeffc1 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
struct sctp_stream_out *streamout;
- if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+ if (!chk->msg->abandoned &&
+ (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
continue;
+ chk->msg->abandoned = 1;
list_del_init(&chk->transmitted_list);
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
@@ -377,7 +379,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
- if (!chk->tsn_gap_acked) {
+ if (queue != &asoc->outqueue.retransmit &&
+ !chk->tsn_gap_acked) {
if (chk->transport)
chk->transport->flight_size -=
sctp_data_size(chk);
@@ -403,10 +406,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
q->sched->unsched_all(&asoc->stream);
list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
- if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+ if (!chk->msg->abandoned &&
+ (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) ||
+ !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
continue;
+ chk->msg->abandoned = 1;
sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
@@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
/* If this chunk has not been acked, stop
* considering it as 'outstanding'.
*/
- if (!tchunk->tsn_gap_acked) {
+ if (transmitted_queue != &q->retransmit &&
+ !tchunk->tsn_gap_acked) {
if (tchunk->transport)
tchunk->transport->flight_size -=
sctp_data_size(tchunk);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f5172c21349b..6a38c2503649 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1499,6 +1499,7 @@ static __init int sctp_init(void)
INIT_LIST_HEAD(&sctp_address_families);
sctp_v4_pf_init();
sctp_v6_pf_init();
+ sctp_sched_ops_init();
status = register_pernet_subsys(&sctp_defaults_ops);
if (status)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3204a9b29407..9b01e994f661 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
list_for_each_entry(chunk, &t->transmitted, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->retransmit, list)
+ list_for_each_entry(chunk, &q->retransmit, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->sacked, list)
+ list_for_each_entry(chunk, &q->sacked, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->abandoned, list)
+ list_for_each_entry(chunk, &q->abandoned, transmitted_list)
cb(chunk);
list_for_each_entry(chunk, &q->out_chunk_list, list)
@@ -2277,7 +2277,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
event = sctp_ulpevent_make_sender_dry_event(asoc,
- GFP_ATOMIC);
+ GFP_USER | __GFP_NOWARN);
if (!event)
return -ENOMEM;
@@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
if (optlen < sizeof(struct sctp_hmacalgo))
return -EINVAL;
+ optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+ SCTP_AUTH_NUM_HMACS * sizeof(u16));
hmacs = memdup_user(optval, optlen);
if (IS_ERR(hmacs))
@@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
if (optlen <= sizeof(struct sctp_authkey))
return -EINVAL;
+ /* authkey->sca_keylength is u16, so optlen can't be bigger than
+ * this.
+ */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(struct sctp_authkey));
authkey = memdup_user(optval, optlen);
if (IS_ERR(authkey))
@@ -3891,13 +3898,20 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen < sizeof(struct sctp_reset_streams))
+ if (optlen < sizeof(*params))
return -EINVAL;
+ /* srs_number_streams is u16, so optlen can't be bigger than this. */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(__u16) * sizeof(*params));
params = memdup_user(optval, optlen);
if (IS_ERR(params))
return PTR_ERR(params);
+ if (params->srs_number_streams * sizeof(__u16) >
+ optlen - sizeof(*params))
+ goto out;
+
asoc = sctp_id2assoc(sk, params->srs_assoc_id);
if (!asoc)
goto out;
@@ -4494,7 +4508,7 @@ static int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
- percpu_counter_inc(&sctp_sockets_allocated);
+ sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
/* Nothing can fail after this block, otherwise
@@ -4538,7 +4552,7 @@ static void sctp_destroy_sock(struct sock *sk)
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
- percpu_counter_dec(&sctp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
}
@@ -5011,7 +5025,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
len = sizeof(int);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+ if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
return -EFAULT;
return 0;
}
@@ -5080,7 +5094,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p
*newfile = sock_alloc_file(newsock, 0, NULL);
if (IS_ERR(*newfile)) {
put_unused_fd(retval);
- sock_release(newsock);
retval = PTR_ERR(*newfile);
*newfile = NULL;
return retval;
@@ -5642,6 +5655,9 @@ copy_getaddrs:
err = -EFAULT;
goto out;
}
+ /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+ * but we can't change it anymore.
+ */
if (put_user(bytes_copied, optlen))
err = -EFAULT;
out:
@@ -6078,7 +6094,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
params.assoc_id = 0;
} else if (len >= sizeof(struct sctp_assoc_value)) {
len = sizeof(struct sctp_assoc_value);
- if (copy_from_user(&params, optval, sizeof(params)))
+ if (copy_from_user(&params, optval, len))
return -EFAULT;
} else
return -EINVAL;
@@ -6248,7 +6264,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (len < sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+ len = sizeof(struct sctp_authkeyid);
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6260,7 +6278,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
else
val.scact_keynumber = ep->active_key_id;
- len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, &val, len))
@@ -6286,7 +6303,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
@@ -6331,7 +6348,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index a11db21dc8a0..524dfeb94c41 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
*/
/* Mark as failed send. */
- sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
+ sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM);
if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
sctp_stream_outq_migrate(stream, NULL, outcnt);
sched->sched_all(stream);
- i = sctp_stream_alloc_out(stream, outcnt, gfp);
- if (i)
- return i;
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+ goto out;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
@@ -170,19 +170,17 @@ in:
if (!incnt)
goto out;
- i = sctp_stream_alloc_in(stream, incnt, gfp);
- if (i) {
- ret = -ENOMEM;
- goto free;
+ ret = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (ret) {
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+ stream->outcnt = 0;
+ goto out;
}
stream->incnt = incnt;
- goto out;
-free:
- sched->free(stream);
- kfree(stream->out);
- stream->out = NULL;
out:
return ret;
}
@@ -254,6 +252,30 @@ static int sctp_send_reconf(struct sctp_association *asoc,
return retval;
}
+static bool sctp_stream_outq_is_empty(struct sctp_stream *stream,
+ __u16 str_nums, __be16 *str_list)
+{
+ struct sctp_association *asoc;
+ __u16 i;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ if (!asoc->outqueue.out_qlen)
+ return true;
+
+ if (!str_nums)
+ return false;
+
+ for (i = 0; i < str_nums; i++) {
+ __u16 sid = ntohs(str_list[i]);
+
+ if (stream->out[sid].ext &&
+ !list_empty(&stream->out[sid].ext->outq))
+ return false;
+ }
+
+ return true;
+}
+
int sctp_send_reset_streams(struct sctp_association *asoc,
struct sctp_reset_streams *params)
{
@@ -317,6 +339,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
for (i = 0; i < str_nums; i++)
nstr_list[i] = htons(str_list[i]);
+ if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) {
+ retval = -EAGAIN;
+ goto out;
+ }
+
chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
kfree(nstr_list);
@@ -377,6 +404,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
if (asoc->strreset_outstanding)
return -EINPROGRESS;
+ if (!sctp_outq_is_empty(&asoc->outqueue))
+ return -EAGAIN;
+
chunk = sctp_make_strreset_tsnreq(asoc);
if (!chunk)
return -ENOMEM;
@@ -563,7 +593,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
flags = SCTP_STREAM_RESET_INCOMING_SSN;
}
- nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2;
+ nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
if (nums) {
str_p = outreq->list_of_streams;
for (i = 0; i < nums; i++) {
@@ -627,7 +657,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
goto out;
}
- nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2;
+ nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16);
str_p = inreq->list_of_streams;
for (i = 0; i < nums; i++) {
if (ntohs(str_p[i]) >= stream->outcnt) {
@@ -636,6 +666,12 @@ struct sctp_chunk *sctp_process_strreset_inreq(
}
}
+ if (!sctp_stream_outq_is_empty(stream, nums, str_p)) {
+ result = SCTP_STRRESET_IN_PROGRESS;
+ asoc->strreset_inseq--;
+ goto err;
+ }
+
chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0);
if (!chunk)
goto out;
@@ -687,12 +723,18 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
i = asoc->strreset_inseq - request_seq - 1;
result = asoc->strreset_result[i];
if (result == SCTP_STRRESET_PERFORMED) {
- next_tsn = asoc->next_tsn;
+ next_tsn = asoc->ctsn_ack_point + 1;
init_tsn =
sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1;
}
goto err;
}
+
+ if (!sctp_outq_is_empty(&asoc->outqueue)) {
+ result = SCTP_STRRESET_IN_PROGRESS;
+ goto err;
+ }
+
asoc->strreset_inseq++;
if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
@@ -703,9 +745,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
goto out;
}
- /* G3: The same processing as though a SACK chunk with no gap report
- * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
- * received MUST be performed.
+ /* G4: The same processing as though a FWD-TSN chunk (as defined in
+ * [RFC3758]) with all streams affected and a new cumulative TSN
+ * ACK of the Receiver's Next TSN minus 1 were received MUST be
+ * performed.
*/
max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
@@ -720,10 +763,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
init_tsn, GFP_ATOMIC);
- /* G4: The same processing as though a FWD-TSN chunk (as defined in
- * [RFC3758]) with all streams affected and a new cumulative TSN
- * ACK of the Receiver's Next TSN minus 1 were received MUST be
- * performed.
+ /* G3: The same processing as though a SACK chunk with no gap report
+ * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+ * received MUST be performed.
*/
sctp_outq_free(&asoc->outqueue);
@@ -927,7 +969,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
outreq = (struct sctp_strreset_outreq *)req;
str_p = outreq->list_of_streams;
- nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+ nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) /
+ sizeof(__u16);
if (result == SCTP_STRRESET_PERFORMED) {
if (nums) {
@@ -956,7 +999,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
inreq = (struct sctp_strreset_inreq *)req;
str_p = inreq->list_of_streams;
- nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
+ sizeof(__u16);
*evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
nums, str_p, GFP_ATOMIC);
@@ -975,6 +1019,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
if (result == SCTP_STRRESET_PERFORMED) {
__u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
&asoc->peer.tsn_map);
+ LIST_HEAD(temp);
sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
@@ -983,7 +1028,13 @@ struct sctp_chunk *sctp_process_strreset_resp(
SCTP_TSN_MAP_INITIAL,
stsn, GFP_ATOMIC);
+ /* Clean up sacked and abandoned queues only. As the
+ * out_chunk_list may not be empty, splice it to temp,
+ * then get it back after sctp_outq_free is done.
+ */
+ list_splice_init(&asoc->outqueue.out_chunk_list, &temp);
sctp_outq_free(&asoc->outqueue);
+ list_splice_init(&temp, &asoc->outqueue.out_chunk_list);
asoc->next_tsn = rtsn;
asoc->ctsn_ack_point = asoc->next_tsn - 1;
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 0b83ec51e43b..d8c162a4089c 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
.unsched_all = sctp_sched_fcfs_unsched_all,
};
+static void sctp_sched_ops_fcfs_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs);
+}
+
/* API to other parts of the stack */
-extern struct sctp_sched_ops sctp_sched_prio;
-extern struct sctp_sched_ops sctp_sched_rr;
+static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1];
-static struct sctp_sched_ops *sctp_sched_ops[] = {
- &sctp_sched_fcfs,
- &sctp_sched_prio,
- &sctp_sched_rr,
-};
+void sctp_sched_ops_register(enum sctp_sched_type sched,
+ struct sctp_sched_ops *sched_ops)
+{
+ sctp_sched_ops[sched] = sched_ops;
+}
+
+void sctp_sched_ops_init(void)
+{
+ sctp_sched_ops_fcfs_init();
+ sctp_sched_ops_prio_init();
+ sctp_sched_ops_rr_init();
+}
int sctp_sched_set_sched(struct sctp_association *asoc,
enum sctp_sched_type sched)
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 384dbf3c8760..7997d35dd0fd 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
sctp_sched_prio_unsched(soute);
}
-struct sctp_sched_ops sctp_sched_prio = {
+static struct sctp_sched_ops sctp_sched_prio = {
.set = sctp_sched_prio_set,
.get = sctp_sched_prio_get,
.init = sctp_sched_prio_init,
@@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = {
.sched_all = sctp_sched_prio_sched_all,
.unsched_all = sctp_sched_prio_unsched_all,
};
+
+void sctp_sched_ops_prio_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio);
+}
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index 7612a438c5b9..1155692448f1 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
sctp_sched_rr_unsched(stream, soute);
}
-struct sctp_sched_ops sctp_sched_rr = {
+static struct sctp_sched_ops sctp_sched_rr = {
.set = sctp_sched_rr_set,
.get = sctp_sched_rr_get,
.init = sctp_sched_rr_init,
@@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = {
.sched_all = sctp_sched_rr_sched_all,
.unsched_all = sctp_sched_rr_unsched_all,
};
+
+void sctp_sched_ops_rr_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr);
+}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf5..47f82bd794d9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
struct dst_entry *dst = sctp_transport_dst_check(t);
+ bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
- pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
- __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
- /* Use default minimum segment size and disable
- * pmtu discovery on this transport.
- */
- t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
- } else {
- t->pathmtu = pmtu;
+ pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+ __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+ /* Use default minimum segment instead */
+ pmtu = SCTP_DEFAULT_MINSEGMENT;
}
+ pmtu = SCTP_TRUNC4(pmtu);
if (dst) {
dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
dst = sctp_transport_dst_check(t);
}
- if (!dst)
+ if (!dst) {
t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+ dst = t->dst;
+ }
+
+ if (dst) {
+ /* Re-fetch, as under layers may have a higher minimum size */
+ pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ change = t->pathmtu != pmtu;
+ }
+ t->pathmtu = pmtu;
+
+ return change;
}
/* Caches the dst entry and source address for a transport's destination
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..e36ec5dd64c6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
gfp_t gfp)
{
- struct sctp_association *asoc;
- __u16 needed, freed;
-
- asoc = ulpq->asoc;
+ struct sctp_association *asoc = ulpq->asoc;
+ __u32 freed = 0;
+ __u16 needed;
- if (chunk) {
- needed = ntohs(chunk->chunk_hdr->length);
- needed -= sizeof(struct sctp_data_chunk);
- } else
- needed = SCTP_DEFAULT_MAXWINDOW;
-
- freed = 0;
+ needed = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_data_chunk);
if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
freed = sctp_ulpq_renege_order(ulpq, needed);
- if (freed < needed) {
+ if (freed < needed)
freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
- }
}
/* If able to free enough room, accept this chunk. */
- if (chunk && (freed >= needed)) {
- int retval;
- retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+ if (freed >= needed) {
+ int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
/*
* Enter partial delivery if chunk has not been
* delivered; otherwise, drain the reassembly queue.
diff --git a/net/socket.c b/net/socket.c
index 82433a2200ec..6f05d5c4bf30 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
name.len = strlen(name.name);
}
path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
- if (unlikely(!path.dentry))
+ if (unlikely(!path.dentry)) {
+ sock_release(sock);
return ERR_PTR(-ENOMEM);
+ }
path.mnt = mntget(sock_mnt);
d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
&socket_file_ops);
if (IS_ERR(file)) {
- /* drop dentry, keep inode */
+ /* drop dentry, keep inode for a bit */
ihold(d_inode(path.dentry));
path_put(&path);
+ /* ... and now kill it properly */
+ sock_release(sock);
return file;
}
@@ -1332,19 +1336,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
- goto out;
-
- retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
- if (retval < 0)
- goto out_release;
-
-out:
- /* It may be already another descriptor 8) Not kernel problem. */
- return retval;
+ return retval;
-out_release:
- sock_release(sock);
- return retval;
+ return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
/*
@@ -1368,87 +1362,72 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
/*
+ * reserve descriptors and make sure we won't fail
+ * to return them to userland.
+ */
+ fd1 = get_unused_fd_flags(flags);
+ if (unlikely(fd1 < 0))
+ return fd1;
+
+ fd2 = get_unused_fd_flags(flags);
+ if (unlikely(fd2 < 0)) {
+ put_unused_fd(fd1);
+ return fd2;
+ }
+
+ err = put_user(fd1, &usockvec[0]);
+ if (err)
+ goto out;
+
+ err = put_user(fd2, &usockvec[1]);
+ if (err)
+ goto out;
+
+ /*
* Obtain the first socket and check if the underlying protocol
* supports the socketpair call.
*/
err = sock_create(family, type, protocol, &sock1);
- if (err < 0)
+ if (unlikely(err < 0))
goto out;
err = sock_create(family, type, protocol, &sock2);
- if (err < 0)
- goto out_release_1;
-
- err = sock1->ops->socketpair(sock1, sock2);
- if (err < 0)
- goto out_release_both;
-
- fd1 = get_unused_fd_flags(flags);
- if (unlikely(fd1 < 0)) {
- err = fd1;
- goto out_release_both;
+ if (unlikely(err < 0)) {
+ sock_release(sock1);
+ goto out;
}
- fd2 = get_unused_fd_flags(flags);
- if (unlikely(fd2 < 0)) {
- err = fd2;
- goto out_put_unused_1;
+ err = sock1->ops->socketpair(sock1, sock2);
+ if (unlikely(err < 0)) {
+ sock_release(sock2);
+ sock_release(sock1);
+ goto out;
}
newfile1 = sock_alloc_file(sock1, flags, NULL);
if (IS_ERR(newfile1)) {
err = PTR_ERR(newfile1);
- goto out_put_unused_both;
+ sock_release(sock2);
+ goto out;
}
newfile2 = sock_alloc_file(sock2, flags, NULL);
if (IS_ERR(newfile2)) {
err = PTR_ERR(newfile2);
- goto out_fput_1;
+ fput(newfile1);
+ goto out;
}
- err = put_user(fd1, &usockvec[0]);
- if (err)
- goto out_fput_both;
-
- err = put_user(fd2, &usockvec[1]);
- if (err)
- goto out_fput_both;
-
audit_fd_pair(fd1, fd2);
fd_install(fd1, newfile1);
fd_install(fd2, newfile2);
- /* fd1 and fd2 may be already another descriptors.
- * Not kernel problem.
- */
-
return 0;
-out_fput_both:
- fput(newfile2);
- fput(newfile1);
- put_unused_fd(fd2);
- put_unused_fd(fd1);
- goto out;
-
-out_fput_1:
- fput(newfile1);
- put_unused_fd(fd2);
- put_unused_fd(fd1);
- sock_release(sock2);
- goto out;
-
-out_put_unused_both:
+out:
put_unused_fd(fd2);
-out_put_unused_1:
put_unused_fd(fd1);
-out_release_both:
- sock_release(sock2);
-out_release_1:
- sock_release(sock1);
-out:
return err;
}
@@ -1564,7 +1543,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
if (IS_ERR(newfile)) {
err = PTR_ERR(newfile);
put_unused_fd(newfd);
- sock_release(newsock);
goto out_put;
}
@@ -2643,6 +2621,15 @@ out_fs:
core_initcall(sock_init); /* early initcall */
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ bpf_jit_enable = 1;
+#endif
+ return 0;
+}
+pure_initcall(jit_init);
+
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
* allows a thread in BH context to safely check if the process
* lock is held. In this case, if the lock is held, queue work.
*/
- if (sock_owned_by_user(strp->sk)) {
+ if (sock_owned_by_user_nocheck(strp->sk)) {
queue_work(strp_wq, &strp->work);
return;
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef..444380f968f1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
goto out_free_groups;
creds->cr_group_info->gid[i] = kgid;
}
+ groups_sort(creds->cr_group_info);
return 0;
out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73165e9ca5bf..26531193fce4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -264,7 +264,7 @@ out:
return status;
}
-static struct cache_detail rsi_cache_template = {
+static const struct cache_detail rsi_cache_template = {
.owner = THIS_MODULE,
.hash_size = RSI_HASHMAX,
.name = "auth.rpcsec.init",
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
goto out;
rsci.cred.cr_group_info->gid[i] = kgid;
}
+ groups_sort(rsci.cred.cr_group_info);
/* mech name */
len = qword_get(&mesg, buf, mlen);
@@ -524,7 +525,7 @@ out:
return status;
}
-static struct cache_detail rsc_cache_template = {
+static const struct cache_detail rsc_cache_template = {
.owner = THIS_MODULE,
.hash_size = RSC_HASHMAX,
.name = "auth.rpcsec.context",
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 79d55d949d9a..e68943895be4 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
}
EXPORT_SYMBOL_GPL(cache_unregister_net);
-struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net)
+struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net)
{
struct cache_detail *cd;
int i;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a801da812f86..e2a4184f3c5d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1841,6 +1841,7 @@ call_bind_status(struct rpc_task *task)
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
+ case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -ENOBUFS:
@@ -1917,6 +1918,7 @@ call_connect_status(struct rpc_task *task)
/* fall through */
case -ECONNRESET:
case -ECONNABORTED:
+ case -ENETDOWN:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EADDRINUSE:
@@ -2022,6 +2024,7 @@ call_transmit_status(struct rpc_task *task)
*/
case -ECONNREFUSED:
case -EHOSTDOWN:
+ case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
@@ -2071,6 +2074,7 @@ call_bc_transmit(struct rpc_task *task)
switch (task->tk_status) {
case 0:
/* Success */
+ case -ENETDOWN:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
@@ -2139,6 +2143,7 @@ call_status(struct rpc_task *task)
task->tk_status = 0;
switch(status) {
case -EHOSTDOWN:
+ case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index f81eaa8e0888..af7f28fb8102 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
ug.gi->gid[i] = kgid;
}
+ groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
@@ -569,7 +570,7 @@ static int unix_gid_show(struct seq_file *m,
return 0;
}
-static struct cache_detail unix_gid_cache_template = {
+static const struct cache_detail unix_gid_cache_template = {
.owner = THIS_MODULE,
.hash_size = GID_HASHMAX,
.name = "auth.unix.gid",
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
cred->cr_group_info->gid[i] = kgid;
}
+ groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
@@ -862,7 +864,7 @@ struct auth_ops svcauth_unix = {
.set_client = svcauth_unix_set_client,
};
-static struct cache_detail ip_map_cache_template = {
+static const struct cache_detail ip_map_cache_template = {
.owner = THIS_MODULE,
.hash_size = IP_HASHMAX,
.name = "auth.unix.ip",
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5..33b74fd84051 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ unsigned int connect_cookie;
int status, numreqs;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
} else if (!req->rq_bytes_sent)
return;
+ connect_cookie = xprt->connect_cookie;
req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
xprt->stat.bklog_u += xprt->backlog.qlen;
xprt->stat.sending_u += xprt->sending.qlen;
xprt->stat.pending_u += xprt->pending.qlen;
+ spin_unlock_bh(&xprt->transport_lock);
- /* Don't race with disconnect */
- if (!xprt_connected(xprt))
- task->tk_status = -ENOTCONN;
- else {
+ req->rq_connect_cookie = connect_cookie;
+ if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
/*
- * Sleep on the pending queue since
- * we're expecting a reply.
+ * Sleep on the pending queue if we're expecting a reply.
+ * The spinlock ensures atomicity between the test of
+ * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
*/
- if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task))
+ spin_lock(&xprt->recv_lock);
+ if (!req->rq_reply_bytes_recvd) {
rpc_sleep_on(&xprt->pending, task, xprt_timer);
- req->rq_connect_cookie = xprt->connect_cookie;
+ /*
+ * Send an extra queue wakeup call if the
+ * connection was dropped in case the call to
+ * rpc_sleep_on() raced.
+ */
+ if (!xprt_connected(xprt))
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ }
+ spin_unlock(&xprt->recv_lock);
}
- spin_unlock_bh(&xprt->transport_lock);
}
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144c..a3f2ab283aeb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1408,11 +1408,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
__func__, rep, req, be32_to_cpu(rep->rr_xid));
- if (list_empty(&req->rl_registered) &&
- !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
- rpcrdma_complete_rqst(rep);
- else
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7..6ee1ad8978f3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
+#include <linux/smp.h>
#include "xprt_rdma.h"
@@ -656,6 +657,7 @@ xprt_rdma_allocate(struct rpc_task *task)
task->tk_pid, __func__, rqst->rq_callsize,
rqst->rq_rcvsize, req);
+ req->rl_cpu = smp_processor_id();
req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82..8607c029c0dd 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
struct workqueue_struct *recv_wq;
recv_wq = alloc_workqueue("xprtrdma_receive",
- WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
+ WQ_MEM_RECLAIM | WQ_HIGHPRI,
0);
if (!recv_wq)
return -ENOMEM;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f..1342f743f1c4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -342,6 +342,7 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
+ int rl_cpu;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9cc850c2719e..6d0cc3b8f932 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2440,7 +2440,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
*/
case -ECONNREFUSED:
case -ECONNRESET:
+ case -ENETDOWN:
case -ENETUNREACH:
+ case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
/*
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..c8001471da6c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ restart:
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
+ kfree(b);
return -EINVAL;
}
@@ -347,8 +348,10 @@ restart:
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
- if (tipc_mon_create(net, bearer_id))
+ if (tipc_mon_create(net, bearer_id)) {
+ bearer_disable(net, b);
return -ENOMEM;
+ }
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 12777cac638a..5f4ffae807ee 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
static void tipc_group_decr_active(struct tipc_group *grp,
struct tipc_member *m)
{
- if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+ m->state == MBR_REMITTED)
grp->active_cnt--;
}
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
if (m->window >= ADV_IDLE)
return;
- if (!list_empty(&m->congested))
- return;
+ list_del_init(&m->congested);
/* Sort member into congested members' list */
list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
u16 prev = grp->bc_snd_nxt - 1;
struct tipc_member *m;
struct rb_node *n;
+ u16 ackers = 0;
for (n = rb_first(&grp->members); n; n = rb_next(n)) {
m = container_of(n, struct tipc_member, tree_node);
if (tipc_group_is_enabled(m)) {
tipc_group_update_member(m, len);
m->bc_acked = prev;
+ ackers++;
}
}
/* Mark number of acknowledges to expect, if any */
if (ack)
- grp->bc_ackers = grp->member_cnt;
+ grp->bc_ackers = ackers;
grp->bc_snd_nxt++;
}
@@ -497,6 +499,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
while ((skb = skb_peek(defq))) {
hdr = buf_msg(skb);
mtyp = msg_type(hdr);
+ blks = msg_blocks(hdr);
deliver = true;
ack = false;
update = false;
@@ -546,7 +549,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
if (!update)
continue;
- blks = msg_blocks(hdr);
tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
}
return;
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
int max_active = grp->max_active;
int reclaim_limit = max_active * 3 / 4;
int active_cnt = grp->active_cnt;
- struct tipc_member *m, *rm;
+ struct tipc_member *m, *rm, *pm;
m = tipc_group_find_member(grp, node, port);
if (!m)
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
}
+ grp->active_cnt--;
+ list_del_init(&m->list);
+ if (list_empty(&grp->pending))
+ return;
+
+ /* Set oldest pending member to active and advertise */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
break;
case MBR_RECLAIMING:
case MBR_DISCOVERED:
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
} else if (mtyp == GRP_REMIT_MSG) {
msg_set_grp_remitted(hdr, m->window);
}
+ msg_set_dest_droppable(hdr, true);
__skb_queue_tail(xmitq, skb);
}
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
__skb_queue_tail(inputq, m->event_msg);
}
- if (m->window < ADV_IDLE)
- tipc_group_update_member(m, 0);
- else
- list_del_init(&m->congested);
+ list_del_init(&m->congested);
+ tipc_group_update_member(m, 0);
return;
case GRP_LEAVE_MSG:
if (!m)
return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+ list_del_init(&m->list);
+ list_del_init(&m->congested);
+ *usr_wakeup = true;
/* Wait until WITHDRAW event is received */
if (m->state != MBR_LEAVING) {
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
ehdr = buf_msg(m->event_msg);
msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
__skb_queue_tail(inputq, m->event_msg);
- *usr_wakeup = true;
- list_del_init(&m->congested);
return;
case GRP_ADV_MSG:
if (!m)
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
if (!m || m->state != MBR_RECLAIMING)
return;
- list_del_init(&m->list);
- grp->active_cnt--;
remitted = msg_grp_remitted(hdr);
/* Messages preceding the REMIT still in receive queue */
if (m->advertised > remitted) {
m->state = MBR_REMITTED;
in_flight = m->advertised - remitted;
+ m->advertised = ADV_IDLE + in_flight;
+ return;
}
/* All messages preceding the REMIT have been read */
if (m->advertised <= remitted) {
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
m->advertised = ADV_IDLE + in_flight;
+ grp->active_cnt--;
+ list_del_init(&m->list);
/* Set oldest pending member to active and advertise */
if (list_empty(&grp->pending))
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
*usr_wakeup = true;
m->usr_pending = false;
node_up = tipc_node_is_up(net, node);
-
- /* Hold back event if more messages might be expected */
- if (m->state != MBR_LEAVING && node_up) {
- m->event_msg = skb;
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- } else {
- if (node_up)
+ m->event_msg = NULL;
+
+ if (node_up) {
+ /* Hold back event if a LEAVE msg should be expected */
+ if (m->state != MBR_LEAVING) {
+ m->event_msg = skb;
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ } else {
msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- else
+ __skb_queue_tail(inputq, skb);
+ }
+ } else {
+ if (m->state != MBR_LEAVING) {
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+ } else {
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ }
__skb_queue_tail(inputq, skb);
}
+ list_del_init(&m->list);
list_del_init(&m->congested);
}
*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b..32dc33a94bc7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *tmp;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
tn->monitors[bearer_id] = NULL;
list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/server.c b/net/tipc/server.c
index acaef80fb88c..d60c30342327 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
newcon->usr_data = s->tipc_conn_new(newcon->conid);
if (!newcon->usr_data) {
sock_release(newsock);
+ conn_put(newcon);
return -ENOMEM;
}
@@ -511,7 +512,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
s = con->server;
scbr = s->tipc_conn_new(*conid);
if (!scbr) {
- tipc_close_conn(con);
+ conn_put(con);
return false;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..3b4084480377 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
+ case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
- case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
revents |= POLLIN | POLLRDNORM;
break;
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
__skb_dequeue(arrvq);
__skb_queue_tail(inputq, skb);
}
- refcount_dec(&skb->users);
+ kfree_skb(skb);
spin_unlock_bh(&inputq->lock);
continue;
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..3deabcab4882 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
goto rcu_out;
}
- tipc_rcv(sock_net(sk), skb, b);
- rcu_read_unlock();
- return 0;
-
rcu_out:
rcu_read_unlock();
out:
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 5583df708b8c..a827547aa102 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk)
lock_sock(sk);
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
vsock_remove_sock(vsk);
release_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 391775e3575c..a7a73ffe675b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -797,11 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk)
/* We should not be sending anymore since the peer won't be
* there to receive, but we can still receive if there is data
- * left in our consume queue.
+ * left in our consume queue. If the local endpoint is a host,
+ * we can't call vsock_stream_has_data, since that may block,
+ * but a host endpoint can't read data once the VM has
+ * detached, so there is no available data in that case.
*/
- if (vsock_stream_has_data(vsk) <= 0) {
- sk->sk_state = TCP_CLOSE;
-
+ if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
+ vsock_stream_has_data(vsk) <= 0) {
if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
* we are still in the connecting state, i.e.,
@@ -811,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk)
* event like a reset.
*/
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
+ sk->sk_state = TCP_CLOSE;
}
sk->sk_state_change(sk);
}
@@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
-MODULE_VERSION("1.0.4.0-k");
+MODULE_VERSION("1.0.5.0-k");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("vmware_vsock");
MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da91bb547db3..1abcc4fc4df1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -20,6 +20,10 @@ config CFG80211
tristate "cfg80211 - wireless configuration API"
depends on RFKILL || !RFKILL
select FW_LOADER
+ # may need to update this when certificates are changed and are
+ # using a different algorithm, though right now they shouldn't
+ # (this is here rather than below to allow it to be a module)
+ select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS
---help---
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
@@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR
certificates like in the kernel sources (net/wireless/certs/)
that shall be accepted for a signed regulatory database.
+ Note that you need to also select the correct CRYPTO_<hash> modules
+ for your certificates, and if cfg80211 is built-in they also must be.
+
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
@$(kecho) " GEN $@"
- @echo '#include "reg.h"' > $@
- @echo 'const u8 shipped_regdb_certs[] = {' >> $@
- @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
- @echo '};' >> $@
- @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+ @(echo '#include "reg.h"'; \
+ echo 'const u8 shipped_regdb_certs[] = {'; \
+ cat $^ ; \
+ echo '};'; \
+ echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+ ) > $@
$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
$(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@$(kecho) " GEN $@"
- @echo '#include "reg.h"' > $@
- @echo 'const u8 extra_regdb_certs[] = {' >> $@
- @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
- @echo '};' >> $@
- @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
+ @(set -e; \
+ allf=""; \
+ for f in $^ ; do \
+ # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
+ thisf=$$(od -An -v -tx1 < $$f | \
+ sed -e 's/ /\n/g' | \
+ sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
+ sed -e 's/^/0x/;s/$$/,/'); \
+ # file should not be empty - maybe command substitution failed? \
+ test ! -z "$$thisf";\
+ allf=$$allf$$thisf;\
+ done; \
+ ( \
+ echo '#include "reg.h"'; \
+ echo 'const u8 extra_regdb_certs[] = {'; \
+ echo "$$allf"; \
+ echo '};'; \
+ echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
+ ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..2b3dbcd40e46 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
case NL80211_IFTYPE_AP:
if (wdev->ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
- goto nla_put_failure;
+ goto nla_put_failure_locked;
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
if (!ssid_ie)
break;
if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
- goto nla_put_failure;
+ goto nla_put_failure_locked;
break;
}
default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
genlmsg_end(msg, hdr);
return 0;
+ nla_put_failure_locked:
+ wdev_unlock(wdev);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -11359,7 +11361,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
break;
case NL80211_NAN_FUNC_FOLLOW_UP:
if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
- !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
err = -EINVAL;
goto out;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..3f6f6f8c9fa5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
*
*/
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <linux/percpu.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
+struct xfrm_trans_tasklet { /* instantiated once per CPU via DEFINE_PER_CPU() */
+ struct tasklet_struct tasklet; /* set up in xfrm_input_init() to run xfrm_trans_reinject() */
+ struct sk_buff_head queue; /* skbs added by xfrm_trans_queue(), drained by the tasklet */
+};
+
+struct xfrm_trans_cb { /* accessed through XFRM_TRANS_SKB_CB(), i.e. stored at the start of skb->cb */
+ int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); /* set by xfrm_trans_queue(), invoked by xfrm_trans_reinject() */
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
static struct kmem_cache *secpath_cachep __read_mostly;
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
xfrm_address_t *daddr;
struct xfrm_mode *inner_mode;
u32 mark = skb->mark;
- unsigned int family;
+ unsigned int family = AF_UNSPEC;
int decaps = 0;
int async = 0;
bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (encap_type < 0) {
x = xfrm_input_state(skb);
+
+ if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+ if (x->km.state == XFRM_STATE_ACQ)
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+ else
+ XFRM_INC_STATS(net,
+ LINUX_MIB_XFRMINSTATEINVALID);
+ goto drop;
+ }
+
family = x->outer_mode->afinfo->family;
/* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
}
EXPORT_SYMBOL(xfrm_input_resume);
+static void xfrm_trans_reinject(unsigned long data) /* tasklet handler registered in xfrm_input_init() */
+{
+ struct xfrm_trans_tasklet *trans = (void *)data; /* the xfrm_trans_tasklet passed to tasklet_init() */
+ struct sk_buff_head queue; /* local list the pending skbs are moved onto */
+ struct sk_buff *skb;
+
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&trans->queue, &queue); /* move everything pending from trans->queue to the local list */
+
+ while ((skb = __skb_dequeue(&queue)))
+ XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb); /* run the callback saved by xfrm_trans_queue(); sk argument is NULL */
+}
+
+/*
+ * Queue @skb on the current CPU's xfrm_trans_tasklet and schedule the
+ * tasklet; xfrm_trans_reinject() later calls @finish for the skb.
+ *
+ * Returns 0 once the skb is queued, or -ENOBUFS (skb not queued) when the
+ * per-CPU queue already holds netdev_max_backlog or more packets.
+ */
+int xfrm_trans_queue(struct sk_buff *skb,
+ int (*finish)(struct net *, struct sock *,
+ struct sk_buff *))
+{
+ struct xfrm_trans_tasklet *trans;
+
+ trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+ if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ return -ENOBUFS;
+
+ /* Stash the callback in skb->cb for xfrm_trans_reinject() */
+ XFRM_TRANS_SKB_CB(skb)->finish = finish;
+ /*
+ * The per-CPU queue is initialised with __skb_queue_head_init(), which
+ * does not set up the queue spinlock, and it is spliced without the
+ * lock in xfrm_trans_reinject().  Use the lockless enqueue as well;
+ * skb_queue_tail() would take the uninitialised lock.
+ */
+ __skb_queue_tail(&trans->queue, skb);
+ tasklet_schedule(&trans->tasklet);
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
void __init xfrm_input_init(void)
{
int err;
+ int i;
init_dummy_netdev(&xfrm_napi_dev);
err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
sizeof(struct sec_path),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+
+ for_each_possible_cpu(i) {
+ struct xfrm_trans_tasklet *trans;
+
+ trans = &per_cpu(xfrm_trans_tasklet, i);
+ __skb_queue_head_init(&trans->queue);
+ tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+ (unsigned long)trans);
+ }
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..70aa5cb0c659 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
- bool match = xfrm_selector_match(&pol->selector, fl, family);
+ bool match;
int err = 0;
+ if (pol->family != family) {
+ pol = NULL;
+ goto out;
+ }
+
+ match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
pol = NULL;
@@ -1833,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
sizeof(struct xfrm_policy *) * num_pols) == 0 &&
xfrm_xdst_can_reuse(xdst, xfrm, err)) {
dst_hold(&xdst->u.dst);
+ xfrm_pols_put(pols, num_pols);
while (err > 0)
xfrm_state_put(xfrm[--err]);
return xdst;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..500b3391f474 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
if (orig->aead) {
x->aead = xfrm_algo_aead_clone(orig->aead);
+ x->geniv = orig->geniv;
if (!x->aead)
goto error;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767b..bdb48e5dba04 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
{
+ u16 prev_family;
int i;
if (nr > XFRM_MAX_DEPTH)
return -EINVAL;
+ prev_family = family;
+
for (i = 0; i < nr; i++) {
/* We never validated the ut->family value, so many
* applications simply leave it at zero. The check was
@@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
if (!ut[i].family)
ut[i].family = family;
+ if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+ (ut[i].family != prev_family))
+ return -EINVAL;
+
+ prev_family = ut[i].family;
+
switch (ut[i].family) {
case AF_INET:
break;
@@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
default:
return -EINVAL;
}
+
+ switch (ut[i].id.proto) {
+ case IPPROTO_AH:
+ case IPPROTO_ESP:
+ case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+#endif
+ case IPSEC_PROTO_ANY:
+ break;
+ default:
+ return -EINVAL;
+ }
+
}
return 0;
@@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
- [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 },
+ [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {