aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds2017-01-27 12:54:16 -0800
committerLinus Torvalds2017-01-27 12:54:16 -0800
commit1b1bc42c1692e9b62756323c675a44cb1a1f9dbd (patch)
treed514a2d8512fc52c15747841e2368f8f99a50787 /net
parent3365135d43f861003555c963b309672d053a2228 (diff)
parent950eabbd6ddedc1b08350b9169a6a51b130ebaaf (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) GTP fixes from Andreas Schultz (missing genl module alias, clear IP DF on transmit). 2) Netfilter needs to reflect the fwmark when sending resets, from Pau Espin Pedrol. 3) nftable dump OOPS fix from Liping Zhang. 4) Fix erroneous setting of VIRTIO_NET_HDR_F_DATA_VALID on transmit, from Rolf Neugebauer. 5) Fix build error of ipt_CLUSTERIP when procfs is disabled, from Arnd Bergmann. 6) Fix regression in handling of NETIF_F_SG in harmonize_features(), from Eric Dumazet. 7) Fix RTNL deadlock wrt. lwtunnel module loading, from David Ahern. 8) tcp_fastopen_create_child() needs to setup tp->max_window, from Alexey Kodanev. 9) Missing kmemdup() failure check in ipv6 segment routing code, from Eric Dumazet. 10) Don't execute unix_bind() under the bindlock, otherwise we deadlock with splice. From WANG Cong. 11) ip6_tnl_parse_tlv_enc_lim() potentially reallocates the skb buffer, therefore callers must reload cached header pointers into that skb. Fix from Eric Dumazet. 12) Fix various bugs in legacy IRQ fallback handling in alx driver, from Tobias Regnery. 13) Do not allow lwtunnel drivers to be unloaded while they are referenced by active instances, from Robert Shearman. 14) Fix truncated PHY LED trigger names, from Geert Uytterhoeven. 15) Fix a few regressions from virtio_net XDP support, from John Fastabend and Jakub Kicinski. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (102 commits) ISDN: eicon: silence misleading array-bounds warning net: phy: micrel: add support for KSZ8795 gtp: fix cross netns recv on gtp socket gtp: clear DF bit on GTP packet tx gtp: add genl family modules alias tcp: don't annotate mark on control socket from tcp_v6_send_response() ravb: unmap descriptors when freeing rings virtio_net: reject XDP programs using header adjustment virtio_net: use dev_kfree_skb for small buffer XDP receive r8152: check rx after napi is enabled r8152: re-schedule napi for tx r8152: avoid start_xmit to schedule napi when napi is disabled r8152: avoid start_xmit to call napi_schedule during autosuspend net: dsa: Bring back device detaching in dsa_slave_suspend() net: phy: leds: Fix truncated LED trigger names net: phy: leds: Break dependency of phy.h on phy_led_triggers.h net: phy: leds: Clear phy_num_led_triggers on failure to avoid crash net-next: ethernet: mediatek: change the compatible string Documentation: devicetree: change the mediatek ethernet compatible string bnxt_en: Fix RTNL lock usage on bnxt_get_port_module_status(). ...
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/fragmentation.c10
-rw-r--r--net/bridge/br_netlink.c33
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/lwt_bpf.c1
-rw-r--r--net/core/lwtunnel.c66
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dsa/slave.c8
-rw-r--r--net/ipv4/fib_frontend.c8
-rw-r--r--net/ipv4/ip_output.c1
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c7
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c8
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c15
-rw-r--r--net/ipv4/tcp_fastopen.c1
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/ila/ila_lwt.c1
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ip6_tunnel.c36
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c8
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c13
-rw-r--r--net/ipv6/route.c12
-rw-r--r--net/ipv6/seg6.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c1
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/mac80211/rate.c2
-rw-r--r--net/mpls/af_mpls.c48
-rw-r--r--net/mpls/mpls_iptunnel.c1
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/nf_conntrack_core.c44
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/nf_tables_api.c67
-rw-r--r--net/netfilter/nft_dynset.c3
-rw-r--r--net/netfilter/nft_log.c3
-rw-r--r--net/netfilter/nft_lookup.c3
-rw-r--r--net/netfilter/nft_objref.c6
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c2
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/sctp/ipv6.c3
-rw-r--r--net/sctp/offload.c2
-rw-r--r--net/sctp/socket.c6
-rw-r--r--net/tipc/node.c9
-rw-r--r--net/tipc/server.c48
-rw-r--r--net/tipc/subscr.c124
-rw-r--r--net/tipc/subscr.h1
-rw-r--r--net/unix/af_unix.c27
52 files changed, 407 insertions, 268 deletions
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 9c561e683f4b..0854ebd8613e 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -474,7 +474,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if) {
ret = -EINVAL;
- goto put_primary_if;
+ goto free_skb;
}
/* Create one header to be copied to all fragments */
@@ -502,7 +502,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
if (!skb_fragment) {
ret = -ENOMEM;
- goto free_skb;
+ goto put_primary_if;
}
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
@@ -511,7 +511,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
if (ret != NET_XMIT_SUCCESS) {
ret = NET_XMIT_DROP;
- goto free_skb;
+ goto put_primary_if;
}
frag_header.no++;
@@ -519,7 +519,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
/* The initial check in this function should cover this case */
if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
ret = -EINVAL;
- goto free_skb;
+ goto put_primary_if;
}
}
@@ -527,7 +527,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
if (batadv_skb_head_push(skb, header_size) < 0 ||
pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) {
ret = -ENOMEM;
- goto free_skb;
+ goto put_primary_if;
}
memcpy(skb->data, &frag_header, header_size);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 71c7453268c1..7109b389ea58 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -781,20 +781,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
}
-static int br_dev_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-{
- struct net_bridge *br = netdev_priv(dev);
-
- if (tb[IFLA_ADDRESS]) {
- spin_lock_bh(&br->lock);
- br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
- spin_unlock_bh(&br->lock);
- }
-
- return register_netdevice(dev);
-}
-
static int br_port_slave_changelink(struct net_device *brdev,
struct net_device *dev,
struct nlattr *tb[],
@@ -1115,6 +1101,25 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return 0;
}
+static int br_dev_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net_bridge *br = netdev_priv(dev);
+ int err;
+
+ if (tb[IFLA_ADDRESS]) {
+ spin_lock_bh(&br->lock);
+ br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
+ spin_unlock_bh(&br->lock);
+ }
+
+ err = br_changelink(dev, tb, data);
+ if (err)
+ return err;
+
+ return register_netdevice(dev);
+}
+
static size_t br_get_size(const struct net_device *brdev)
{
return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */
diff --git a/net/core/dev.c b/net/core/dev.c
index 07b307b0b414..7f218e095361 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2795,9 +2795,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, type)) {
features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
- } else if (illegal_highdma(skb->dev, skb)) {
- features &= ~NETIF_F_SG;
}
+ if (illegal_highdma(skb->dev, skb))
+ features &= ~NETIF_F_SG;
return features;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e23766c7e3ba..236a21e3c878 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1712,7 +1712,7 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_channels channels, max;
+ struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS };
u32 max_rx_in_use = 0;
if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 71bb3e2eca08..b3eef90b2df9 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -386,6 +386,7 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
.fill_encap = bpf_fill_encap_info,
.get_encap_size = bpf_encap_nlsize,
.cmp_encap = bpf_encap_cmp,
+ .owner = THIS_MODULE,
};
static int __init bpf_lwt_init(void)
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index a5d4e866ce88..c23465005f2f 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -26,6 +26,7 @@
#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#ifdef CONFIG_MODULES
@@ -114,25 +115,77 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
+ if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
+ ret = ops->build_state(dev, encap, family, cfg, lws);
+ if (ret)
+ module_put(ops->owner);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+int lwtunnel_valid_encap_type(u16 encap_type)
+{
+ const struct lwtunnel_encap_ops *ops;
+ int ret = -EINVAL;
+
+ if (encap_type == LWTUNNEL_ENCAP_NONE ||
+ encap_type > LWTUNNEL_ENCAP_MAX)
+ return ret;
+
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[encap_type]);
+ rcu_read_unlock();
#ifdef CONFIG_MODULES
if (!ops) {
const char *encap_type_str = lwtunnel_encap_str(encap_type);
if (encap_type_str) {
- rcu_read_unlock();
+ __rtnl_unlock();
request_module("rtnl-lwt-%s", encap_type_str);
+ rtnl_lock();
+
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
+ rcu_read_unlock();
}
}
#endif
- if (likely(ops && ops->build_state))
- ret = ops->build_state(dev, encap, family, cfg, lws);
- rcu_read_unlock();
+ return ops ? 0 : -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(lwtunnel_valid_encap_type);
- return ret;
+int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
+{
+ struct rtnexthop *rtnh = (struct rtnexthop *)attr;
+ struct nlattr *nla_entype;
+ struct nlattr *attrs;
+ struct nlattr *nla;
+ u16 encap_type;
+ int attrlen;
+
+ while (rtnh_ok(rtnh, remaining)) {
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ attrs = rtnh_attrs(rtnh);
+ nla = nla_find(attrs, attrlen, RTA_ENCAP);
+ nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+
+ if (nla_entype) {
+ encap_type = nla_get_u16(nla_entype);
+
+ if (lwtunnel_valid_encap_type(encap_type) != 0)
+ return -EOPNOTSUPP;
+ }
+ }
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ return 0;
}
-EXPORT_SYMBOL(lwtunnel_build_state);
+EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
void lwtstate_free(struct lwtunnel_state *lws)
{
@@ -144,6 +197,7 @@ void lwtstate_free(struct lwtunnel_state *lws)
} else {
kfree(lws);
}
+ module_put(ops->owner);
}
EXPORT_SYMBOL(lwtstate_free);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index adfc790f7193..c4e879c02186 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -227,7 +227,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -281,7 +281,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
- ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
+ ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 68c9eea00518..7d4596110851 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1105,10 +1105,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
p->phy_interface = p->phy->interface;
- phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
- p->phy_interface);
-
- return 0;
+ return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
+ p->phy_interface);
}
static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
@@ -1203,6 +1201,8 @@ int dsa_slave_suspend(struct net_device *slave_dev)
{
struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ netif_device_detach(slave_dev);
+
if (p->phy) {
phy_stop(p->phy);
p->old_pause = -1;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eae0332b0e8c..7db2ad2e82d3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -46,6 +46,7 @@
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/l3mdev.h>
+#include <net/lwtunnel.h>
#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -677,6 +678,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_mx_len = nla_len(attr);
break;
case RTA_MULTIPATH:
+ err = lwtunnel_valid_encap_type_attr(nla_data(attr),
+ nla_len(attr));
+ if (err < 0)
+ goto errout;
cfg->fc_mp = nla_data(attr);
cfg->fc_mp_len = nla_len(attr);
break;
@@ -691,6 +696,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
break;
case RTA_ENCAP_TYPE:
cfg->fc_encap_type = nla_get_u16(attr);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ if (err < 0)
+ goto errout;
break;
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index fac275c48108..b67719f45953 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1629,6 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
if (unlikely(err)) {
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index fed3d29f9eb3..0fd1976ab63b 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
.fill_encap = ip_tun_fill_encap_info,
.get_encap_size = ip_tun_encap_nlsize,
.cmp_encap = ip_tun_cmp_encap,
+ .owner = THIS_MODULE,
};
static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
@@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
.fill_encap = ip6_tun_fill_encap_info,
.get_encap_size = ip6_tun_encap_nlsize,
.cmp_encap = ip_tun_cmp_encap,
+ .owner = THIS_MODULE,
};
void __init ip_tunnel_core_init(void)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a6b8c1a4102b..0a783cd73faf 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -144,7 +144,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
rcu_read_lock_bh();
c = __clusterip_config_find(net, clusterip);
if (c) {
- if (!c->pde || unlikely(!atomic_inc_not_zero(&c->refcount)))
+#ifdef CONFIG_PROC_FS
+ if (!c->pde)
+ c = NULL;
+ else
+#endif
+ if (unlikely(!atomic_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
atomic_inc(&c->entries);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index f273098e48fd..37fb9552e858 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -63,10 +63,10 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
return dev_match || flags & XT_RPFILTER_LOOSE;
}
-static bool rpfilter_is_local(const struct sk_buff *skb)
+static bool
+rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
- const struct rtable *rt = skb_rtable(skb);
- return rt && (rt->rt_flags & RTCF_LOCAL);
+ return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
@@ -79,7 +79,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
info = par->matchinfo;
invert = info->flags & XT_RPFILTER_INVERT;
- if (rpfilter_is_local(skb))
+ if (rpfilter_is_loopback(skb, xt_in(par)))
return true ^ invert;
iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fd8220213afc..146d86105183 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
+
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 965b1a161369..2981291910dd 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -26,13 +26,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}
-static bool fib4_is_local(const struct sk_buff *skb)
-{
- const struct rtable *rt = skb_rtable(skb);
-
- return rt && (rt->rt_flags & RTCF_LOCAL);
-}
-
#define DSCP_BITS 0xfc
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
@@ -95,8 +88,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
- nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+ nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ nft_in(pkt)->ifindex);
return;
}
@@ -131,7 +126,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
switch (res.type) {
case RTN_UNICAST:
break;
- case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */
+ case RTN_LOCAL: /* Should not see RTN_LOCAL here */
return;
default:
break;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f51919535ca7..dd2560c83a85 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
* scaled. So correct it appropriately.
*/
tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+ tp->max_window = tp->snd_wnd;
/* Activate the retrans timer so that SYNACK can be retransmitted.
* The request socket is not added to the ehash
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6c790754ae3e..41dcbd568cbe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5078,7 +5078,7 @@ static void tcp_check_space(struct sock *sk)
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
/* pairs with tcp_poll() */
- smp_mb__after_atomic();
+ smp_mb();
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
tcp_new_space(sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c1e124bc8e1e..f60e88e56255 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5540,8 +5540,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -5550,7 +5549,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
dev_disable_change(idev);
}
}
- rcu_read_unlock();
}
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index a7bc54ab46e2..13b5e85fe0d5 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -238,6 +238,7 @@ static const struct lwtunnel_encap_ops ila_encap_ops = {
.fill_encap = ila_fill_encap_info,
.get_encap_size = ila_encap_nlsize,
.cmp_encap = ila_encap_cmp,
+ .owner = THIS_MODULE,
};
int ila_lwt_init(void)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 7396e75e161b..75c308239243 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -176,7 +176,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
np->tclass);
rcu_read_unlock();
return res;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 75b6108234dd..558631860d91 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -582,6 +582,9 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
return -1;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
+
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 38122d04fadc..2c0df09e9036 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -172,7 +172,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- struct ipv6_txoptions *opt, int tclass)
+ __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -240,7 +240,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = mark;
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 753d6d0860fb..ff8ee06491c3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -400,18 +400,19 @@ ip6_tnl_dev_uninit(struct net_device *dev)
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
- __u8 nexthdr = ipv6h->nexthdr;
- __u16 off = sizeof(*ipv6h);
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
+ unsigned int nhoff = raw - skb->data;
+ unsigned int off = nhoff + sizeof(*ipv6h);
+ u8 next, nexthdr = ipv6h->nexthdr;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
- __u16 optlen = 0;
struct ipv6_opt_hdr *hdr;
- if (raw + off + sizeof(*hdr) > skb->data &&
- !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+ u16 optlen;
+
+ if (!pskb_may_pull(skb, off + sizeof(*hdr)))
break;
- hdr = (struct ipv6_opt_hdr *) (raw + off);
+ hdr = (struct ipv6_opt_hdr *)(skb->data + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
if (frag_hdr->frag_off)
@@ -422,20 +423,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
} else {
optlen = ipv6_optlen(hdr);
}
+ /* cache hdr->nexthdr, since pskb_may_pull() might
+ * invalidate hdr
+ */
+ next = hdr->nexthdr;
if (nexthdr == NEXTHDR_DEST) {
- __u16 i = off + 2;
+ u16 i = 2;
+
+ /* Remember : hdr is no longer valid at this point. */
+ if (!pskb_may_pull(skb, off + optlen))
+ break;
+
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
/* No more room for encapsulation limit */
- if (i + sizeof (*tel) > off + optlen)
+ if (i + sizeof(*tel) > optlen)
break;
- tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+ tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i;
/* return index of option if found and valid */
if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
tel->length == 1)
- return i;
+ return i + off - nhoff;
/* else jump to next option */
if (tel->type)
i += tel->length + 2;
@@ -443,7 +453,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
i++;
}
}
- nexthdr = hdr->nexthdr;
+ nexthdr = next;
off += optlen;
}
return 0;
@@ -1303,6 +1313,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowlabel = key->label;
} else {
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index d5263dc364a9..b12e61b7b16c 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -72,10 +72,10 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
return ret;
}
-static bool rpfilter_is_local(const struct sk_buff *skb)
+static bool
+rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
- const struct rt6_info *rt = (const void *) skb_dst(skb);
- return rt && (rt->rt6i_flags & RTF_LOCAL);
+ return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
@@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct ipv6hdr *iph;
bool invert = info->flags & XT_RPFILTER_INVERT;
- if (rpfilter_is_local(skb))
+ if (rpfilter_is_loopback(skb, xt_in(par)))
return true ^ invert;
iph = ipv6_hdr(skb);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 10090400c72f..eedee5d108d9 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
@@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
skb_dst_set(nskb, dst);
+ nskb->mark = fl6.flowi6_mark;
+
skb_reserve(nskb, hh_len + dst->header_len);
ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
ip6_dst_hoplimit(dst));
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index c947aad8bcc6..765facf03d45 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -18,13 +18,6 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
-static bool fib6_is_local(const struct sk_buff *skb)
-{
- const struct rt6_info *rt = (const void *)skb_dst(skb);
-
- return rt && (rt->rt6i_flags & RTF_LOCAL);
-}
-
static int get_ifindex(const struct net_device *dev)
{
return dev ? dev->ifindex : 0;
@@ -164,8 +157,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
- nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+ nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ nft_in(pkt)->ifindex);
return;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4f6b067c8753..7ea85370c11c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2896,6 +2896,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_MULTIPATH]) {
cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+
+ err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
+ cfg->fc_mp_len);
+ if (err < 0)
+ goto errout;
}
if (tb[RTA_PREF]) {
@@ -2909,9 +2914,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_ENCAP])
cfg->fc_encap = tb[RTA_ENCAP];
- if (tb[RTA_ENCAP_TYPE])
+ if (tb[RTA_ENCAP_TYPE]) {
cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ if (err < 0)
+ goto errout;
+ }
+
if (tb[RTA_EXPIRES]) {
unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index b172d85c650a..a855eb325b03 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -176,6 +176,8 @@ static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info)
val = nla_data(info->attrs[SEG6_ATTR_DST]);
t_new = kmemdup(val, sizeof(*val), GFP_KERNEL);
+ if (!t_new)
+ return -ENOMEM;
mutex_lock(&sdata->lock);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 1d60cb132835..c46f8cbf5ab5 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -422,6 +422,7 @@ static const struct lwtunnel_encap_ops seg6_iptun_ops = {
.fill_encap = seg6_fill_encap_info,
.get_encap_size = seg6_encap_nlsize,
.cmp_encap = seg6_encap_cmp,
+ .owner = THIS_MODULE,
};
int __init seg6_iptunnel_init(void)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 73bc8fc68acd..cb8929681dc7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -469,7 +469,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -840,7 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
- ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 9e2641d45587..206698bc93f4 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -40,8 +40,6 @@ void rate_control_rate_init(struct sta_info *sta)
ieee80211_sta_set_rx_nss(sta);
- ieee80211_recalc_min_chandef(sta->sdata);
-
if (!ref)
return;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 15fe97644ffe..5b77377e5a15 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -98,18 +98,19 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
-static u32 mpls_multipath_hash(struct mpls_route *rt,
- struct sk_buff *skb, bool bos)
+static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
struct mpls_entry_decoded dec;
+ unsigned int mpls_hdr_len = 0;
struct mpls_shim_hdr *hdr;
bool eli_seen = false;
int label_index;
u32 hash = 0;
- for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
+ for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
label_index++) {
- if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
+ mpls_hdr_len += sizeof(*hdr);
+ if (!pskb_may_pull(skb, mpls_hdr_len))
break;
/* Read and decode the current label */
@@ -134,37 +135,38 @@ static u32 mpls_multipath_hash(struct mpls_route *rt,
eli_seen = true;
}
- bos = dec.bos;
- if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index +
- sizeof(struct iphdr))) {
+ if (!dec.bos)
+ continue;
+
+ /* found bottom label; does skb have room for a header? */
+ if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
const struct iphdr *v4hdr;
- v4hdr = (const struct iphdr *)(mpls_hdr(skb) +
- label_index);
+ v4hdr = (const struct iphdr *)(hdr + 1);
if (v4hdr->version == 4) {
hash = jhash_3words(ntohl(v4hdr->saddr),
ntohl(v4hdr->daddr),
v4hdr->protocol, hash);
} else if (v4hdr->version == 6 &&
- pskb_may_pull(skb, sizeof(*hdr) * label_index +
- sizeof(struct ipv6hdr))) {
+ pskb_may_pull(skb, mpls_hdr_len +
+ sizeof(struct ipv6hdr))) {
const struct ipv6hdr *v6hdr;
- v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) +
- label_index);
-
+ v6hdr = (const struct ipv6hdr *)(hdr + 1);
hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
hash = jhash_1word(v6hdr->nexthdr, hash);
}
}
+
+ break;
}
return hash;
}
static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
- struct sk_buff *skb, bool bos)
+ struct sk_buff *skb)
{
int alive = ACCESS_ONCE(rt->rt_nhn_alive);
u32 hash = 0;
@@ -180,7 +182,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
if (alive <= 0)
return NULL;
- hash = mpls_multipath_hash(rt, skb, bos);
+ hash = mpls_multipath_hash(rt, skb);
nh_index = hash % alive;
if (alive == rt->rt_nhn)
goto out;
@@ -278,17 +280,11 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
hdr = mpls_hdr(skb);
dec = mpls_entry_decode(hdr);
- /* Pop the label */
- skb_pull(skb, sizeof(*hdr));
- skb_reset_network_header(skb);
-
- skb_orphan(skb);
-
rt = mpls_route_input_rcu(net, dec.label);
if (!rt)
goto drop;
- nh = mpls_select_multipath(rt, skb, dec.bos);
+ nh = mpls_select_multipath(rt, skb);
if (!nh)
goto drop;
@@ -297,6 +293,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (!mpls_output_possible(out_dev))
goto drop;
+ /* Pop the label */
+ skb_pull(skb, sizeof(*hdr));
+ skb_reset_network_header(skb);
+
+ skb_orphan(skb);
+
if (skb_warn_if_lro(skb))
goto drop;
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 2f7ccd934416..1d281c1ff7c1 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -215,6 +215,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = {
.fill_encap = mpls_fill_encap_info,
.get_encap_size = mpls_encap_nlsize,
.cmp_encap = mpls_encap_cmp,
+ .owner = THIS_MODULE,
};
static int __init mpls_iptunnel_init(void)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 63729b489c2c..bbc45f8a7b2d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -494,7 +494,7 @@ config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
- This option adds the "meta" expression that you can use to match
+ This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
config NFT_SET_RBTREE
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3a073cd9fcf4..4e8083c5e01d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -85,11 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 64u
-/* upper bound of scan intervals */
-#define GC_INTERVAL_MAX (2 * HZ)
-/* maximum conntracks to evict per gc run */
-#define GC_MAX_EVICTS 256u
+#define GC_MAX_BUCKETS_DIV 128u
+/* upper bound of full table scan */
+#define GC_MAX_SCAN_JIFFIES (16u * HZ)
+/* desired ratio of entries found to be expired */
+#define GC_EVICT_RATIO 50u
static struct conntrack_gc_work conntrack_gc_work;
@@ -938,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
+ unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
@@ -979,8 +980,7 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched_rcu_qs();
- } while (++buckets < goal &&
- expired_count < GC_MAX_EVICTS);
+ } while (++buckets < goal);
if (gc_work->exiting)
return;
@@ -997,27 +997,25 @@ static void gc_worker(struct work_struct *work)
* 1. Minimize time until we notice a stale entry
* 2. Maximize scan intervals to not waste cycles
*
- * Normally, expired_count will be 0, this increases the next_run time
- * to priorize 2) above.
+ * Normally, expire ratio will be close to 0.
*
- * As soon as a timed-out entry is found, move towards 1) and increase
- * the scan frequency.
- * In case we have lots of evictions next scan is done immediately.
+ * As soon as a sizeable fraction of the entries have expired
+ * increase scan frequency.
*/
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
- gc_work->next_gc_run = 0;
- next_run = 0;
- } else if (expired_count) {
- gc_work->next_gc_run /= 2U;
- next_run = msecs_to_jiffies(1);
+ if (ratio > GC_EVICT_RATIO) {
+ gc_work->next_gc_run = min_interval;
} else {
- if (gc_work->next_gc_run < GC_INTERVAL_MAX)
- gc_work->next_gc_run += msecs_to_jiffies(1);
+ unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
- next_run = gc_work->next_gc_run;
+ BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
+
+ gc_work->next_gc_run += min_interval;
+ if (gc_work->next_gc_run > max)
+ gc_work->next_gc_run = max;
}
+ next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
@@ -1025,7 +1023,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = GC_INTERVAL_MAX;
+ gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1917,7 +1915,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3dca90dc24ad..ffb9e8ada899 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -13,7 +13,6 @@
/* Internal logging interface, which relies on the real
LOG target modules */
-#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0db5f9782265..1b913760f205 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -928,7 +928,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
}
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
- [NFTA_CHAIN_TABLE] = { .type = NLA_STRING },
+ [NFTA_CHAIN_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
[NFTA_CHAIN_NAME] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
@@ -1854,7 +1855,8 @@ static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
}
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
- [NFTA_RULE_TABLE] = { .type = NLA_STRING },
+ [NFTA_RULE_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
@@ -2443,7 +2445,8 @@ nft_select_set_ops(const struct nlattr * const nla[],
}
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
- [NFTA_SET_TABLE] = { .type = NLA_STRING },
+ [NFTA_SET_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_NAME] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_FLAGS] = { .type = NLA_U32 },
@@ -3084,9 +3087,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
}
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
enum nft_registers dreg;
@@ -3192,8 +3195,10 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
- [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
- [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
+ [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
@@ -3303,9 +3308,9 @@ struct nft_set_dump_args {
};
static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
struct nft_set_dump_args *args;
@@ -3317,7 +3322,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
u8 genmask = nft_genmask_cur(net);
- const struct nft_set *set;
+ struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
@@ -3740,10 +3745,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err5;
}
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
+ err = -ENFILE;
+ goto err6;
+ }
+
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err6:
+ set->ops->remove(set, &elem);
err5:
kfree(trans);
err4:
@@ -3790,15 +3803,9 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- if (set->size &&
- !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
- return -ENFILE;
-
err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
- if (err < 0) {
- atomic_dec(&set->nelems);
+ if (err < 0)
break;
- }
}
return err;
}
@@ -3883,9 +3890,9 @@ err1:
}
static int nft_flush_set(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
struct nft_trans *trans;
int err;
@@ -3899,9 +3906,10 @@ static int nft_flush_set(const struct nft_ctx *ctx,
err = -ENOENT;
goto err1;
}
+ set->ndeact++;
- nft_trans_elem_set(trans) = (struct nft_set *)set;
- nft_trans_elem(trans) = *((struct nft_set_elem *)elem);
+ nft_trans_elem_set(trans) = set;
+ nft_trans_elem(trans) = *elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
@@ -4032,8 +4040,10 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
- [NFTA_OBJ_TABLE] = { .type = NLA_STRING },
- [NFTA_OBJ_NAME] = { .type = NLA_STRING },
+ [NFTA_OBJ_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_OBJ_NAME] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
};
@@ -4262,10 +4272,11 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter->table[0] &&
+ if (filter && filter->table[0] &&
strcmp(filter->table, table->name))
goto cont;
- if (filter->type != NFT_OBJECT_UNSPEC &&
+ if (filter &&
+ filter->type != NFT_OBJECT_UNSPEC &&
obj->type->type != filter->type)
goto cont;
@@ -5009,9 +5020,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
const struct nft_chain *chain);
static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
const struct nft_set_iter *iter,
- const struct nft_set_elem *elem)
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
const struct nft_data *data;
@@ -5035,7 +5046,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
{
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- const struct nft_set *set;
+ struct nft_set *set;
struct nft_set_binding *binding;
struct nft_set_iter iter;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 7de2f46734a4..049ad2d9ee66 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -98,7 +98,8 @@ out:
}
static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
- [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
[NFTA_DYNSET_OP] = { .type = NLA_U32 },
[NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 6271e40a3dd6..6f6e64423643 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -39,7 +39,8 @@ static void nft_log_eval(const struct nft_expr *expr,
static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
[NFTA_LOG_GROUP] = { .type = NLA_U16 },
- [NFTA_LOG_PREFIX] = { .type = NLA_STRING },
+ [NFTA_LOG_PREFIX] = { .type = NLA_STRING,
+ .len = NF_LOG_PREFIXLEN - 1 },
[NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
[NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
[NFTA_LOG_LEVEL] = { .type = NLA_U32 },
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index d4f97fa7e21d..e21aea7e5ec8 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -49,7 +49,8 @@ static void nft_lookup_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
- [NFTA_LOOKUP_SET] = { .type = NLA_STRING },
+ [NFTA_LOOKUP_SET] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 415a65ba2b85..1ae8c49ca4a1 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -193,10 +193,12 @@ nft_objref_select_ops(const struct nft_ctx *ctx,
}
static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = {
- [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING },
+ [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 },
[NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 },
- [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_OBJREF_SET_ID] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 1e20e2bbb6d9..e36069fb76ae 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -212,7 +212,7 @@ static void nft_hash_remove(const struct nft_set *set,
rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
}
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_hash *priv = nft_set_priv(set);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 08376e50f6cd..f06f55ee516d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -221,7 +221,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
}
static void nft_rbtree_walk(const struct nft_ctx *ctx,
- const struct nft_set *set,
+ struct nft_set *set,
struct nft_set_iter *iter)
{
const struct nft_rbtree *priv = nft_set_priv(set);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b9e1a13b4ba3..3d555c79a7b5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1976,7 +1976,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
return -EINVAL;
*len -= sizeof(vnet_hdr);
- if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le()))
+ if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true))
return -EINVAL;
return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
@@ -2237,7 +2237,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (po->has_vnet_hdr) {
if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr),
- vio_le())) {
+ vio_le(), true)) {
spin_lock(&sk->sk_receive_queue.lock);
goto drop_n_account;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 5ed8e79bf102..64dfd35ccdcc 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -222,7 +222,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
rcu_read_lock();
- res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+ res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 7e869d0cca69..4f5a2b580aa5 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -68,7 +68,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
goto out;
}
- segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
+ segs = skb_segment(skb, features | NETIF_F_HW_CSUM | NETIF_F_SG);
if (IS_ERR(segs))
goto out;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 318c6786d653..37eeab7899fc 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -235,8 +235,12 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
sctp_assoc_t id)
{
struct sctp_association *addr_asoc = NULL, *id_asoc = NULL;
- struct sctp_transport *transport;
+ struct sctp_af *af = sctp_get_af_specific(addr->ss_family);
union sctp_addr *laddr = (union sctp_addr *)addr;
+ struct sctp_transport *transport;
+
+ if (sctp_verify_addr(sk, laddr, af->sockaddr_len))
+ return NULL;
addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,
laddr,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9d2f4c2b08ab..27753325e06e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n)
write_lock_bh(&n->lock);
}
+static void tipc_node_write_unlock_fast(struct tipc_node *n)
+{
+ write_unlock_bh(&n->lock);
+}
+
static void tipc_node_write_unlock(struct tipc_node *n)
{
struct net *net = n->net;
@@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr)
}
tipc_node_write_lock(n);
list_add_tail(subscr, &n->publ_list);
- tipc_node_write_unlock(n);
+ tipc_node_write_unlock_fast(n);
tipc_node_put(n);
}
@@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr)
}
tipc_node_write_lock(n);
list_del_init(subscr);
- tipc_node_write_unlock(n);
+ tipc_node_write_unlock_fast(n);
tipc_node_put(n);
}
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 215849ce453d..3cd6402e812c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -86,12 +86,12 @@ struct outqueue_entry {
static void tipc_recv_work(struct work_struct *work);
static void tipc_send_work(struct work_struct *work);
static void tipc_clean_outqueues(struct tipc_conn *con);
-static void tipc_sock_release(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct sockaddr_tipc *saddr = con->server->saddr;
+ struct tipc_server *s = con->server;
+ struct sockaddr_tipc *saddr = s->saddr;
struct socket *sock = con->sock;
struct sock *sk;
@@ -103,9 +103,13 @@ static void tipc_conn_kref_release(struct kref *kref)
}
saddr->scope = -TIPC_NODE_SCOPE;
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- tipc_sock_release(con);
sock_release(sock);
con->sock = NULL;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
}
tipc_clean_outqueues(con);
@@ -128,8 +132,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
spin_lock_bh(&s->idr_lock);
con = idr_find(&s->conn_idr, conid);
- if (con)
+ if (con && test_bit(CF_CONNECTED, &con->flags))
conn_get(con);
+ else
+ con = NULL;
spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -186,26 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con)
write_unlock_bh(&sk->sk_callback_lock);
}
-static void tipc_sock_release(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
-
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
-
- tipc_unregister_callbacks(con);
-}
-
static void tipc_close_conn(struct tipc_conn *con)
{
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
+ tipc_unregister_callbacks(con);
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
+ if (con->conid)
+ s->tipc_conn_release(con->conid, con->usr_data);
/* We shouldn't flush pending works as we may be in the
* thread. In fact the races with pending rx/tx work structs
@@ -458,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
if (!con)
return -EINVAL;
+ if (!test_bit(CF_CONNECTED, &con->flags)) {
+ conn_put(con);
+ return 0;
+ }
+
e = tipc_alloc_entry(data, len);
if (!e) {
conn_put(con);
@@ -471,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
list_add_tail(&e->list, &con->outqueue);
spin_unlock_bh(&con->outqueue_lock);
- if (test_bit(CF_CONNECTED, &con->flags)) {
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- } else {
+ if (!queue_work(s->send_wq, &con->swork))
conn_put(con);
- }
return 0;
}
@@ -500,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
int ret;
spin_lock_bh(&con->outqueue_lock);
- while (1) {
+ while (test_bit(CF_CONNECTED, &con->flags)) {
e = list_entry(con->outqueue.next, struct outqueue_entry,
list);
if ((struct list_head *) e == &con->outqueue)
@@ -623,14 +619,12 @@ int tipc_server_start(struct tipc_server *s)
void tipc_server_stop(struct tipc_server *s)
{
struct tipc_conn *con;
- int total = 0;
int id;
spin_lock_bh(&s->idr_lock);
- for (id = 0; total < s->idr_in_use; id++) {
+ for (id = 0; s->idr_in_use; id++) {
con = idr_find(&s->conn_idr, id);
if (con) {
- total++;
spin_unlock_bh(&s->idr_lock);
tipc_close_conn(con);
spin_lock_bh(&s->idr_lock);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0dd02244e21d..9d94e65d0894 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -54,6 +54,8 @@ struct tipc_subscriber {
static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
+static void tipc_subscrp_put(struct tipc_subscription *subscription);
+static void tipc_subscrp_get(struct tipc_subscription *subscription);
/**
* htohl - convert value to endianness used by destination
@@ -123,6 +125,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_name_seq seq;
+ tipc_subscrp_get(sub);
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
@@ -132,30 +135,23 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
node);
+ tipc_subscrp_put(sub);
}
static void tipc_subscrp_timeout(unsigned long data)
{
struct tipc_subscription *sub = (struct tipc_subscription *)data;
- struct tipc_subscriber *subscriber = sub->subscriber;
/* Notify subscriber of timeout */
tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
TIPC_SUBSCR_TIMEOUT, 0, 0);
- spin_lock_bh(&subscriber->lock);
- tipc_subscrp_delete(sub);
- spin_unlock_bh(&subscriber->lock);
-
- tipc_subscrb_put(subscriber);
+ tipc_subscrp_put(sub);
}
static void tipc_subscrb_kref_release(struct kref *kref)
{
- struct tipc_subscriber *subcriber = container_of(kref,
- struct tipc_subscriber, kref);
-
- kfree(subcriber);
+ kfree(container_of(kref,struct tipc_subscriber, kref));
}
static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
@@ -168,6 +164,59 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
kref_get(&subscriber->kref);
}
+static void tipc_subscrp_kref_release(struct kref *kref)
+{
+ struct tipc_subscription *sub = container_of(kref,
+ struct tipc_subscription,
+ kref);
+ struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ struct tipc_subscriber *subscriber = sub->subscriber;
+
+ spin_lock_bh(&subscriber->lock);
+ tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
+ atomic_dec(&tn->subscription_count);
+ spin_unlock_bh(&subscriber->lock);
+ kfree(sub);
+ tipc_subscrb_put(subscriber);
+}
+
+static void tipc_subscrp_put(struct tipc_subscription *subscription)
+{
+ kref_put(&subscription->kref, tipc_subscrp_kref_release);
+}
+
+static void tipc_subscrp_get(struct tipc_subscription *subscription)
+{
+ kref_get(&subscription->kref);
+}
+
+/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
+ * subscriptions for a given subscriber.
+ */
+static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
+ struct tipc_subscr *s)
+{
+ struct list_head *subscription_list = &subscriber->subscrp_list;
+ struct tipc_subscription *sub, *temp;
+
+ spin_lock_bh(&subscriber->lock);
+ list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
+ if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
+ continue;
+
+ tipc_subscrp_get(sub);
+ spin_unlock_bh(&subscriber->lock);
+ tipc_subscrp_delete(sub);
+ tipc_subscrp_put(sub);
+ spin_lock_bh(&subscriber->lock);
+
+ if (s)
+ break;
+ }
+ spin_unlock_bh(&subscriber->lock);
+}
+
static struct tipc_subscriber *tipc_subscrb_create(int conid)
{
struct tipc_subscriber *subscriber;
@@ -177,8 +226,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
pr_warn("Subscriber rejected, no memory\n");
return NULL;
}
- kref_init(&subscriber->kref);
INIT_LIST_HEAD(&subscriber->subscrp_list);
+ kref_init(&subscriber->kref);
subscriber->conid = conid;
spin_lock_init(&subscriber->lock);
@@ -187,55 +236,22 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- /* Destroy any existing subscriptions for subscriber */
- list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
- subscrp_list) {
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) {
- tipc_subscrp_delete(sub);
- tipc_subscrb_put(subscriber);
- }
- }
- spin_unlock_bh(&subscriber->lock);
-
+ tipc_subscrb_subscrp_delete(subscriber, NULL);
tipc_subscrb_put(subscriber);
}
static void tipc_subscrp_delete(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ u32 timeout = htohl(sub->evt.s.timeout, sub->swap);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- kfree(sub);
- atomic_dec(&tn->subscription_count);
+ if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer))
+ tipc_subscrp_put(sub);
}
static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- /* Find first matching subscription, exit if not found */
- list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
- subscrp_list) {
- if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if ((timeout == TIPC_WAIT_FOREVER) ||
- del_timer(&sub->timer)) {
- tipc_subscrp_delete(sub);
- tipc_subscrb_put(subscriber);
- }
- break;
- }
- }
- spin_unlock_bh(&subscriber->lock);
+ tipc_subscrb_subscrp_delete(subscriber, s);
}
static struct tipc_subscription *tipc_subscrp_create(struct net *net,
@@ -272,6 +288,7 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
sub->swap = swap;
memcpy(&sub->evt.s, s, sizeof(*s));
atomic_inc(&tn->subscription_count);
+ kref_init(&sub->kref);
return sub;
}
@@ -288,17 +305,16 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
spin_lock_bh(&subscriber->lock);
list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- tipc_subscrb_get(subscriber);
sub->subscriber = subscriber;
tipc_nametbl_subscribe(sub);
+ tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
+ setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
timeout = htohl(sub->evt.s.timeout, swap);
- if (timeout == TIPC_WAIT_FOREVER)
- return;
- setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
- mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
+ if (timeout != TIPC_WAIT_FOREVER)
+ mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
}
/* Handle one termination request for the subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index be60103082c9..ffdc214c117a 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -57,6 +57,7 @@ struct tipc_subscriber;
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
+ struct kref kref;
struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 127656ebe7be..cef79873b09d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path = { NULL, NULL };
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ if (sun_path[0]) {
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sock)->i_mode & ~current_umask());
+ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+ goto out;
+ }
+ }
+
err = mutex_lock_interruptible(&u->bindlock);
if (err)
- goto out;
+ goto out_put;
err = -EINVAL;
if (u->addr)
@@ -1029,16 +1041,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
atomic_set(&addr->refcnt, 1);
if (sun_path[0]) {
- struct path path;
- umode_t mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
- if (err) {
- if (err == -EEXIST)
- err = -EADDRINUSE;
- unix_release_addr(addr);
- goto out_up;
- }
addr->hash = UNIX_HASH_SIZE;
hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
@@ -1065,6 +1067,9 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->bindlock);
+out_put:
+ if (err)
+ path_put(&path);
out:
return err;
}