Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/6lowpan_i.h | 9
-rw-r--r--  net/6lowpan/core.c | 8
-rw-r--r--  net/6lowpan/debugfs.c | 22
-rw-r--r--  net/6lowpan/iphc.c | 111
-rw-r--r--  net/6lowpan/nhc_udp.c | 2
-rw-r--r--  net/Kconfig | 1
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/atm/lec.c | 4
-rw-r--r--  net/batman-adv/bat_iv_ogm.c | 171
-rw-r--r--  net/batman-adv/bat_v.c | 21
-rw-r--r--  net/batman-adv/bat_v_ogm.c | 219
-rw-r--r--  net/batman-adv/bitarray.c | 16
-rw-r--r--  net/batman-adv/bitarray.h | 15
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.c | 333
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.h | 43
-rw-r--r--  net/batman-adv/debugfs.c | 21
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 31
-rw-r--r--  net/batman-adv/fragmentation.c | 12
-rw-r--r--  net/batman-adv/gateway_client.c | 12
-rw-r--r--  net/batman-adv/hard-interface.c | 40
-rw-r--r--  net/batman-adv/hard-interface.h | 3
-rw-r--r--  net/batman-adv/hash.h | 6
-rw-r--r--  net/batman-adv/icmp_socket.c | 24
-rw-r--r--  net/batman-adv/main.c | 26
-rw-r--r--  net/batman-adv/main.h | 11
-rw-r--r--  net/batman-adv/multicast.c | 11
-rw-r--r--  net/batman-adv/network-coding.c | 45
-rw-r--r--  net/batman-adv/originator.c | 60
-rw-r--r--  net/batman-adv/originator.h | 2
-rw-r--r--  net/batman-adv/packet.h | 3
-rw-r--r--  net/batman-adv/routing.c | 57
-rw-r--r--  net/batman-adv/routing.h | 6
-rw-r--r--  net/batman-adv/send.c | 16
-rw-r--r--  net/batman-adv/soft-interface.c | 60
-rw-r--r--  net/batman-adv/soft-interface.h | 10
-rw-r--r--  net/batman-adv/sysfs.c | 9
-rw-r--r--  net/batman-adv/translation-table.c | 88
-rw-r--r--  net/batman-adv/types.h | 15
-rw-r--r--  net/bluetooth/6lowpan.c | 93
-rw-r--r--  net/bluetooth/bnep/netdev.c | 2
-rw-r--r--  net/bridge/br_ioctl.c | 5
-rw-r--r--  net/bridge/br_mdb.c | 124
-rw-r--r--  net/bridge/br_multicast.c | 20
-rw-r--r--  net/bridge/br_netfilter_hooks.c | 6
-rw-r--r--  net/bridge/br_netfilter_ipv6.c | 10
-rw-r--r--  net/bridge/br_netlink.c | 78
-rw-r--r--  net/bridge/br_private.h | 22
-rw-r--r--  net/bridge/br_sysfs_br.c | 17
-rw-r--r--  net/bridge/br_vlan.c | 109
-rw-r--r--  net/ceph/auth.c | 8
-rw-r--r--  net/ceph/auth_none.c | 71
-rw-r--r--  net/ceph/auth_none.h | 3
-rw-r--r--  net/ceph/auth_x.c | 21
-rw-r--r--  net/ceph/auth_x.h | 1
-rw-r--r--  net/ceph/osd_client.c | 6
-rw-r--r--  net/core/dev.c | 20
-rw-r--r--  net/core/filter.c | 51
-rw-r--r--  net/core/flow.c | 14
-rw-r--r--  net/core/gen_stats.c | 35
-rw-r--r--  net/core/neighbour.c | 3
-rw-r--r--  net/core/net-procfs.c | 3
-rw-r--r--  net/core/pktgen.c | 1
-rw-r--r--  net/core/rtnetlink.c | 96
-rw-r--r--  net/core/skbuff.c | 247
-rw-r--r--  net/core/sock.c | 53
-rw-r--r--  net/core/sock_diag.c | 2
-rw-r--r--  net/dccp/dccp.h | 6
-rw-r--r--  net/dccp/input.c | 2
-rw-r--r--  net/dccp/ipv4.c | 24
-rw-r--r--  net/dccp/ipv6.c | 24
-rw-r--r--  net/dccp/minisocks.c | 2
-rw-r--r--  net/dccp/options.c | 2
-rw-r--r--  net/dccp/timer.c | 8
-rw-r--r--  net/dsa/dsa.c | 36
-rw-r--r--  net/dsa/slave.c | 100
-rw-r--r--  net/ieee802154/6lowpan/6lowpan_i.h | 14
-rw-r--r--  net/ieee802154/6lowpan/core.c | 6
-rw-r--r--  net/ieee802154/6lowpan/tx.c | 14
-rw-r--r--  net/ieee802154/nl802154.c | 10
-rw-r--r--  net/ipv4/arp.c | 2
-rw-r--r--  net/ipv4/fib_frontend.c | 7
-rw-r--r--  net/ipv4/fou.c | 6
-rw-r--r--  net/ipv4/gre_demux.c | 61
-rw-r--r--  net/ipv4/icmp.c | 18
-rw-r--r--  net/ipv4/inet_connection_sock.c | 6
-rw-r--r--  net/ipv4/inet_diag.c | 9
-rw-r--r--  net/ipv4/inet_hashtables.c | 4
-rw-r--r--  net/ipv4/inet_timewait_sock.c | 10
-rw-r--r--  net/ipv4/ip_forward.c | 6
-rw-r--r--  net/ipv4/ip_fragment.c | 14
-rw-r--r--  net/ipv4/ip_gre.c | 255
-rw-r--r--  net/ipv4/ip_input.c | 42
-rw-r--r--  net/ipv4/ip_sockglue.c | 7
-rw-r--r--  net/ipv4/ip_tunnel.c | 4
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 2
-rw-r--r--  net/ipv4/ip_vti.c | 18
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 223
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 250
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 47
-rw-r--r--  net/ipv4/route.c | 10
-rw-r--r--  net/ipv4/syncookies.c | 4
-rw-r--r--  net/ipv4/tcp.c | 47
-rw-r--r--  net/ipv4/tcp_bic.c | 6
-rw-r--r--  net/ipv4/tcp_cdg.c | 34
-rw-r--r--  net/ipv4/tcp_cubic.c | 26
-rw-r--r--  net/ipv4/tcp_fastopen.c | 14
-rw-r--r--  net/ipv4/tcp_htcp.c | 10
-rw-r--r--  net/ipv4/tcp_illinois.c | 21
-rw-r--r--  net/ipv4/tcp_input.c | 163
-rw-r--r--  net/ipv4/tcp_ipv4.c | 52
-rw-r--r--  net/ipv4/tcp_lp.c | 6
-rw-r--r--  net/ipv4/tcp_minisocks.c | 14
-rw-r--r--  net/ipv4/tcp_output.c | 51
-rw-r--r--  net/ipv4/tcp_recovery.c | 4
-rw-r--r--  net/ipv4/tcp_timer.c | 24
-rw-r--r--  net/ipv4/tcp_vegas.c | 6
-rw-r--r--  net/ipv4/tcp_vegas.h | 2
-rw-r--r--  net/ipv4/tcp_veno.c | 7
-rw-r--r--  net/ipv4/tcp_westwood.c | 7
-rw-r--r--  net/ipv4/tcp_yeah.c | 7
-rw-r--r--  net/ipv4/udp.c | 72
-rw-r--r--  net/ipv4/udp_offload.c | 8
-rw-r--r--  net/ipv6/Kconfig | 1
-rw-r--r--  net/ipv6/addrconf.c | 48
-rw-r--r--  net/ipv6/datagram.c | 13
-rw-r--r--  net/ipv6/exthdrs.c | 66
-rw-r--r--  net/ipv6/icmp.c | 45
-rw-r--r--  net/ipv6/ila/ila.h | 79
-rw-r--r--  net/ipv6/ila/ila_common.c | 81
-rw-r--r--  net/ipv6/ila/ila_lwt.c | 54
-rw-r--r--  net/ipv6/ila/ila_xlat.c | 161
-rw-r--r--  net/ipv6/inet6_hashtables.c | 2
-rw-r--r--  net/ipv6/ip6_fib.c | 1
-rw-r--r--  net/ipv6/ip6_flowlabel.c | 6
-rw-r--r--  net/ipv6/ip6_gre.c | 438
-rw-r--r--  net/ipv6/ip6_input.c | 39
-rw-r--r--  net/ipv6/ip6_output.c | 76
-rw-r--r--  net/ipv6/ip6_tunnel.c | 268
-rw-r--r--  net/ipv6/ip6mr.c | 8
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 6
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 235
-rw-r--r--  net/ipv6/netfilter/ip6t_SYNPROXY.c | 2
-rw-r--r--  net/ipv6/ping.c | 12
-rw-r--r--  net/ipv6/raw.c | 33
-rw-r--r--  net/ipv6/reassembly.c | 32
-rw-r--r--  net/ipv6/route.c | 42
-rw-r--r--  net/ipv6/syncookies.c | 4
-rw-r--r--  net/ipv6/tcp_ipv6.c | 41
-rw-r--r--  net/ipv6/udp.c | 129
-rw-r--r--  net/irda/irlan/irlan_eth.c | 2
-rw-r--r--  net/l2tp/l2tp_core.c | 4
-rw-r--r--  net/l2tp/l2tp_ip6.c | 33
-rw-r--r--  net/l3mdev/l3mdev.c | 63
-rw-r--r--  net/llc/af_llc.c | 1
-rw-r--r--  net/mac80211/iface.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 51
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 162
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 46
-rw-r--r--  net/netfilter/ipvs/ip_vs_nfct.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c | 15
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 414
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 83
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 12
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 29
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 13
-rw-r--r--  net/netfilter/nf_nat_core.c | 39
-rw-r--r--  net/netfilter/nf_tables_api.c | 78
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 6
-rw-r--r--  net/netfilter/nft_ct.c | 30
-rw-r--r--  net/netfilter/nft_rbtree.c | 49
-rw-r--r--  net/nfc/nci/core.c | 117
-rw-r--r--  net/nfc/nci/ntf.c | 2
-rw-r--r--  net/nfc/nci/rsp.c | 23
-rw-r--r--  net/openvswitch/conntrack.c | 8
-rw-r--r--  net/openvswitch/datapath.c | 27
-rw-r--r--  net/qrtr/Kconfig | 24
-rw-r--r--  net/qrtr/Makefile | 2
-rw-r--r--  net/qrtr/qrtr.c | 1007
-rw-r--r--  net/qrtr/qrtr.h | 31
-rw-r--r--  net/qrtr/smd.c | 117
-rw-r--r--  net/rds/tcp.c | 3
-rw-r--r--  net/rds/tcp.h | 4
-rw-r--r--  net/rds/tcp_connect.c | 8
-rw-r--r--  net/rds/tcp_listen.c | 54
-rw-r--r--  net/rds/tcp_recv.c | 14
-rw-r--r--  net/rxrpc/ar-input.c | 4
-rw-r--r--  net/sched/act_api.c | 7
-rw-r--r--  net/sched/act_bpf.c | 5
-rw-r--r--  net/sched/act_connmark.c | 3
-rw-r--r--  net/sched/act_csum.c | 2
-rw-r--r--  net/sched/act_gact.c | 2
-rw-r--r--  net/sched/act_ife.c | 2
-rw-r--r--  net/sched/act_ipt.c | 2
-rw-r--r--  net/sched/act_mirred.c | 2
-rw-r--r--  net/sched/act_nat.c | 2
-rw-r--r--  net/sched/act_pedit.c | 2
-rw-r--r--  net/sched/act_simple.c | 2
-rw-r--r--  net/sched/act_skbedit.c | 2
-rw-r--r--  net/sched/act_vlan.c | 2
-rw-r--r--  net/sched/cls_bpf.c | 2
-rw-r--r--  net/sched/cls_u32.c | 7
-rw-r--r--  net/sched/sch_api.c | 6
-rw-r--r--  net/sched/sch_fq_codel.c | 89
-rw-r--r--  net/sched/sch_generic.c | 44
-rw-r--r--  net/sched/sch_netem.c | 61
-rw-r--r--  net/sctp/chunk.c | 2
-rw-r--r--  net/sctp/input.c | 16
-rw-r--r--  net/sctp/inqueue.c | 2
-rw-r--r--  net/sctp/ipv6.c | 2
-rw-r--r--  net/sctp/sctp_diag.c | 11
-rw-r--r--  net/sctp/sm_sideeffect.c | 7
-rw-r--r--  net/sctp/ulpqueue.c | 25
-rw-r--r--  net/socket.c | 3
-rw-r--r--  net/sunrpc/xprtsock.c | 4
-rw-r--r--  net/switchdev/switchdev.c | 6
-rw-r--r--  net/tipc/core.c | 8
-rw-r--r--  net/tipc/msg.h | 14
-rw-r--r--  net/tipc/node.c | 27
-rw-r--r--  net/tipc/node.h | 6
-rw-r--r--  net/tipc/socket.c | 144
-rw-r--r--  net/tipc/socket.h | 17
-rw-r--r--  net/tipc/subscr.c | 3
-rw-r--r--  net/vmw_vsock/af_vsock.c | 21
-rw-r--r--  net/vmw_vsock/vmci_transport.c | 2
-rw-r--r--  net/xfrm/xfrm_output.c | 3
228 files changed, 6007 insertions, 3570 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index d16bb4b14aa1..97ecc27aeca6 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -3,6 +3,15 @@
#include <linux/netdevice.h>
+#include <net/6lowpan.h>
+
+/* caller needs to be sure that dev->type is ARPHRD_6LOWPAN */
+static inline bool lowpan_is_ll(const struct net_device *dev,
+ enum lowpan_lltypes lltype)
+{
+ return lowpan_dev(dev)->lltype == lltype;
+}
+
#ifdef CONFIG_6LOWPAN_DEBUGFS
int lowpan_dev_debugfs_init(struct net_device *dev);
void lowpan_dev_debugfs_exit(struct net_device *dev);
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 34e44c0c0836..7a240b3eaed1 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -27,11 +27,11 @@ int lowpan_register_netdevice(struct net_device *dev,
dev->mtu = IPV6_MIN_MTU;
dev->priv_flags |= IFF_NO_QUEUE;
- lowpan_priv(dev)->lltype = lltype;
+ lowpan_dev(dev)->lltype = lltype;
- spin_lock_init(&lowpan_priv(dev)->ctx.lock);
+ spin_lock_init(&lowpan_dev(dev)->ctx.lock);
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
- lowpan_priv(dev)->ctx.table[i].id = i;
+ lowpan_dev(dev)->ctx.table[i].id = i;
ret = register_netdevice(dev);
if (ret < 0)
@@ -85,7 +85,7 @@ static int lowpan_event(struct notifier_block *unused,
case NETDEV_DOWN:
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
- &lowpan_priv(dev)->ctx.table[i].flags);
+ &lowpan_dev(dev)->ctx.table[i].flags);
break;
default:
return NOTIFY_DONE;
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 0793a8157472..acbaa3db493b 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -172,7 +172,7 @@ static const struct file_operations lowpan_ctx_pfx_fops = {
static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
struct dentry *ctx, u8 id)
{
- struct lowpan_priv *lpriv = lowpan_priv(dev);
+ struct lowpan_dev *ldev = lowpan_dev(dev);
struct dentry *dentry, *root;
char buf[32];
@@ -185,25 +185,25 @@ static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
return -EINVAL;
dentry = debugfs_create_file("active", 0644, root,
- &lpriv->ctx.table[id],
+ &ldev->ctx.table[id],
&lowpan_ctx_flag_active_fops);
if (!dentry)
return -EINVAL;
dentry = debugfs_create_file("compression", 0644, root,
- &lpriv->ctx.table[id],
+ &ldev->ctx.table[id],
&lowpan_ctx_flag_c_fops);
if (!dentry)
return -EINVAL;
dentry = debugfs_create_file("prefix", 0644, root,
- &lpriv->ctx.table[id],
+ &ldev->ctx.table[id],
&lowpan_ctx_pfx_fops);
if (!dentry)
return -EINVAL;
dentry = debugfs_create_file("prefix_len", 0644, root,
- &lpriv->ctx.table[id],
+ &ldev->ctx.table[id],
&lowpan_ctx_plen_fops);
if (!dentry)
return -EINVAL;
@@ -247,21 +247,21 @@ static const struct file_operations lowpan_context_fops = {
int lowpan_dev_debugfs_init(struct net_device *dev)
{
- struct lowpan_priv *lpriv = lowpan_priv(dev);
+ struct lowpan_dev *ldev = lowpan_dev(dev);
struct dentry *contexts, *dentry;
int ret, i;
/* creating the root */
- lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
- if (!lpriv->iface_debugfs)
+ ldev->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
+ if (!ldev->iface_debugfs)
goto fail;
- contexts = debugfs_create_dir("contexts", lpriv->iface_debugfs);
+ contexts = debugfs_create_dir("contexts", ldev->iface_debugfs);
if (!contexts)
goto remove_root;
dentry = debugfs_create_file("show", 0644, contexts,
- &lowpan_priv(dev)->ctx,
+ &lowpan_dev(dev)->ctx,
&lowpan_context_fops);
if (!dentry)
goto remove_root;
@@ -282,7 +282,7 @@ fail:
void lowpan_dev_debugfs_exit(struct net_device *dev)
{
- debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs);
+ debugfs_remove_recursive(lowpan_dev(dev)->iface_debugfs);
}
int __init lowpan_debugfs_init(void)
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 68c80f3c9add..8501dd532fe1 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -53,9 +53,6 @@
#include <net/6lowpan.h>
#include <net/ipv6.h>
-/* special link-layer handling */
-#include <net/mac802154.h>
-
#include "6lowpan_i.h"
#include "nhc.h"
@@ -156,32 +153,17 @@
#define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f)
#define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4)
-static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr,
- const void *lladdr)
-{
- /* fe:80::XXXX:XXXX:XXXX:XXXX
- * \_________________/
- * hwaddr
- */
- ipaddr->s6_addr[0] = 0xFE;
- ipaddr->s6_addr[1] = 0x80;
- memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN);
- /* second bit-flip (Universe/Local)
- * is done according RFC2464
- */
- ipaddr->s6_addr[8] ^= 0x02;
-}
-
-static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
- const void *lladdr)
+static inline void
+lowpan_iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
+ const void *lladdr)
{
const struct ieee802154_addr *addr = lladdr;
- u8 eui64[EUI64_ADDR_LEN] = { };
+ u8 eui64[EUI64_ADDR_LEN];
switch (addr->mode) {
case IEEE802154_ADDR_LONG:
ieee802154_le64_to_be64(eui64, &addr->extended_addr);
- iphc_uncompress_eui64_lladdr(ipaddr, eui64);
+ lowpan_iphc_uncompress_eui64_lladdr(ipaddr, eui64);
break;
case IEEE802154_ADDR_SHORT:
/* fe:80::ff:fe00:XXXX
@@ -207,7 +189,7 @@ static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
static struct lowpan_iphc_ctx *
lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id)
{
- struct lowpan_iphc_ctx *ret = &lowpan_priv(dev)->ctx.table[id];
+ struct lowpan_iphc_ctx *ret = &lowpan_dev(dev)->ctx.table[id];
if (!lowpan_iphc_ctx_is_active(ret))
return NULL;
@@ -219,7 +201,7 @@ static struct lowpan_iphc_ctx *
lowpan_iphc_ctx_get_by_addr(const struct net_device *dev,
const struct in6_addr *addr)
{
- struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table;
+ struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table;
struct lowpan_iphc_ctx *ret = NULL;
struct in6_addr addr_pfx;
u8 addr_plen;
@@ -263,7 +245,7 @@ static struct lowpan_iphc_ctx *
lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
const struct in6_addr *addr)
{
- struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table;
+ struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table;
struct lowpan_iphc_ctx *ret = NULL;
struct in6_addr addr_mcast, network_pfx = {};
int i;
@@ -301,9 +283,10 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
*
* address_mode is the masked value for sam or dam value
*/
-static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
- struct in6_addr *ipaddr, u8 address_mode,
- const void *lladdr)
+static int lowpan_iphc_uncompress_addr(struct sk_buff *skb,
+ const struct net_device *dev,
+ struct in6_addr *ipaddr,
+ u8 address_mode, const void *lladdr)
{
bool fail;
@@ -332,12 +315,12 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
case LOWPAN_IPHC_SAM_11:
case LOWPAN_IPHC_DAM_11:
fail = false;
- switch (lowpan_priv(dev)->lltype) {
+ switch (lowpan_dev(dev)->lltype) {
case LOWPAN_LLTYPE_IEEE802154:
- iphc_uncompress_802154_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
break;
}
break;
@@ -360,11 +343,11 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
/* Uncompress address function for source context
* based address(non-multicast).
*/
-static int uncompress_ctx_addr(struct sk_buff *skb,
- const struct net_device *dev,
- const struct lowpan_iphc_ctx *ctx,
- struct in6_addr *ipaddr, u8 address_mode,
- const void *lladdr)
+static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb,
+ const struct net_device *dev,
+ const struct lowpan_iphc_ctx *ctx,
+ struct in6_addr *ipaddr,
+ u8 address_mode, const void *lladdr)
{
bool fail;
@@ -393,12 +376,12 @@ static int uncompress_ctx_addr(struct sk_buff *skb,
case LOWPAN_IPHC_SAM_11:
case LOWPAN_IPHC_DAM_11:
fail = false;
- switch (lowpan_priv(dev)->lltype) {
+ switch (lowpan_dev(dev)->lltype) {
case LOWPAN_LLTYPE_IEEE802154:
- iphc_uncompress_802154_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
break;
}
ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
@@ -657,22 +640,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
}
if (iphc1 & LOWPAN_IPHC_SAC) {
- spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid));
if (!ci) {
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
return -EINVAL;
}
pr_debug("SAC bit is set. Handle context based source address.\n");
- err = uncompress_ctx_addr(skb, dev, ci, &hdr.saddr,
- iphc1 & LOWPAN_IPHC_SAM_MASK, saddr);
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.saddr,
+ iphc1 & LOWPAN_IPHC_SAM_MASK,
+ saddr);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
} else {
/* Source address uncompression */
pr_debug("source address stateless compression\n");
- err = uncompress_addr(skb, dev, &hdr.saddr,
- iphc1 & LOWPAN_IPHC_SAM_MASK, saddr);
+ err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.saddr,
+ iphc1 & LOWPAN_IPHC_SAM_MASK,
+ saddr);
}
/* Check on error of previous branch */
@@ -681,10 +666,10 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) {
case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC:
- spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
if (!ci) {
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
return -EINVAL;
}
@@ -693,7 +678,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
err = lowpan_uncompress_multicast_ctx_daddr(skb, ci,
&hdr.daddr,
iphc1 & LOWPAN_IPHC_DAM_MASK);
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
break;
case LOWPAN_IPHC_M:
/* multicast */
@@ -701,22 +686,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
iphc1 & LOWPAN_IPHC_DAM_MASK);
break;
case LOWPAN_IPHC_DAC:
- spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
if (!ci) {
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
return -EINVAL;
}
/* Destination address context based uncompression */
pr_debug("DAC bit is set. Handle context based destination address.\n");
- err = uncompress_ctx_addr(skb, dev, ci, &hdr.daddr,
- iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.daddr,
+ iphc1 & LOWPAN_IPHC_DAM_MASK,
+ daddr);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
break;
default:
- err = uncompress_addr(skb, dev, &hdr.daddr,
- iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
+ err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr,
+ iphc1 & LOWPAN_IPHC_DAM_MASK,
+ daddr);
pr_debug("dest: stateless compression mode %d dest %pI6c\n",
iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr);
break;
@@ -736,7 +723,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
return err;
}
- switch (lowpan_priv(dev)->lltype) {
+ switch (lowpan_dev(dev)->lltype) {
case LOWPAN_LLTYPE_IEEE802154:
if (lowpan_802154_cb(skb)->d_size)
hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size -
@@ -1033,7 +1020,7 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
skb->data, skb->len);
ipv6_daddr_type = ipv6_addr_type(&hdr->daddr);
- spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
if (ipv6_daddr_type & IPV6_ADDR_MULTICAST)
dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr);
else
@@ -1042,15 +1029,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
memcpy(&dci_entry, dci, sizeof(*dci));
cid |= dci->id;
}
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
- spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr);
if (sci) {
memcpy(&sci_entry, sci, sizeof(*sci));
cid |= (sci->id << 4);
}
- spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
+ spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
/* if cid is zero it will be compressed */
if (cid) {
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 69537a2eaab1..225d91906dfa 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -91,7 +91,7 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed)
* here, we obtain the hint from the remaining size of the
* frame
*/
- switch (lowpan_priv(skb->dev)->lltype) {
+ switch (lowpan_dev(skb->dev)->lltype) {
case LOWPAN_LLTYPE_IEEE802154:
if (lowpan_802154_cb(skb)->d_size)
uh.len = htons(lowpan_802154_cb(skb)->d_size -
diff --git a/net/Kconfig b/net/Kconfig
index a8934d8c8fda..b841c42e5c9b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -236,6 +236,7 @@ source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
source "net/l3mdev/Kconfig"
+source "net/qrtr/Kconfig"
config RPS
bool
diff --git a/net/Makefile b/net/Makefile
index 81d14119eab5..bdd14553a774 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -78,3 +78,4 @@ endif
ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
obj-y += l3mdev/
endif
+obj-$(CONFIG_QRTR) += qrtr/
diff --git a/net/atm/lec.c b/net/atm/lec.c
index cd3b37989057..e574a7e9db6f 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
static void lec_tx_timeout(struct net_device *dev)
{
pr_info("%s\n", dev->name);
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
netif_wake_queue(dev);
}
@@ -324,7 +324,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
out:
if (entry)
lec_arp_put(entry);
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
return NETDEV_TX_OK;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index cb2d1b9b0340..7f98a9d39883 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -32,6 +32,7 @@
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/kref.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <linux/printk.h>
@@ -175,71 +176,107 @@ unlock:
}
/**
- * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
- * exclude the removed interface
+ * batadv_iv_ogm_drop_bcast_own_entry - drop section of bcast_own
* @orig_node: the orig_node that has to be changed
* @max_if_num: the current amount of interfaces
* @del_if_num: the index of the interface being removed
- *
- * Return: 0 on success, a negative error code otherwise.
*/
-static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
+static void
+batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
+ int max_if_num, int del_if_num)
{
- int ret = -ENOMEM;
- size_t chunk_size, if_offset;
- void *data_ptr = NULL;
-
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ size_t chunk_size;
+ size_t if_offset;
+ void *data_ptr;
- /* last interface was removed */
- if (max_if_num == 0)
- goto free_bcast_own;
+ lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC);
if (!data_ptr)
- goto unlock;
+ /* use old buffer when new one could not be allocated */
+ data_ptr = orig_node->bat_iv.bcast_own;
/* copy first part */
- memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
+ memmove(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
/* copy second part */
if_offset = (del_if_num + 1) * chunk_size;
- memcpy((char *)data_ptr + del_if_num * chunk_size,
- (uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
- (max_if_num - del_if_num) * chunk_size);
+ memmove((char *)data_ptr + del_if_num * chunk_size,
+ (uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
+ (max_if_num - del_if_num) * chunk_size);
-free_bcast_own:
- kfree(orig_node->bat_iv.bcast_own);
- orig_node->bat_iv.bcast_own = data_ptr;
+ /* bcast_own was shrunk down in new buffer; free old one */
+ if (orig_node->bat_iv.bcast_own != data_ptr) {
+ kfree(orig_node->bat_iv.bcast_own);
+ orig_node->bat_iv.bcast_own = data_ptr;
+ }
+}
+
+/**
+ * batadv_iv_ogm_drop_bcast_own_sum_entry - drop section of bcast_own_sum
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the current amount of interfaces
+ * @del_if_num: the index of the interface being removed
+ */
+static void
+batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
+ int max_if_num, int del_if_num)
+{
+ size_t if_offset;
+ void *data_ptr;
- if (max_if_num == 0)
- goto free_own_sum;
+ lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
- if (!data_ptr) {
- kfree(orig_node->bat_iv.bcast_own);
- goto unlock;
- }
+ if (!data_ptr)
+ /* use old buffer when new one could not be allocated */
+ data_ptr = orig_node->bat_iv.bcast_own_sum;
- memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
- del_if_num * sizeof(u8));
+ memmove(data_ptr, orig_node->bat_iv.bcast_own_sum,
+ del_if_num * sizeof(u8));
if_offset = (del_if_num + 1) * sizeof(u8);
- memcpy((char *)data_ptr + del_if_num * sizeof(u8),
- orig_node->bat_iv.bcast_own_sum + if_offset,
- (max_if_num - del_if_num) * sizeof(u8));
+ memmove((char *)data_ptr + del_if_num * sizeof(u8),
+ orig_node->bat_iv.bcast_own_sum + if_offset,
+ (max_if_num - del_if_num) * sizeof(u8));
+
+ /* bcast_own_sum was shrunk down in new buffer; free old one */
+ if (orig_node->bat_iv.bcast_own_sum != data_ptr) {
+ kfree(orig_node->bat_iv.bcast_own_sum);
+ orig_node->bat_iv.bcast_own_sum = data_ptr;
+ }
+}
-free_own_sum:
- kfree(orig_node->bat_iv.bcast_own_sum);
- orig_node->bat_iv.bcast_own_sum = data_ptr;
+/**
+ * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
+ * exclude the removed interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the current amount of interfaces
+ * @del_if_num: the index of the interface being removed
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
+ int max_if_num, int del_if_num)
+{
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ if (max_if_num == 0) {
+ kfree(orig_node->bat_iv.bcast_own);
+ kfree(orig_node->bat_iv.bcast_own_sum);
+ orig_node->bat_iv.bcast_own = NULL;
+ orig_node->bat_iv.bcast_own_sum = NULL;
+ } else {
+ batadv_iv_ogm_drop_bcast_own_entry(orig_node, max_if_num,
+ del_if_num);
+ batadv_iv_ogm_drop_bcast_own_sum_entry(orig_node, max_if_num,
+ del_if_num);
+ }
- ret = 0;
-unlock:
spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
- return ret;
+ return 0;
}
/**
@@ -644,18 +681,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
unsigned char *skb_buff;
unsigned int skb_size;
- if (!kref_get_unless_zero(&if_incoming->refcount))
- return;
-
- if (!kref_get_unless_zero(&if_outgoing->refcount))
- goto out_free_incoming;
-
/* own packet should always be scheduled */
if (!own_packet) {
if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"batman packet queue full\n");
- goto out_free_outgoing;
+ return;
}
}
@@ -681,6 +712,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
forw_packet_aggr->packet_len = packet_len;
memcpy(skb_buff, packet_buff, packet_len);
+ kref_get(&if_incoming->refcount);
+ kref_get(&if_outgoing->refcount);
forw_packet_aggr->own = own_packet;
forw_packet_aggr->if_incoming = if_incoming;
forw_packet_aggr->if_outgoing = if_outgoing;
@@ -710,10 +743,6 @@ out_free_forw_packet:
out_nomem:
if (!own_packet)
atomic_inc(&bat_priv->batman_queue_left);
-out_free_outgoing:
- batadv_hardif_put(if_outgoing);
-out_free_incoming:
- batadv_hardif_put(if_incoming);
}
/* aggregate a new packet into the existing ogm packet */
@@ -950,9 +979,15 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
if (tmp_hard_iface->soft_iface != hard_iface->soft_iface)
continue;
+
+ if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
+ continue;
+
batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
*ogm_buff_len, hard_iface,
tmp_hard_iface, 1, send_time);
+
+ batadv_hardif_put(tmp_hard_iface);
}
rcu_read_unlock();
@@ -1133,13 +1168,13 @@ out:
* @if_incoming: interface where the packet was received
* @if_outgoing: interface for which the retransmission should be considered
*
- * Return: 1 if the link can be considered bidirectional, 0 otherwise
+ * Return: true if the link can be considered bidirectional, false otherwise
*/
-static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh_node,
- struct batadv_ogm_packet *batadv_ogm_packet,
- struct batadv_hard_iface *if_incoming,
- struct batadv_hard_iface *if_outgoing)
+static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
+ struct batadv_orig_node *orig_neigh_node,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_hard_iface *if_incoming,
+ struct batadv_hard_iface *if_outgoing)
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node;
@@ -1147,9 +1182,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
- int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0;
+ int tq_asym_penalty, inv_asym_penalty, if_num;
unsigned int combined_tq;
int tq_iface_penalty;
+ bool ret = false;
/* find corresponding one hop neighbor */
rcu_read_lock();
@@ -1261,7 +1297,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
* consider it bidirectional
*/
if (batadv_ogm_packet->tq >= BATADV_TQ_TOTAL_BIDRECT_LIMIT)
- ret = 1;
+ ret = true;
out:
if (neigh_node)
@@ -1290,9 +1326,9 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
struct batadv_orig_ifinfo *orig_ifinfo = NULL;
struct batadv_neigh_node *neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
- int is_dup;
+ bool is_dup;
s32 seq_diff;
- int need_update = 0;
+ bool need_update = false;
int set_mark;
enum batadv_dup_status ret = BATADV_NO_DUP;
u32 seqno = ntohl(batadv_ogm_packet->seqno);
@@ -1402,7 +1438,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
struct sk_buff *skb_priv;
struct ethhdr *ethhdr;
u8 *prev_sender;
- int is_bidirect;
+ bool is_bidirect;
/* create a private copy of the skb, as some functions change tq value
* and/or flags.
@@ -1730,8 +1766,13 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (hard_iface->soft_iface != bat_priv->soft_iface)
continue;
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ continue;
+
batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node,
if_incoming, hard_iface);
+
+ batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -1829,9 +1870,8 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
int batman_count = 0;
u32 i;
- seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
- "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
- "Nexthop", "outgoingIF", "Potential nexthops");
+ seq_puts(seq,
+ " Originator last-seen (#/255) Nexthop [outgoingIF]: Potential nexthops ...\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1911,8 +1951,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
struct batadv_hard_iface *hard_iface;
int batman_count = 0;
- seq_printf(seq, " %10s %-13s %s\n",
- "IF", "Neighbor", "last-seen");
+ seq_puts(seq, " IF Neighbor last-seen\n");
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 3315b9a598af..3ff8bd1b7bdc 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -32,10 +32,21 @@
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
+#include "hard-interface.h"
#include "hash.h"
#include "originator.h"
#include "packet.h"
+static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+ /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can
+ * set the interface as ACTIVE right away, without any risk of race
+ * condition
+ */
+ if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
+ hard_iface->if_status = BATADV_IF_ACTIVE;
+}
+
static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
{
int ret;
@@ -151,8 +162,8 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
struct batadv_hard_iface *hard_iface;
int batman_count = 0;
- seq_printf(seq, " %-15s %s (%11s) [%10s]\n", "Neighbor",
- "last-seen", "throughput", "IF");
+ seq_puts(seq,
+ " Neighbor last-seen ( throughput) [ IF]\n");
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -191,9 +202,8 @@ static void batadv_v_orig_print(struct batadv_priv *bat_priv,
int batman_count = 0;
u32 i;
- seq_printf(seq, " %-15s %s (%11s) %17s [%10s]: %20s ...\n",
- "Originator", "last-seen", "throughput", "Nexthop",
- "outgoingIF", "Potential nexthops");
+ seq_puts(seq,
+ " Originator last-seen ( throughput) Nexthop [outgoingIF]: Potential nexthops ...\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -274,6 +284,7 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.name = "BATMAN_V",
+ .bat_iface_activate = batadv_v_iface_activate,
.bat_iface_enable = batadv_v_iface_enable,
.bat_iface_disable = batadv_v_iface_disable,
.bat_iface_update_mac = batadv_v_iface_update_mac,
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index d9bcbe6e7d65..473ebb9a0e73 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -26,6 +26,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/random.h>
@@ -176,6 +177,9 @@ static void batadv_v_ogm_send(struct work_struct *work)
if (hard_iface->soft_iface != bat_priv->soft_iface)
continue;
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ continue;
+
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
ogm_packet->orig, ntohl(ogm_packet->seqno),
@@ -185,10 +189,13 @@ static void batadv_v_ogm_send(struct work_struct *work)
/* this skb gets consumed by batadv_v_ogm_send_to_if() */
skb_tmp = skb_clone(skb, GFP_ATOMIC);
- if (!skb_tmp)
+ if (!skb_tmp) {
+ batadv_hardif_put(hard_iface);
break;
+ }
batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
+ batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -234,73 +241,6 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
}
/**
- * batadv_v_ogm_orig_update - update the originator status based on the received
- * OGM
- * @bat_priv: the bat priv with all the soft interface information
- * @orig_node: the originator to update
- * @neigh_node: the neighbour the OGM has been received from (to update)
- * @ogm2: the received OGM
- * @if_outgoing: the interface where this OGM is going to be forwarded through
- */
-static void
-batadv_v_ogm_orig_update(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- struct batadv_neigh_node *neigh_node,
- const struct batadv_ogm2_packet *ogm2,
- struct batadv_hard_iface *if_outgoing)
-{
- struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
- struct batadv_neigh_node *router = NULL;
- s32 neigh_seq_diff;
- u32 neigh_last_seqno;
- u32 router_last_seqno;
- u32 router_throughput, neigh_throughput;
-
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Searching and updating originator entry of received packet\n");
-
- /* if this neighbor already is our next hop there is nothing
- * to change
- */
- router = batadv_orig_router_get(orig_node, if_outgoing);
- if (router == neigh_node)
- goto out;
-
- /* don't consider neighbours with worse throughput.
- * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than
- * the last received seqno from our best next hop.
- */
- if (router) {
- router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
- neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
-
- /* if these are not allocated, something is wrong. */
- if (!router_ifinfo || !neigh_ifinfo)
- goto out;
-
- neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno;
- router_last_seqno = router_ifinfo->bat_v.last_seqno;
- neigh_seq_diff = neigh_last_seqno - router_last_seqno;
- router_throughput = router_ifinfo->bat_v.throughput;
- neigh_throughput = neigh_ifinfo->bat_v.throughput;
-
- if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
- (router_throughput >= neigh_throughput))
- goto out;
- }
-
- batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
-
-out:
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
-}
-
-/**
* batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded
* with B.A.T.M.A.N. V OGMs
* @bat_priv: the bat priv with all the soft interface information
@@ -347,10 +287,12 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
}
/**
- * batadv_v_ogm_forward - forward an OGM to the given outgoing interface
+ * batadv_v_ogm_forward - check conditions and forward an OGM to the given
+ * outgoing interface
* @bat_priv: the bat priv with all the soft interface information
* @ogm_received: previously received OGM to be forwarded
- * @throughput: throughput to announce, may vary per outgoing interface
+ * @orig_node: the originator which has been updated
+ * @neigh_node: the neigh_node through which the OGM has been received
* @if_incoming: the interface on which this OGM was received on
* @if_outgoing: the interface to which the OGM has to be forwarded to
*
@@ -359,28 +301,57 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
*/
static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
const struct batadv_ogm2_packet *ogm_received,
- u32 throughput,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node,
struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing)
{
+ struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
+ struct batadv_orig_ifinfo *orig_ifinfo = NULL;
+ struct batadv_neigh_node *router = NULL;
struct batadv_ogm2_packet *ogm_forward;
unsigned char *skb_buff;
struct sk_buff *skb;
size_t packet_len;
u16 tvlv_len;
+ /* only forward for specific interfaces, not for the default one. */
+ if (if_outgoing == BATADV_IF_DEFAULT)
+ goto out;
+
+ orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
+ if (!orig_ifinfo)
+ goto out;
+
+ /* acquire possibly updated router */
+ router = batadv_orig_router_get(orig_node, if_outgoing);
+
+ /* strict rule: forward packets coming from the best next hop only */
+ if (neigh_node != router)
+ goto out;
+
+ /* don't forward the same seqno twice on one interface */
+ if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm_received->seqno))
+ goto out;
+
+ orig_ifinfo->last_seqno_forwarded = ntohl(ogm_received->seqno);
+
if (ogm_received->ttl <= 1) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
- return;
+ goto out;
}
+ neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+ if (!neigh_ifinfo)
+ goto out;
+
tvlv_len = ntohs(ogm_received->tvlv_len);
packet_len = BATADV_OGM2_HLEN + tvlv_len;
skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev,
ETH_HLEN + packet_len);
if (!skb)
- return;
+ goto out;
skb_reserve(skb, ETH_HLEN);
skb_buff = skb_put(skb, packet_len);
@@ -388,15 +359,23 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
/* apply forward penalty */
ogm_forward = (struct batadv_ogm2_packet *)skb_buff;
- ogm_forward->throughput = htonl(throughput);
+ ogm_forward->throughput = htonl(neigh_ifinfo->bat_v.throughput);
ogm_forward->ttl--;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n",
- if_outgoing->net_dev->name, throughput, ogm_forward->ttl,
- if_incoming->net_dev->name);
+ if_outgoing->net_dev->name, ntohl(ogm_forward->throughput),
+ ogm_forward->ttl, if_incoming->net_dev->name);
batadv_v_ogm_send_to_if(skb, if_outgoing);
+
+out:
+ if (orig_ifinfo)
+ batadv_orig_ifinfo_put(orig_ifinfo);
+ if (router)
+ batadv_neigh_node_put(router);
+ if (neigh_ifinfo)
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
}
/**
@@ -493,8 +472,10 @@ out:
* @neigh_node: the neigh_node through with the OGM has been received
* @if_incoming: the interface where this packet was received
* @if_outgoing: the interface for which the packet should be considered
+ *
+ * Return: true if the packet should be forwarded, false otherwise
*/
-static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
+static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
const struct ethhdr *ethhdr,
const struct batadv_ogm2_packet *ogm2,
struct batadv_orig_node *orig_node,
@@ -503,14 +484,14 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_neigh_node *router = NULL;
- struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
struct batadv_orig_node *orig_neigh_node = NULL;
- struct batadv_orig_ifinfo *orig_ifinfo = NULL;
struct batadv_neigh_node *orig_neigh_router = NULL;
-
- neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
- if (!neigh_ifinfo)
- goto out;
+ struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
+ u32 router_throughput, neigh_throughput;
+ u32 router_last_seqno;
+ u32 neigh_last_seqno;
+ s32 neigh_seq_diff;
+ bool forward = false;
orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source);
if (!orig_neigh_node)
@@ -529,47 +510,57 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
goto out;
}
- if (router)
- batadv_neigh_node_put(router);
+ /* Mark the OGM to be considered for forwarding, and update routes
+ * if needed.
+ */
+ forward = true;
- /* Update routes, and check if the OGM is from the best next hop */
- batadv_v_ogm_orig_update(bat_priv, orig_node, neigh_node, ogm2,
- if_outgoing);
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Searching and updating originator entry of received packet\n");
- orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
- if (!orig_ifinfo)
+ /* if this neighbor already is our next hop there is nothing
+ * to change
+ */
+ if (router == neigh_node)
goto out;
- /* don't forward the same seqno twice on one interface */
- if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm2->seqno))
- goto out;
+ /* don't consider neighbours with worse throughput.
+ * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than
+ * the last received seqno from our best next hop.
+ */
+ if (router) {
+ router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
+ neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
- /* acquire possibly updated router */
- router = batadv_orig_router_get(orig_node, if_outgoing);
+ /* if these are not allocated, something is wrong. */
+ if (!router_ifinfo || !neigh_ifinfo)
+ goto out;
- /* strict rule: forward packets coming from the best next hop only */
- if (neigh_node != router)
- goto out;
+ neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno;
+ router_last_seqno = router_ifinfo->bat_v.last_seqno;
+ neigh_seq_diff = neigh_last_seqno - router_last_seqno;
+ router_throughput = router_ifinfo->bat_v.throughput;
+ neigh_throughput = neigh_ifinfo->bat_v.throughput;
- /* only forward for specific interface, not for the default one. */
- if (if_outgoing != BATADV_IF_DEFAULT) {
- orig_ifinfo->last_seqno_forwarded = ntohl(ogm2->seqno);
- batadv_v_ogm_forward(bat_priv, ogm2,
- neigh_ifinfo->bat_v.throughput,
- if_incoming, if_outgoing);
+ if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
+ (router_throughput >= neigh_throughput))
+ goto out;
}
+ batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
out:
- if (orig_ifinfo)
- batadv_orig_ifinfo_put(orig_ifinfo);
if (router)
batadv_neigh_node_put(router);
if (orig_neigh_router)
batadv_neigh_node_put(orig_neigh_router);
if (orig_neigh_node)
batadv_orig_node_put(orig_neigh_node);
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
if (neigh_ifinfo)
batadv_neigh_ifinfo_put(neigh_ifinfo);
+
+ return forward;
}
/**
@@ -592,6 +583,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing)
{
int seqno_age;
+ bool forward;
/* first, update the metric with according sanity checks */
seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node,
@@ -610,8 +602,14 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
ntohs(ogm2->tvlv_len));
/* if the metric update went through, update routes if needed */
- batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node,
- neigh_node, if_incoming, if_outgoing);
+ forward = batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node,
+ neigh_node, if_incoming,
+ if_outgoing);
+
+ /* if the routes have been processed correctly, check and forward */
+ if (forward)
+ batadv_v_ogm_forward(bat_priv, ogm2, orig_node, neigh_node,
+ if_incoming, if_outgoing);
}
/**
@@ -713,9 +711,14 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (hard_iface->soft_iface != bat_priv->soft_iface)
continue;
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ continue;
+
batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
orig_node, neigh_node,
if_incoming, hard_iface);
+
+ batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
out:
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index b56bb000a0ab..a0c7913837a5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -38,11 +38,11 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
* the last sequence number
* @set_mark: whether this packet should be marked in seq_bits
*
- * Return: 1 if the window was moved (either new or very old),
- * 0 if the window was not moved/shifted.
+ * Return: true if the window was moved (either new or very old),
+ * false if the window was not moved/shifted.
*/
-int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
- int set_mark)
+bool batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
+ s32 seq_num_diff, int set_mark)
{
struct batadv_priv *bat_priv = priv;
@@ -52,7 +52,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
if (seq_num_diff <= 0 && seq_num_diff > -BATADV_TQ_LOCAL_WINDOW_SIZE) {
if (set_mark)
batadv_set_bit(seq_bits, -seq_num_diff);
- return 0;
+ return false;
}
/* sequence number is slightly newer, so we shift the window and
@@ -63,7 +63,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
if (set_mark)
batadv_set_bit(seq_bits, 0);
- return 1;
+ return true;
}
/* sequence number is much newer, probably missed a lot of packets */
@@ -75,7 +75,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
if (set_mark)
batadv_set_bit(seq_bits, 0);
- return 1;
+ return true;
}
/* received a much older packet. The other host either restarted
@@ -94,5 +94,5 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
if (set_mark)
batadv_set_bit(seq_bits, 0);
- return 1;
+ return true;
}
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 3e41bb80eb81..0e6e9d09078c 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -22,6 +22,7 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
+#include <linux/stddef.h>
#include <linux/types.h>
/**
@@ -31,17 +32,17 @@
* @last_seqno: latest sequence number in seq_bits
* @curr_seqno: sequence number to test for
*
- * Return: 1 if the corresponding bit in the given seq_bits indicates true
- * and curr_seqno is within range of last_seqno. Otherwise returns 0.
+ * Return: true if the corresponding bit in the given seq_bits indicates true
+ * and curr_seqno is within range of last_seqno. Otherwise returns false.
*/
-static inline int batadv_test_bit(const unsigned long *seq_bits,
- u32 last_seqno, u32 curr_seqno)
+static inline bool batadv_test_bit(const unsigned long *seq_bits,
+ u32 last_seqno, u32 curr_seqno)
{
s32 diff;
diff = last_seqno - curr_seqno;
if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE)
- return 0;
+ return false;
return test_bit(diff, seq_bits) != 0;
}
@@ -55,7 +56,7 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
set_bit(n, seq_bits); /* turn the position on */
}
-int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
- int set_mark);
+bool batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
+ s32 seq_num_diff, int set_mark);
#endif /* _NET_BATMAN_ADV_BITARRAY_H_ */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 0a6c8b824a00..748a9ead7ce5 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -50,6 +50,7 @@
#include "hash.h"
#include "originator.h"
#include "packet.h"
+#include "sysfs.h"
#include "translation-table.h"
static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
@@ -100,10 +101,10 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
* @node: list node of the first entry to compare
* @data2: pointer to the second backbone gateway
*
- * Return: 1 if the backbones have the same data, 0 otherwise
+ * Return: true if the backbones have the same data, false otherwise
*/
-static int batadv_compare_backbone_gw(const struct hlist_node *node,
- const void *data2)
+static bool batadv_compare_backbone_gw(const struct hlist_node *node,
+ const void *data2)
{
const void *data1 = container_of(node, struct batadv_bla_backbone_gw,
hash_entry);
@@ -111,23 +112,23 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
const struct batadv_bla_backbone_gw *gw2 = data2;
if (!batadv_compare_eth(gw1->orig, gw2->orig))
- return 0;
+ return false;
if (gw1->vid != gw2->vid)
- return 0;
+ return false;
- return 1;
+ return true;
}
/**
- * batadv_compare_backbone_gw - compare address and vid of two claims
+ * batadv_compare_claim - compare address and vid of two claims
* @node: list node of the first entry to compare
* @data2: pointer to the second claims
*
- * Return: 1 if the claim have the same data, 0 otherwise
+ * Return: true if the claims have the same data, false otherwise
*/
-static int batadv_compare_claim(const struct hlist_node *node,
- const void *data2)
+static bool batadv_compare_claim(const struct hlist_node *node,
+ const void *data2)
{
const void *data1 = container_of(node, struct batadv_bla_claim,
hash_entry);
@@ -135,12 +136,12 @@ static int batadv_compare_claim(const struct hlist_node *node,
const struct batadv_bla_claim *cl2 = data2;
if (!batadv_compare_eth(cl1->addr, cl2->addr))
- return 0;
+ return false;
if (cl1->vid != cl2->vid)
- return 0;
+ return false;
- return 1;
+ return true;
}
/**
@@ -200,9 +201,9 @@ static void batadv_claim_put(struct batadv_bla_claim *claim)
*
* Return: claim if found or NULL otherwise.
*/
-static struct batadv_bla_claim
-*batadv_claim_hash_find(struct batadv_priv *bat_priv,
- struct batadv_bla_claim *data)
+static struct batadv_bla_claim *
+batadv_claim_hash_find(struct batadv_priv *bat_priv,
+ struct batadv_bla_claim *data)
{
struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
struct hlist_head *head;
@@ -407,6 +408,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ethhdr->h_source, ethhdr->h_dest,
BATADV_PRINT_VID(vid));
break;
+ case BATADV_CLAIM_TYPE_LOOPDETECT:
+ ether_addr_copy(ethhdr->h_source, mac);
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
+ ethhdr->h_source, ethhdr->h_dest,
+ BATADV_PRINT_VID(vid));
+
+ break;
}
if (vid & BATADV_VLAN_HAS_TAG)
@@ -427,6 +436,36 @@ out:
}
/**
+ * batadv_bla_loopdetect_report - worker for reporting the loop
+ * @work: work queue item
+ *
+ * Throws an uevent, as the loopdetect check function can't do that itself
+ * since the kernel may sleep while throwing uevents.
+ */
+static void batadv_bla_loopdetect_report(struct work_struct *work)
+{
+ struct batadv_bla_backbone_gw *backbone_gw;
+ struct batadv_priv *bat_priv;
+ char vid_str[6] = { '\0' };
+
+ backbone_gw = container_of(work, struct batadv_bla_backbone_gw,
+ report_work);
+ bat_priv = backbone_gw->bat_priv;
+
+ batadv_info(bat_priv->soft_iface,
+ "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n",
+ BATADV_PRINT_VID(backbone_gw->vid));
+ snprintf(vid_str, sizeof(vid_str), "%d",
+ BATADV_PRINT_VID(backbone_gw->vid));
+ vid_str[sizeof(vid_str) - 1] = 0;
+
+ batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT,
+ vid_str);
+
+ batadv_backbone_gw_put(backbone_gw);
+}
+
+/**
* batadv_bla_get_backbone_gw - finds or creates a backbone gateway
* @bat_priv: the bat priv with all the soft interface information
* @orig: the mac address of the originator
@@ -464,6 +503,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
atomic_set(&entry->request_sent, 0);
atomic_set(&entry->wait_periods, 0);
ether_addr_copy(entry->orig, orig);
+ INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
/* one for the hash, one for returning */
kref_init(&entry->refcount);
@@ -735,22 +775,22 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
* @backbone_addr: originator address of the sender (Ethernet source MAC)
* @vid: the VLAN ID of the frame
*
- * Return: 1 if handled
+ * Return: true if handled
*/
-static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
- u8 *backbone_addr, unsigned short vid)
+static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
+ u8 *backbone_addr, unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
u16 backbone_crc, crc;
if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
- return 0;
+ return false;
backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid,
false);
if (unlikely(!backbone_gw))
- return 1;
+ return true;
/* handle as ANNOUNCE frame */
backbone_gw->lasttime = jiffies;
@@ -783,7 +823,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
}
batadv_backbone_gw_put(backbone_gw);
- return 1;
+ return true;
}
/**
@@ -794,29 +834,29 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
* @ethhdr: ethernet header of a packet
* @vid: the VLAN ID of the frame
*
- * Return: 1 if handled
+ * Return: true if handled
*/
-static int batadv_handle_request(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *primary_if,
- u8 *backbone_addr, struct ethhdr *ethhdr,
- unsigned short vid)
+static bool batadv_handle_request(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *primary_if,
+ u8 *backbone_addr, struct ethhdr *ethhdr,
+ unsigned short vid)
{
/* check for REQUEST frame */
if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest))
- return 0;
+ return false;
/* sanity check, this should not happen on a normal switch,
* we ignore it in this case.
*/
if (!batadv_compare_eth(ethhdr->h_dest, primary_if->net_dev->dev_addr))
- return 1;
+ return true;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_request(): REQUEST vid %d (sent by %pM)...\n",
BATADV_PRINT_VID(vid), ethhdr->h_source);
batadv_bla_answer_request(bat_priv, primary_if, vid);
- return 1;
+ return true;
}
/**
@@ -827,12 +867,12 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
* @claim_addr: Client to be unclaimed (ARP sender HW MAC)
* @vid: the VLAN ID of the frame
*
- * Return: 1 if handled
+ * Return: true if handled
*/
-static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *primary_if,
- u8 *backbone_addr, u8 *claim_addr,
- unsigned short vid)
+static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *primary_if,
+ u8 *backbone_addr, u8 *claim_addr,
+ unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -845,7 +885,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid);
if (!backbone_gw)
- return 1;
+ return true;
/* this must be an UNCLAIM frame */
batadv_dbg(BATADV_DBG_BLA, bat_priv,
@@ -854,7 +894,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
batadv_bla_del_claim(bat_priv, claim_addr, vid);
batadv_backbone_gw_put(backbone_gw);
- return 1;
+ return true;
}
/**
@@ -865,12 +905,12 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
* @claim_addr: client mac address to be claimed (ARP sender HW MAC)
* @vid: the VLAN ID of the frame
*
- * Return: 1 if handled
+ * Return: true if handled
*/
-static int batadv_handle_claim(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *primary_if,
- u8 *backbone_addr, u8 *claim_addr,
- unsigned short vid)
+static bool batadv_handle_claim(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *primary_if,
+ u8 *backbone_addr, u8 *claim_addr,
+ unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -880,7 +920,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
false);
if (unlikely(!backbone_gw))
- return 1;
+ return true;
/* this must be a CLAIM frame */
batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw);
@@ -891,7 +931,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
/* TODO: we could call something like tt_local_del() here. */
batadv_backbone_gw_put(backbone_gw);
- return 1;
+ return true;
}
/**
@@ -975,12 +1015,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
* @primary_if: the primary hard interface of this batman soft interface
* @skb: the frame to be checked
*
- * Return: 1 if it was a claim frame, otherwise return 0 to
+ * Return: true if it was a claim frame, otherwise return false to
* tell the callee that it can use the frame on its own.
*/
-static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *primary_if,
- struct sk_buff *skb)
+static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *primary_if,
+ struct sk_buff *skb)
{
struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
u8 *hw_src, *hw_dst;
@@ -1011,7 +1051,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN,
&vhdr_buf);
if (!vhdr)
- return 0;
+ return false;
proto = vhdr->h_vlan_encapsulated_proto;
headlen += VLAN_HLEN;
@@ -1020,12 +1060,12 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
}
if (proto != htons(ETH_P_ARP))
- return 0; /* not a claim frame */
+ return false; /* not a claim frame */
/* this must be a ARP frame. check if it is a claim. */
if (unlikely(!pskb_may_pull(skb, headlen + arp_hdr_len(skb->dev))))
- return 0;
+ return false;
/* pskb_may_pull() may have modified the pointers, get ethhdr again */
ethhdr = eth_hdr(skb);
@@ -1035,13 +1075,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
* IP information
*/
if (arphdr->ar_hrd != htons(ARPHRD_ETHER))
- return 0;
+ return false;
if (arphdr->ar_pro != htons(ETH_P_IP))
- return 0;
+ return false;
if (arphdr->ar_hln != ETH_ALEN)
- return 0;
+ return false;
if (arphdr->ar_pln != 4)
- return 0;
+ return false;
hw_src = (u8 *)arphdr + sizeof(struct arphdr);
hw_dst = hw_src + ETH_ALEN + 4;
@@ -1051,14 +1091,18 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
/* check if it is a claim frame in general */
if (memcmp(bla_dst->magic, bla_dst_own->magic,
sizeof(bla_dst->magic)) != 0)
- return 0;
+ return false;
/* check if there is a claim frame encapsulated deeper in (QinQ) and
* drop that, as this is not supported by BLA but should also not be
* sent via the mesh.
*/
if (vlan_depth > 1)
- return 1;
+ return true;
+
+ /* Allow loopdetect frames on the mesh in any case. */
+ if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT)
+ return false;
/* check if it is a claim frame. */
ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -1070,7 +1114,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
hw_dst);
if (ret < 2)
- return ret;
+ return !!ret;
/* become a backbone gw ourselves on this vlan if not happened yet */
batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid);
@@ -1080,30 +1124,30 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
case BATADV_CLAIM_TYPE_CLAIM:
if (batadv_handle_claim(bat_priv, primary_if, hw_src,
ethhdr->h_source, vid))
- return 1;
+ return true;
break;
case BATADV_CLAIM_TYPE_UNCLAIM:
if (batadv_handle_unclaim(bat_priv, primary_if,
ethhdr->h_source, hw_src, vid))
- return 1;
+ return true;
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
if (batadv_handle_announce(bat_priv, hw_src, ethhdr->h_source,
vid))
- return 1;
+ return true;
break;
case BATADV_CLAIM_TYPE_REQUEST:
if (batadv_handle_request(bat_priv, primary_if, hw_src, ethhdr,
vid))
- return 1;
+ return true;
break;
}
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
- return 1;
+ return true;
}
/**
@@ -1265,6 +1309,26 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
}
/**
+ * batadv_bla_send_loopdetect - send a loopdetect frame
+ * @bat_priv: the bat priv with all the soft interface information
+ * @backbone_gw: the backbone gateway for which a loop should be detected
+ *
+ * To detect loops that the bridge loop avoidance can't handle, send a loop
+ * detection packet on the backbone. Unlike other BLA frames, this frame will
+ * be allowed on the mesh by other nodes. If it is received on the mesh, this
+ * indicates that there is a loop.
+ */
+static void
+batadv_bla_send_loopdetect(struct batadv_priv *bat_priv,
+ struct batadv_bla_backbone_gw *backbone_gw)
+{
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "Send loopdetect frame for vid %d\n",
+ backbone_gw->vid);
+ batadv_bla_send_claim(bat_priv, bat_priv->bla.loopdetect_addr,
+ backbone_gw->vid, BATADV_CLAIM_TYPE_LOOPDETECT);
+}
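
The kernel-doc above describes the mechanism: the node periodically injects a claim frame whose source MAC is a private random address, and because this is the one claim type other nodes let pass onto the mesh, seeing that address come back implies a bridge loop. A simplified userspace analogue of the receive-side decision is sketched below; the 3-second window mirrors BATADV_BLA_LOOPDETECT_TIMEOUT, everything else is illustrative.

#include <stdbool.h>
#include <string.h>
#include <time.h>

#define LOOPDETECT_TIMEOUT_SEC 3	/* mirrors BATADV_BLA_LOOPDETECT_TIMEOUT (3000 ms) */

struct loopdetect_state {
	unsigned char addr[6];	/* current random loop detection MAC */
	time_t lasttime;	/* when the last detection frames were sent */
};

/* Return true when the frame must be dropped because it carries our own
 * loop detection source address; report a loop only if it came back within
 * the detection window.
 */
static bool loopdetect_check(const struct loopdetect_state *st,
			     const unsigned char *eth_source, bool *loop_found)
{
	*loop_found = false;

	if (memcmp(eth_source, st->addr, 6) != 0)
		return false;		/* not our probe, process normally */

	if (time(NULL) - st->lasttime > LOOPDETECT_TIMEOUT_SEC)
		return true;		/* too late: drop, but no report */

	*loop_found = true;		/* probe returned in time: loop */
	return true;
}

A caller would drop the frame whenever this returns true and schedule a report only when *loop_found is set, mirroring the "goto handled" path added to batadv_bla_rx() further down in this patch.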
+
+/**
* batadv_bla_status_update - purge bla interfaces if necessary
* @net_dev: the soft interface net device
*/
@@ -1301,9 +1365,10 @@ static void batadv_bla_periodic_work(struct work_struct *work)
struct batadv_bla_backbone_gw *backbone_gw;
struct batadv_hashtable *hash;
struct batadv_hard_iface *primary_if;
+ bool send_loopdetect = false;
int i;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
priv_bla = container_of(delayed_work, struct batadv_priv_bla, work);
bat_priv = container_of(priv_bla, struct batadv_priv, bla);
primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -1316,6 +1381,22 @@ static void batadv_bla_periodic_work(struct work_struct *work)
if (!atomic_read(&bat_priv->bridge_loop_avoidance))
goto out;
+ if (atomic_dec_and_test(&bat_priv->bla.loopdetect_next)) {
+ /* set a new random mac address for the next bridge loop
+ * detection frames. Set the locally administered bit to avoid
+ * collisions with users mac addresses.
+ */
+ random_ether_addr(bat_priv->bla.loopdetect_addr);
+ bat_priv->bla.loopdetect_addr[0] = 0xba;
+ bat_priv->bla.loopdetect_addr[1] = 0xbe;
+ bat_priv->bla.loopdetect_lasttime = jiffies;
+ atomic_set(&bat_priv->bla.loopdetect_next,
+ BATADV_BLA_LOOPDETECT_PERIODS);
+
+ /* mark for sending loop detect on all VLANs */
+ send_loopdetect = true;
+ }
+
hash = bat_priv->bla.backbone_hash;
if (!hash)
goto out;
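
The branch above re-arms the detection every BATADV_BLA_LOOPDETECT_PERIODS periods with a fresh random source address branded with the 0xba:0xbe prefix, which has the locally administered bit set and the multicast bit cleared. A standalone sketch of that address construction, using rand() as a stand-in for the kernel's random_ether_addr():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Build a loop-detection MAC: random payload, fixed ba:be prefix.
 * 0xba is 1011 1010b, so the locally administered bit (0x02) is set and
 * the multicast bit (0x01) is cleared, as in the patch above.
 */
static void make_loopdetect_addr(unsigned char addr[6])
{
	for (int i = 0; i < 6; i++)
		addr[i] = (unsigned char)(rand() & 0xff);
	addr[0] = 0xba;
	addr[1] = 0xbe;
}

int main(void)
{
	unsigned char addr[6];

	srand((unsigned int)time(NULL));
	make_loopdetect_addr(addr);
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
	return 0;
}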
@@ -1332,6 +1413,9 @@ static void batadv_bla_periodic_work(struct work_struct *work)
backbone_gw->lasttime = jiffies;
batadv_bla_send_announce(bat_priv, backbone_gw);
+ if (send_loopdetect)
+ batadv_bla_send_loopdetect(bat_priv,
+ backbone_gw);
/* request_sent is only set after creation to avoid
* problems when we are not yet known as backbone gw
@@ -1405,6 +1489,9 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
bat_priv->bla.bcast_duplist[i].entrytime = entrytime;
bat_priv->bla.bcast_duplist_curr = 0;
+ atomic_set(&bat_priv->bla.loopdetect_next,
+ BATADV_BLA_LOOPDETECT_PERIODS);
+
if (bat_priv->bla.claim_hash)
return 0;
@@ -1442,15 +1529,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
* sent by another host, drop it. We allow equal packets from
* the same host however as this might be intended.
*
- * Return: 1 if a packet is in the duplicate list, 0 otherwise.
+ * Return: true if a packet is in the duplicate list, false otherwise.
*/
-int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct sk_buff *skb)
+bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
{
- int i, curr, ret = 0;
+ int i, curr;
__be32 crc;
struct batadv_bcast_packet *bcast_packet;
struct batadv_bcast_duplist_entry *entry;
+ bool ret = false;
bcast_packet = (struct batadv_bcast_packet *)skb->data;
@@ -1478,9 +1566,9 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
continue;
/* this entry seems to match: same crc, not too old,
- * and from another gw. therefore return 1 to forbid it.
+ * and from another gw. therefore return true to forbid it.
*/
- ret = 1;
+ ret = true;
goto out;
}
/* not found, add a new entry (overwrite the oldest entry)
@@ -1546,21 +1634,21 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
* @orig_node: the orig_node of the frame
* @hdr_size: maximum length of the frame
*
- * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise
- * it returns 0.
+ * Return: true if the orig_node is also a gateway on the soft interface,
+ * otherwise it returns false.
*/
-int batadv_bla_is_backbone_gw(struct sk_buff *skb,
- struct batadv_orig_node *orig_node, int hdr_size)
+bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node, int hdr_size)
{
struct batadv_bla_backbone_gw *backbone_gw;
unsigned short vid;
if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance))
- return 0;
+ return false;
/* first, find out the vid. */
if (!pskb_may_pull(skb, hdr_size + ETH_HLEN))
- return 0;
+ return false;
vid = batadv_get_vid(skb, hdr_size);
@@ -1568,14 +1656,14 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
backbone_gw = batadv_backbone_hash_find(orig_node->bat_priv,
orig_node->orig, vid);
if (!backbone_gw)
- return 0;
+ return false;
batadv_backbone_gw_put(backbone_gw);
- return 1;
+ return true;
}
/**
- * batadv_bla_init - free all bla structures
+ * batadv_bla_free - free all bla structures
* @bat_priv: the bat priv with all the soft interface information
*
* for softinterface free or module unload
@@ -1602,6 +1690,55 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
}
/**
+ * batadv_bla_loopdetect_check - check and handle a detected loop
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the packet to check
+ * @primary_if: interface where the request came on
+ * @vid: the VLAN ID of the frame
+ *
+ * Checks if this packet is a loop detect frame which has been sent by us,
+ * throw an uevent and log the event if that is the case.
+ *
+ * Return: true if it is a loop detect frame which is to be dropped, false
+ * otherwise.
+ */
+static bool
+batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ struct batadv_hard_iface *primary_if,
+ unsigned short vid)
+{
+ struct batadv_bla_backbone_gw *backbone_gw;
+ struct ethhdr *ethhdr;
+
+ ethhdr = eth_hdr(skb);
+
+ /* Only check for the MAC address and skip more checks here for
+ * performance reasons - this function is on the hotpath, after all.
+ */
+ if (!batadv_compare_eth(ethhdr->h_source,
+ bat_priv->bla.loopdetect_addr))
+ return false;
+
+ /* If the packet came too late, don't forward it on the mesh
+ * but don't consider that a loop. It might be a coincidence.
+ */
+ if (batadv_has_timed_out(bat_priv->bla.loopdetect_lasttime,
+ BATADV_BLA_LOOPDETECT_TIMEOUT))
+ return true;
+
+ backbone_gw = batadv_bla_get_backbone_gw(bat_priv,
+ primary_if->net_dev->dev_addr,
+ vid, true);
+ if (unlikely(!backbone_gw))
+ return true;
+
+ queue_work(batadv_event_workqueue, &backbone_gw->report_work);
+ /* backbone_gw is unreferenced in the report work function */
+
+ return true;
+}
+
+/**
* batadv_bla_rx - check packets coming from the mesh.
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
@@ -1614,16 +1751,16 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
*
* in these cases, the skb is further handled by this function
*
- * Return: 1 if handled, otherwise it returns 0 and the caller shall further
- * process the skb.
+ * Return: true if handled, otherwise it returns false and the caller shall
+ * further process the skb.
*/
-int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid, bool is_bcast)
+bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, bool is_bcast)
{
struct ethhdr *ethhdr;
struct batadv_bla_claim search_claim, *claim = NULL;
struct batadv_hard_iface *primary_if;
- int ret;
+ bool ret;
ethhdr = eth_hdr(skb);
@@ -1634,6 +1771,9 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (!atomic_read(&bat_priv->bridge_loop_avoidance))
goto allow;
+ if (batadv_bla_loopdetect_check(bat_priv, skb, primary_if, vid))
+ goto handled;
+
if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
/* don't allow broadcasts while requests are in flight */
if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
@@ -1682,12 +1822,12 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
allow:
batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid);
- ret = 0;
+ ret = false;
goto out;
handled:
kfree_skb(skb);
- ret = 1;
+ ret = true;
out:
if (primary_if)
@@ -1711,16 +1851,16 @@ out:
*
* This call might reallocate skb data.
*
- * Return: 1 if handled, otherwise it returns 0 and the caller shall further
- * process the skb.
+ * Return: true if handled, otherwise it returns false and the caller shall
+ * further process the skb.
*/
-int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid)
+bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid)
{
struct ethhdr *ethhdr;
struct batadv_bla_claim search_claim, *claim = NULL;
struct batadv_hard_iface *primary_if;
- int ret = 0;
+ bool ret = false;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
@@ -1774,10 +1914,10 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
allow:
batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid);
- ret = 0;
+ ret = false;
goto out;
handled:
- ret = 1;
+ ret = true;
out:
if (primary_if)
batadv_hardif_put(primary_if);
@@ -1815,8 +1955,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
"Claims announced for the mesh %s (orig %pM, group id %#.4x)\n",
net_dev->name, primary_addr,
ntohs(bat_priv->bla.claim_dest.group));
- seq_printf(seq, " %-17s %-5s %-17s [o] (%-6s)\n",
- "Client", "VID", "Originator", "CRC");
+ seq_puts(seq,
+ " Client VID Originator [o] (CRC )\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1873,8 +2013,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
"Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n",
net_dev->name, primary_addr,
ntohs(bat_priv->bla.claim_dest.group));
- seq_printf(seq, " %-17s %-5s %-9s (%-6s)\n",
- "Originator", "VID", "last seen", "CRC");
+ seq_puts(seq, " Originator VID last seen (CRC )\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 579f0fa6fe6a..0f01daeb359e 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -27,19 +27,20 @@ struct seq_file;
struct sk_buff;
#ifdef CONFIG_BATMAN_ADV_BLA
-int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid, bool is_bcast);
-int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid);
-int batadv_bla_is_backbone_gw(struct sk_buff *skb,
- struct batadv_orig_node *orig_node, int hdr_size);
+bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, bool is_bcast);
+bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid);
+bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ int hdr_size);
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
void *offset);
bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
unsigned short vid);
-int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct sk_buff *skb);
+bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct batadv_hard_iface *oldif);
@@ -50,24 +51,24 @@ void batadv_bla_free(struct batadv_priv *bat_priv);
#define BATADV_BLA_CRC_INIT 0
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
-static inline int batadv_bla_rx(struct batadv_priv *bat_priv,
- struct sk_buff *skb, unsigned short vid,
- bool is_bcast)
+static inline bool batadv_bla_rx(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ bool is_bcast)
{
- return 0;
+ return false;
}
-static inline int batadv_bla_tx(struct batadv_priv *bat_priv,
- struct sk_buff *skb, unsigned short vid)
+static inline bool batadv_bla_tx(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid)
{
- return 0;
+ return false;
}
-static inline int batadv_bla_is_backbone_gw(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- int hdr_size)
+static inline bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ int hdr_size)
{
- return 0;
+ return false;
}
static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq,
@@ -88,11 +89,11 @@ static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
return false;
}
-static inline int
+static inline bool
batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
- return 0;
+ return false;
}
static inline void
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 48253cf8341b..952900466d88 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -134,7 +134,7 @@ static int batadv_log_release(struct inode *inode, struct file *file)
return 0;
}
-static int batadv_log_empty(struct batadv_priv_debug_log *debug_log)
+static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
{
return !(debug_log->log_start - debug_log->log_end);
}
@@ -365,14 +365,17 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
#define BATADV_DEBUGINFO(_name, _mode, _open) \
struct batadv_debuginfo batadv_debuginfo_##_name = { \
- .attr = { .name = __stringify(_name), \
- .mode = _mode, }, \
- .fops = { .owner = THIS_MODULE, \
- .open = _open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
- } \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = _mode, \
+ }, \
+ .fops = { \
+ .owner = THIS_MODULE, \
+ .open = _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+ }, \
}
/* the following attributes are general and therefore they will be directly
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index e96d7c745b4a..278800a99c69 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -152,7 +152,7 @@ static void batadv_dat_purge(struct work_struct *work)
struct batadv_priv_dat *priv_dat;
struct batadv_priv *bat_priv;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
priv_dat = container_of(delayed_work, struct batadv_priv_dat, work);
bat_priv = container_of(priv_dat, struct batadv_priv, dat);
@@ -165,14 +165,14 @@ static void batadv_dat_purge(struct work_struct *work)
* @node: node in the local table
* @data2: second object to compare the node to
*
- * Return: 1 if the two entries are the same, 0 otherwise.
+ * Return: true if the two entries are the same, false otherwise.
*/
-static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
+static bool batadv_compare_dat(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_dat_entry,
hash_entry);
- return memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0;
+ return memcmp(data1, data2, sizeof(__be32)) == 0;
}
/**
@@ -568,6 +568,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* be sent to
* @bat_priv: the bat priv with all the soft interface information
* @ip_dst: ipv4 to look up in the DHT
+ * @vid: VLAN identifier
*
* An originator O is selected if and only if its DHT_ID value is one of three
* closest values (from the LEFT, with wrap around if needed) then the hash
@@ -576,7 +577,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM.
*/
static struct batadv_dat_candidate *
-batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
+batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
+ unsigned short vid)
{
int select;
batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -592,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
return NULL;
dat.ip = ip_dst;
- dat.vid = 0;
+ dat.vid = vid;
ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
BATADV_DAT_ADDR_MAX);
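
The kernel-doc above states the DHT rule: a candidate is an originator whose DHT_ID is one of the three values closest to the key hash when walking to the left, with wrap-around. The sketch below illustrates picking the single closest ID from a plain array under that metric; it assumes the DHT address space is 16 bits wide (as batadv_dat_addr_t is elsewhere in the tree), and the kernel of course repeats the selection three times over the originator hash rather than an array.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* How far key lies to the right of id; the candidate with the smallest such
 * distance is the closest one from the left, wrapping at the 16-bit boundary.
 */
static uint16_t dht_left_distance(uint16_t id, uint16_t key)
{
	return (uint16_t)(key - id);	/* unsigned wrap-around does the modulo */
}

static size_t dht_closest_from_left(const uint16_t *ids, size_t n, uint16_t key)
{
	size_t best = 0;

	for (size_t i = 1; i < n; i++)
		if (dht_left_distance(ids[i], key) <
		    dht_left_distance(ids[best], key))
			best = i;
	return best;
}

int main(void)
{
	uint16_t ids[] = { 10, 900, 40000, 65000 };
	uint16_t key = 5;	/* hypothetical hash of ip_dst/vid */
	size_t idx = dht_closest_from_left(ids, 4, key);

	/* 65000 wins: after the wrap it is only 541 steps to the left of key 5 */
	printf("candidate id = %u\n", (unsigned int)ids[idx]);
	return 0;
}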
@@ -612,6 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @ip: the DHT key
+ * @vid: VLAN identifier
* @packet_subtype: unicast4addr packet subtype to use
*
* This function copies the skb with pskb_copy() and is sent as unicast packet
@@ -622,7 +625,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
*/
static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *skb, __be32 ip,
- int packet_subtype)
+ unsigned short vid, int packet_subtype)
{
int i;
bool ret = false;
@@ -631,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *tmp_skb;
struct batadv_dat_candidate *cand;
- cand = batadv_dat_select_candidates(bat_priv, ip);
+ cand = batadv_dat_select_candidates(bat_priv, ip, vid);
if (!cand)
goto out;
@@ -717,7 +720,7 @@ void batadv_dat_status_update(struct net_device *net_dev)
}
/**
- * batadv_gw_tvlv_ogm_handler_v1 - process incoming dat tvlv container
+ * batadv_dat_tvlv_ogm_handler_v1 - process incoming dat tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -814,8 +817,8 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
goto out;
seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name);
- seq_printf(seq, " %-7s %-9s %4s %11s\n", "IPv4",
- "MAC", "VID", "last-seen");
+ seq_puts(seq,
+ " IPv4 MAC VID last-seen\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1022,7 +1025,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
ret = true;
} else {
/* Send the request to the DHT */
- ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
+ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
BATADV_P_DAT_DHT_GET);
}
out:
@@ -1150,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
/* Send the ARP reply to the candidates for both the IP addresses that
* the node obtained from the ARP reply
*/
- batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
- batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
}
/**
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index e6956d0746a2..65536db1bff7 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -407,8 +407,8 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
unsigned int mtu)
{
struct sk_buff *skb_fragment;
- unsigned header_size = sizeof(*frag_head);
- unsigned fragment_size = mtu - header_size;
+ unsigned int header_size = sizeof(*frag_head);
+ unsigned int fragment_size = mtu - header_size;
skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
if (!skb_fragment)
@@ -444,15 +444,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_frag_packet frag_header;
struct sk_buff *skb_fragment;
- unsigned mtu = neigh_node->if_incoming->net_dev->mtu;
- unsigned header_size = sizeof(frag_header);
- unsigned max_fragment_size, max_packet_size;
+ unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
+ unsigned int header_size = sizeof(frag_header);
+ unsigned int max_fragment_size, max_packet_size;
bool ret = false;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
*/
- mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+ mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
max_fragment_size = mtu - header_size;
max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
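
The type cleanups above touch the fragment sizing path; the arithmetic itself is worth spelling out. A worked sketch with assumed constants follows (the 1400-byte per-fragment cap, 16-fragment limit and 20-byte fragment header are illustrative assumptions, not values taken from this diff):

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500;		/* outgoing interface MTU */
	unsigned int frag_max_frag_size = 1400;	/* assumed cap per fragment */
	unsigned int frag_max_fragments = 16;	/* assumed fragment limit */
	unsigned int header_size = 20;		/* assumed fragment header size */
	unsigned int max_fragment_size, max_packet_size;

	if (mtu > frag_max_frag_size)
		mtu = frag_max_frag_size;	/* the min_t() in the patch */

	max_fragment_size = mtu - header_size;
	max_packet_size = max_fragment_size * frag_max_fragments;

	printf("payload per fragment: %u, largest fragmentable skb: %u\n",
	       max_fragment_size, max_packet_size);
	return 0;
}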
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index c59aff5ccac8..5839c569f769 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -135,8 +135,8 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->gw.list_lock);
- if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount))
- new_gw_node = NULL;
+ if (new_gw_node)
+ kref_get(&new_gw_node->refcount);
curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
@@ -440,15 +440,11 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
if (gateway->bandwidth_down == 0)
return;
- if (!kref_get_unless_zero(&orig_node->refcount))
- return;
-
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
- if (!gw_node) {
- batadv_orig_node_put(orig_node);
+ if (!gw_node)
return;
- }
+ kref_get(&orig_node->refcount);
INIT_HLIST_NODE(&gw_node->list);
gw_node->orig_node = orig_node;
gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index b22b2775a0a5..8c2f39962fa5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -36,7 +36,6 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
-#include <net/net_namespace.h>
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
@@ -121,6 +120,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
{
struct net_device *parent_dev;
+ struct net *net = dev_net(net_dev);
bool ret;
/* check if this is a batman-adv mesh interface */
@@ -133,7 +133,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
return false;
/* recurse over the parent device */
- parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev));
+ parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev));
/* if we got a NULL parent_dev there is something broken.. */
if (WARN(!parent_dev, "Cannot find parent device"))
return false;
@@ -146,22 +146,22 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
return ret;
}
-static int batadv_is_valid_iface(const struct net_device *net_dev)
+static bool batadv_is_valid_iface(const struct net_device *net_dev)
{
if (net_dev->flags & IFF_LOOPBACK)
- return 0;
+ return false;
if (net_dev->type != ARPHRD_ETHER)
- return 0;
+ return false;
if (net_dev->addr_len != ETH_ALEN)
- return 0;
+ return false;
/* no batman over batman */
if (batadv_is_on_batman_iface(net_dev))
- return 0;
+ return false;
- return 1;
+ return true;
}
/**
@@ -236,8 +236,8 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
ASSERT_RTNL();
- if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount))
- new_hard_iface = NULL;
+ if (new_hard_iface)
+ kref_get(&new_hard_iface->refcount);
curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
rcu_assign_pointer(bat_priv->primary_if, new_hard_iface);
@@ -407,6 +407,9 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
batadv_update_min_mtu(hard_iface->soft_iface);
+ if (bat_priv->bat_algo_ops->bat_iface_activate)
+ bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
+
out:
if (primary_if)
batadv_hardif_put(primary_if);
@@ -453,7 +456,7 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave,
}
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
- const char *iface_name)
+ struct net *net, const char *iface_name)
{
struct batadv_priv *bat_priv;
struct net_device *soft_iface, *master;
@@ -464,13 +467,12 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
goto out;
- if (!kref_get_unless_zero(&hard_iface->refcount))
- goto out;
+ kref_get(&hard_iface->refcount);
- soft_iface = dev_get_by_name(&init_net, iface_name);
+ soft_iface = dev_get_by_name(net, iface_name);
if (!soft_iface) {
- soft_iface = batadv_softif_create(iface_name);
+ soft_iface = batadv_softif_create(net, iface_name);
if (!soft_iface) {
ret = -ENOMEM;
@@ -519,6 +521,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
goto err_upper;
}
+ kref_get(&hard_iface->refcount);
hard_iface->batman_adv_ptype.type = ethertype;
hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv;
hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
@@ -572,8 +575,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hard_iface *primary_if = NULL;
- if (hard_iface->if_status == BATADV_IF_ACTIVE)
- batadv_hardif_deactivate_interface(hard_iface);
+ batadv_hardif_deactivate_interface(hard_iface);
if (hard_iface->if_status != BATADV_IF_INACTIVE)
goto out;
@@ -581,6 +583,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_info(hard_iface->soft_iface, "Removing interface: %s\n",
hard_iface->net_dev->name);
dev_remove_pack(&hard_iface->batman_adv_ptype);
+ batadv_hardif_put(hard_iface);
bat_priv->num_ifaces--;
batadv_orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
@@ -650,8 +653,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
ASSERT_RTNL();
- ret = batadv_is_valid_iface(net_dev);
- if (ret != 1)
+ if (!batadv_is_valid_iface(net_dev))
goto out;
dev_hold(net_dev);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index d74f1983f33e..a76724d369bf 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
struct net_device;
+struct net;
enum batadv_hard_if_state {
BATADV_IF_NOT_IN_USE,
@@ -55,7 +56,7 @@ bool batadv_is_wifi_iface(int ifindex);
struct batadv_hard_iface*
batadv_hardif_get_by_netdev(const struct net_device *net_dev);
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
- const char *iface_name);
+ struct net *net, const char *iface_name);
void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
enum batadv_hard_if_cleanup autodel);
void batadv_hardif_remove_interfaces(void);
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 9bb57b87447c..cbbf87075f06 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -32,10 +32,10 @@ struct lock_class_key;
/* callback to a compare function. should compare 2 element datas for their
* keys
*
- * Return: 0 if same and not 0 if not same
+ * Return: true if same and false if not same
*/
-typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *,
- const void *);
+typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *,
+ const void *);
/* the hashfunction
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 14d0013b387e..777aea10cd8f 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -104,25 +104,21 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
static int batadv_socket_release(struct inode *inode, struct file *file)
{
- struct batadv_socket_client *socket_client = file->private_data;
- struct batadv_socket_packet *socket_packet;
- struct list_head *list_pos, *list_pos_tmp;
+ struct batadv_socket_client *client = file->private_data;
+ struct batadv_socket_packet *packet, *tmp;
- spin_lock_bh(&socket_client->lock);
+ spin_lock_bh(&client->lock);
/* for all packets in the queue ... */
- list_for_each_safe(list_pos, list_pos_tmp, &socket_client->queue_list) {
- socket_packet = list_entry(list_pos,
- struct batadv_socket_packet, list);
-
- list_del(list_pos);
- kfree(socket_packet);
+ list_for_each_entry_safe(packet, tmp, &client->queue_list, list) {
+ list_del(&packet->list);
+ kfree(packet);
}
- batadv_socket_client_hash[socket_client->index] = NULL;
- spin_unlock_bh(&socket_client->lock);
+ batadv_socket_client_hash[client->index] = NULL;
+ spin_unlock_bh(&client->lock);
- kfree(socket_client);
+ kfree(client);
module_put(THIS_MODULE);
return 0;
@@ -337,7 +333,7 @@ err:
}
/**
- * batadv_socket_receive_packet - schedule an icmp packet to be sent to
+ * batadv_socket_add_packet - schedule an icmp packet to be sent to
* userspace on an icmp socket.
* @socket_client: the socket this packet belongs to
* @icmph: pointer to the header of the icmp packet
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d64ddb961979..5f2974bd1227 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -401,11 +401,19 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
hard_iface = container_of(ptype, struct batadv_hard_iface,
batman_adv_ptype);
+
+ /* Prevent processing a packet received on an interface which is getting
+ * shut down, otherwise the packet may trigger de-reference errors
+ * further down in the receive path.
+ */
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ goto err_out;
+
skb = skb_share_check(skb, GFP_ATOMIC);
/* skb was released by skb_share_check() */
if (!skb)
- goto err_out;
+ goto err_put;
/* packet should hold at least type and version */
if (unlikely(!pskb_may_pull(skb, 2)))
@@ -448,6 +456,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
if (ret == NET_RX_DROP)
kfree_skb(skb);
+ batadv_hardif_put(hard_iface);
+
/* return NET_RX_SUCCESS in any case as we
* most probably dropped the packet for
* routing-logical reasons.
@@ -456,6 +466,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
err_free:
kfree_skb(skb);
+err_put:
+ batadv_hardif_put(hard_iface);
err_out:
return NET_RX_DROP;
}
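
The hunk above only takes the packet through the receive path when kref_get_unless_zero() still succeeds, so an interface in teardown makes the frame bounce instead of dereferencing freed state. Below is a userspace analogue of that "get unless zero" refcount pattern with C11 atomics; it is a sketch of the idea, not the kernel kref API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	atomic_uint refcount;
};

/* Succeeds only while at least one reference is still held elsewhere; once
 * the count has hit zero the object is about to be freed and must not be
 * revived.
 */
static bool get_unless_zero(struct object *obj)
{
	unsigned int old = atomic_load(&obj->refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
			return true;
		/* old was reloaded by the failed CAS, loop and retry */
	}
	return false;
}

static void put(struct object *obj)
{
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		printf("last reference dropped, free object here\n");
}

int main(void)
{
	struct object obj = { .refcount = 1 };

	if (get_unless_zero(&obj)) {	/* mirrors kref_get_unless_zero() */
		/* ... deliver the packet ... */
		put(&obj);
	}
	put(&obj);	/* drop the initial reference */
	return 0;
}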
@@ -663,8 +675,8 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
*
* Return: tvlv handler if found or NULL otherwise.
*/
-static struct batadv_tvlv_handler
-*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+static struct batadv_tvlv_handler *
+batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
{
struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
@@ -722,8 +734,8 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
*
* Return: tvlv container if found or NULL otherwise.
*/
-static struct batadv_tvlv_container
-*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+static struct batadv_tvlv_container *
+batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
{
struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
@@ -736,9 +748,7 @@ static struct batadv_tvlv_container
if (tvlv_tmp->tvlv_hdr.version != version)
continue;
- if (!kref_get_unless_zero(&tvlv_tmp->refcount))
- continue;
-
+ kref_get(&tvlv_tmp->refcount);
tvlv = tvlv_tmp;
break;
}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index db4533631834..76925266deed 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.1"
+#define BATADV_SOURCE_VERSION "2016.2"
#endif
/* B.A.T.M.A.N. parameters */
@@ -120,6 +120,8 @@
#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6)
#define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10)
#define BATADV_BLA_WAIT_PERIODS 3
+#define BATADV_BLA_LOOPDETECT_PERIODS 6
+#define BATADV_BLA_LOOPDETECT_TIMEOUT 3000 /* 3 seconds */
#define BATADV_DUPLIST_SIZE 16
#define BATADV_DUPLIST_TIMEOUT 500 /* 500 ms */
@@ -142,10 +144,12 @@ enum batadv_uev_action {
BATADV_UEV_ADD = 0,
BATADV_UEV_DEL,
BATADV_UEV_CHANGE,
+ BATADV_UEV_LOOPDETECT,
};
enum batadv_uev_type {
BATADV_UEV_GW = 0,
+ BATADV_UEV_BLA,
};
#define BATADV_GW_THRESHOLD 50
@@ -288,7 +292,7 @@ static inline void _batadv_dbg(int type __always_unused,
*
* note: can't use ether_addr_equal() as it requires aligned memory
*
- * Return: 1 if they are the same ethernet addr
+ * Return: true if they are the same ethernet addr
*/
static inline bool batadv_compare_eth(const void *data1, const void *data2)
{
@@ -296,7 +300,8 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
}
/**
- * has_timed_out - compares current time (jiffies) and timestamp + timeout
+ * batadv_has_timed_out - compares current time (jiffies) and timestamp +
+ * timeout
* @timestamp: base value to compare with (in jiffies)
* @timeout: added to base value before comparing (in milliseconds)
*
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 8caa2c72efa3..c32f24fafe67 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -394,7 +394,8 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_want_all_ip_count - count nodes with unspecific mcast interest
+ * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast
+ * interest
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: ethernet header of a packet
*
@@ -433,7 +434,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag
+ * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag
* @bat_priv: the bat priv with all the soft interface information
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
@@ -460,7 +461,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag
+ * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag
* @bat_priv: the bat priv with all the soft interface information
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
@@ -487,7 +488,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_want_forw_ip_node_get - get a node with an ipv4/ipv6 flag
+ * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: an ethernet header to determine the protocol family from
*
@@ -511,7 +512,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag
+ * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag
* @bat_priv: the bat priv with all the soft interface information
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b41719b6487a..678f06865312 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -510,10 +510,10 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
* @node: node in the local table
* @data2: second object to compare the node to
*
- * Return: 1 if the two entry are the same, 0 otherwise
+ * Return: true if the two entry are the same, false otherwise
*/
-static int batadv_nc_hash_compare(const struct hlist_node *node,
- const void *data2)
+static bool batadv_nc_hash_compare(const struct hlist_node *node,
+ const void *data2)
{
const struct batadv_nc_path *nc_path1, *nc_path2;
@@ -521,15 +521,13 @@ static int batadv_nc_hash_compare(const struct hlist_node *node,
nc_path2 = data2;
/* Return 1 if the two keys are identical */
- if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop,
- sizeof(nc_path1->prev_hop)) != 0)
- return 0;
+ if (!batadv_compare_eth(nc_path1->prev_hop, nc_path2->prev_hop))
+ return false;
- if (memcmp(nc_path1->next_hop, nc_path2->next_hop,
- sizeof(nc_path1->next_hop)) != 0)
- return 0;
+ if (!batadv_compare_eth(nc_path1->next_hop, nc_path2->next_hop))
+ return false;
- return 1;
+ return true;
}
/**
@@ -714,7 +712,7 @@ static void batadv_nc_worker(struct work_struct *work)
struct batadv_priv *bat_priv;
unsigned long timeout;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
bat_priv = container_of(priv_nc, struct batadv_priv, nc);
@@ -793,10 +791,10 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
*
* Return: the nc_node if found, NULL otherwise.
*/
-static struct batadv_nc_node
-*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh_node,
- bool in_coding)
+static struct batadv_nc_node *
+batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
+ struct batadv_orig_node *orig_neigh_node,
+ bool in_coding)
{
struct batadv_nc_node *nc_node, *nc_node_out = NULL;
struct list_head *list;
@@ -835,11 +833,11 @@ static struct batadv_nc_node
*
* Return: the nc_node if found or created, NULL in case of an error.
*/
-static struct batadv_nc_node
-*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh_node,
- bool in_coding)
+static struct batadv_nc_node *
+batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ struct batadv_orig_node *orig_neigh_node,
+ bool in_coding)
{
struct batadv_nc_node *nc_node;
spinlock_t *lock; /* Used to lock list selected by "int in_coding" */
@@ -856,8 +854,7 @@ static struct batadv_nc_node
if (!nc_node)
return NULL;
- if (!kref_get_unless_zero(&orig_neigh_node->refcount))
- goto free;
+ kref_get(&orig_neigh_node->refcount);
/* Initialize nc_node */
INIT_LIST_HEAD(&nc_node->list);
@@ -884,10 +881,6 @@ static struct batadv_nc_node
spin_unlock_bh(lock);
return nc_node;
-
-free:
- kfree(nc_node);
- return NULL;
}
/**
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index e4cbb0753e37..1ff4ee473966 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -54,9 +54,9 @@ static void batadv_purge_orig(struct work_struct *work);
* @node: node in the local table
* @data2: second object to compare the node to
*
- * Return: 1 if they are the same originator
+ * Return: true if they are the same originator
*/
-int batadv_compare_orig(const struct hlist_node *node, const void *data2)
+bool batadv_compare_orig(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_orig_node,
hash_entry);
@@ -250,7 +250,6 @@ static void batadv_neigh_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
- struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_neigh_ifinfo *neigh_ifinfo;
struct batadv_algo_ops *bao;
@@ -262,13 +261,7 @@ static void batadv_neigh_node_release(struct kref *ref)
batadv_neigh_ifinfo_put(neigh_ifinfo);
}
- hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming,
- neigh_node->addr);
- if (hardif_neigh) {
- /* batadv_hardif_neigh_get() increases refcount too */
- batadv_hardif_neigh_put(hardif_neigh);
- batadv_hardif_neigh_put(hardif_neigh);
- }
+ batadv_hardif_neigh_put(neigh_node->hardif_neigh);
if (bao->bat_neigh_free)
bao->bat_neigh_free(neigh_node);
@@ -289,7 +282,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
}
/**
- * batadv_orig_node_get_router - router to the originator depending on iface
+ * batadv_orig_router_get - router to the originator depending on iface
* @orig_node: the orig node for the router
* @if_outgoing: the interface where the payload packet has been received or
* the OGM should be sent to
@@ -381,12 +374,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
if (!orig_ifinfo)
goto out;
- if (if_outgoing != BATADV_IF_DEFAULT &&
- !kref_get_unless_zero(&if_outgoing->refcount)) {
- kfree(orig_ifinfo);
- orig_ifinfo = NULL;
- goto out;
- }
+ if (if_outgoing != BATADV_IF_DEFAULT)
+ kref_get(&if_outgoing->refcount);
reset_time = jiffies - 1;
reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS);
@@ -462,11 +451,8 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
if (!neigh_ifinfo)
goto out;
- if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) {
- kfree(neigh_ifinfo);
- neigh_ifinfo = NULL;
- goto out;
- }
+ if (if_outgoing)
+ kref_get(&if_outgoing->refcount);
INIT_HLIST_NODE(&neigh_ifinfo->list);
kref_init(&neigh_ifinfo->refcount);
@@ -539,15 +525,11 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
if (hardif_neigh)
goto out;
- if (!kref_get_unless_zero(&hard_iface->refcount))
- goto out;
-
hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC);
- if (!hardif_neigh) {
- batadv_hardif_put(hard_iface);
+ if (!hardif_neigh)
goto out;
- }
+ kref_get(&hard_iface->refcount);
INIT_HLIST_NODE(&hardif_neigh->list);
ether_addr_copy(hardif_neigh->addr, neigh_addr);
hardif_neigh->if_incoming = hard_iface;
@@ -650,19 +632,19 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
if (!neigh_node)
goto out;
- if (!kref_get_unless_zero(&hard_iface->refcount)) {
- kfree(neigh_node);
- neigh_node = NULL;
- goto out;
- }
-
INIT_HLIST_NODE(&neigh_node->list);
INIT_HLIST_HEAD(&neigh_node->ifinfo_list);
spin_lock_init(&neigh_node->ifinfo_lock);
+ kref_get(&hard_iface->refcount);
ether_addr_copy(neigh_node->addr, neigh_addr);
neigh_node->if_incoming = hard_iface;
neigh_node->orig_node = orig_node;
+ neigh_node->last_seen = jiffies;
+
+ /* increment unique neighbor refcount */
+ kref_get(&hardif_neigh->refcount);
+ neigh_node->hardif_neigh = hardif_neigh;
/* extra reference for return */
kref_init(&neigh_node->refcount);
@@ -672,9 +654,6 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
spin_unlock_bh(&orig_node->neigh_list_lock);
- /* increment unique neighbor refcount */
- kref_get(&hardif_neigh->refcount);
-
batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
"Creating new neighbor %pM for orig_node %pM on interface %s\n",
neigh_addr, orig_node->orig, hard_iface->net_dev->name);
@@ -1165,6 +1144,9 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
if (hard_iface->soft_iface != bat_priv->soft_iface)
continue;
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ continue;
+
best_neigh_node = batadv_find_best_neighbor(bat_priv,
orig_node,
hard_iface);
@@ -1172,6 +1154,8 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
best_neigh_node);
if (best_neigh_node)
batadv_neigh_node_put(best_neigh_node);
+
+ batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -1222,7 +1206,7 @@ static void batadv_purge_orig(struct work_struct *work)
struct delayed_work *delayed_work;
struct batadv_priv *bat_priv;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
bat_priv = container_of(delayed_work, struct batadv_priv, orig_work);
_batadv_purge_orig(bat_priv);
queue_delayed_work(batadv_event_workqueue,
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 4e8b67f11051..64a8951e5844 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -33,7 +33,7 @@
struct seq_file;
-int batadv_compare_orig(const struct hlist_node *node, const void *data2);
+bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 8a8d7ca1a5cf..372128ddb474 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -175,6 +175,7 @@ enum batadv_bla_claimframe {
BATADV_CLAIM_TYPE_UNCLAIM = 0x01,
BATADV_CLAIM_TYPE_ANNOUNCE = 0x02,
BATADV_CLAIM_TYPE_REQUEST = 0x03,
+ BATADV_CLAIM_TYPE_LOOPDETECT = 0x04,
};
/**
@@ -501,7 +502,7 @@ struct batadv_coded_packet {
#pragma pack()
/**
- * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload
+ * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
* @ttl: time to live for this packet, part of the genereal header
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 4dd646a52f1a..ae850f2d11cb 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -100,11 +100,20 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
if (curr_router)
batadv_neigh_node_put(curr_router);
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replaced the current
+ * best neighbor, the reference counter needs to be decreased. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
/* increase refcount of new best neighbor */
- if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount))
- neigh_node = NULL;
+ if (neigh_node)
+ kref_get(&neigh_node->refcount);
- spin_lock_bh(&orig_node->neigh_list_lock);
rcu_assign_pointer(orig_ifinfo->router, neigh_node);
spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_orig_ifinfo_put(orig_ifinfo);
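
The comment added above is the heart of this hunk: the router pointer has to be re-read inside the locked region, otherwise the reference dropped afterwards may belong to a pointer that another writer already replaced. A hedged pthread sketch of the same discipline, without the kernel's RCU and kref machinery:

#include <pthread.h>

struct router;

static struct router *current_router;
static pthread_mutex_t router_lock = PTHREAD_MUTEX_INITIALIZER;

/* Replace the published router pointer and hand the displaced value back to
 * the caller, which owns dropping its reference. Crucially the old value is
 * read inside the locked region: a snapshot taken before the lock could
 * already have been replaced by a concurrent writer, and releasing that
 * stale pointer's reference would then hit the wrong object.
 */
static struct router *router_replace(struct router *new_router)
{
	struct router *old;

	pthread_mutex_lock(&router_lock);
	old = current_router;		/* re-read under the lock */
	current_router = new_router;
	pthread_mutex_unlock(&router_lock);

	return old;			/* caller drops the old reference */
}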
@@ -154,18 +163,18 @@ out:
* doesn't change otherwise.
*
* Return:
- * 0 if the packet is to be accepted.
- * 1 if the packet is to be ignored.
+ * false if the packet is to be accepted.
+ * true if the packet is to be ignored.
*/
-int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
- s32 seq_old_max_diff, unsigned long *last_reset,
- bool *protection_started)
+bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
+ s32 seq_old_max_diff, unsigned long *last_reset,
+ bool *protection_started)
{
if (seq_num_diff <= -seq_old_max_diff ||
seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) {
if (!batadv_has_timed_out(*last_reset,
BATADV_RESET_PROTECTION_MS))
- return 1;
+ return true;
*last_reset = jiffies;
if (protection_started)
@@ -174,7 +183,7 @@ int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
"old packet received, start protection\n");
}
- return 0;
+ return false;
}
bool batadv_check_management_packet(struct sk_buff *skb,
@@ -709,8 +718,9 @@ out:
return ret;
}
-static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int hdr_len) {
+static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_len)
+{
struct batadv_unicast_packet *unicast_packet;
struct batadv_hard_iface *primary_if;
struct batadv_orig_node *orig_node;
@@ -721,11 +731,11 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
/* check if there is enough data before accessing it */
if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
- return 0;
+ return false;
/* create a copy of the skb (in case of for re-routing) to modify it. */
if (skb_cow(skb, sizeof(*unicast_packet)) < 0)
- return 0;
+ return false;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
vid = batadv_get_vid(skb, hdr_len);
@@ -749,7 +759,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* table. If not, let the packet go untouched anyway because
* there is nothing the node can do
*/
- return 1;
+ return true;
}
/* retrieve the TTVN known by this node for the packet destination. This
@@ -765,7 +775,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* not be possible to deliver it
*/
if (!orig_node)
- return 0;
+ return false;
curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
batadv_orig_node_put(orig_node);
@@ -776,7 +786,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
*/
is_old_ttvn = batadv_seq_before(unicast_packet->ttvn, curr_ttvn);
if (!is_old_ttvn)
- return 1;
+ return true;
old_ttvn = unicast_packet->ttvn;
/* the packet was forged based on outdated network information. Its
@@ -789,7 +799,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
unicast_packet->dest, ethhdr->h_dest,
old_ttvn, curr_ttvn);
- return 1;
+ return true;
}
/* the packet has not been re-routed: either the destination is
@@ -797,14 +807,14 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* it is possible to drop the packet
*/
if (!batadv_is_my_client(bat_priv, ethhdr->h_dest, vid))
- return 0;
+ return false;
/* update the header in order to let the packet be delivered to this
* node's soft interface
*/
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
- return 0;
+ return false;
ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
@@ -812,7 +822,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
unicast_packet->ttvn = curr_ttvn;
- return 1;
+ return true;
}
/**
@@ -903,7 +913,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
hdr_size))
goto rx_success;
- batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
+ batadv_interface_rx(recv_if->soft_iface, skb, hdr_size,
orig_node);
rx_success:
@@ -1113,8 +1123,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
goto rx_success;
/* broadcast for me */
- batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
- orig_node);
+ batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node);
rx_success:
ret = NET_RX_SUCCESS;
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 02a5caa84127..05c3ff42e181 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -51,8 +51,8 @@ struct batadv_neigh_node *
batadv_find_router(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if);
-int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
- s32 seq_old_max_diff, unsigned long *last_reset,
- bool *protection_started);
+bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
+ s32 seq_old_max_diff, unsigned long *last_reset,
+ bool *protection_started);
#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3ce06e0a91b1..f2f125684ed9 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -26,6 +26,7 @@
#include <linux/if.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
@@ -552,7 +553,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
struct net_device *soft_iface;
struct batadv_priv *bat_priv;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
delayed_work);
soft_iface = forw_packet->if_incoming->soft_iface;
@@ -577,10 +578,15 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (forw_packet->num_packets >= hard_iface->num_bcasts)
continue;
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ continue;
+
/* send a copy of the saved skb */
skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
if (skb1)
batadv_send_broadcast_skb(skb1, hard_iface);
+
+ batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -604,7 +610,7 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work)
struct batadv_forw_packet *forw_packet;
struct batadv_priv *bat_priv;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
delayed_work);
bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
@@ -675,6 +681,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->bcast_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
@@ -702,6 +711,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->batman_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
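/* Illustrative sketch, not from the patch above: the pattern the broadcast
 * path now follows -- take a reference with kref_get_unless_zero() (from
 * <linux/kref.h>) before using an object found while walking an RCU list,
 * and drop it again right after. obj, obj_list, use_object and put_object
 * are made-up names for the example.
 */
rcu_read_lock();
list_for_each_entry_rcu(obj, &obj_list, list) {
	if (!kref_get_unless_zero(&obj->refcount))
		continue;	/* object is already on its way out */

	use_object(obj);	/* safe: we hold our own reference now */
	put_object(obj);	/* wrapper around kref_put() */
}
rcu_read_unlock();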
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0710379491bf..343d2c904399 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -186,7 +186,6 @@ static int batadv_interface_tx(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_hard_iface *primary_if = NULL;
struct batadv_bcast_packet *bcast_packet;
- __be16 ethertype = htons(ETH_P_BATMAN);
static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
0x00, 0x00};
static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
@@ -208,7 +207,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
- soft_iface->trans_start = jiffies;
+ netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
@@ -216,7 +215,8 @@ static int batadv_interface_tx(struct sk_buff *skb,
case ETH_P_8021Q:
vhdr = vlan_eth_hdr(skb);
- if (vhdr->h_vlan_encapsulated_proto != ethertype) {
+ /* drop batman-in-batman packets to prevent loops */
+ if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) {
network_offset += VLAN_HLEN;
break;
}
@@ -381,13 +381,29 @@ end:
return NETDEV_TX_OK;
}
+/**
+ * batadv_interface_rx - receive ethernet frame on local batman-adv interface
+ * @soft_iface: local interface which will receive the ethernet frame
+ * @skb: ethernet frame for @soft_iface
+ * @hdr_size: size of already parsed batman-adv header
+ * @orig_node: originator from which the batman-adv packet was sent
+ *
+ * Sends an ethernet frame to the receive path of the local @soft_iface.
+ * skb->data still points to the batman-adv header of size @hdr_size.
+ * The caller has to have parsed this header already and made sure that at
+ * least @hdr_size bytes are still available for pull in @skb.
+ *
+ * The packet may still get dropped. This can happen when the encapsulated
+ * ethernet frame is invalid or itself contains a batman-adv packet. Also,
+ * unicast packets are dropped directly when sent between two isolated
+ * clients.
+ */
void batadv_interface_rx(struct net_device *soft_iface,
- struct sk_buff *skb, struct batadv_hard_iface *recv_if,
- int hdr_size, struct batadv_orig_node *orig_node)
+ struct sk_buff *skb, int hdr_size,
+ struct batadv_orig_node *orig_node)
{
struct batadv_bcast_packet *batadv_bcast_packet;
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
- __be16 ethertype = htons(ETH_P_BATMAN);
struct vlan_ethhdr *vhdr;
struct ethhdr *ethhdr;
unsigned short vid;
@@ -396,10 +412,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data;
is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST);
- /* check if enough space is available for pulling, and pull */
- if (!pskb_may_pull(skb, hdr_size))
- goto dropped;
-
skb_pull_rcsum(skb, hdr_size);
skb_reset_mac_header(skb);
@@ -408,14 +420,21 @@ void batadv_interface_rx(struct net_device *soft_iface,
*/
nf_reset(skb);
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ goto dropped;
+
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+ goto dropped;
+
vhdr = (struct vlan_ethhdr *)skb->data;
- if (vhdr->h_vlan_encapsulated_proto != ethertype)
+ /* drop batman-in-batman packets to prevent loops */
+ if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN))
break;
/* fall through */
@@ -424,8 +443,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
}
/* skb->dev & skb->pkt_type are set here */
- if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
- goto dropped;
skb->protocol = eth_type_trans(skb, soft_iface);
/* should not be necessary anymore as we use skb_pull_rcsum()
@@ -539,7 +556,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_create_vlan - allocate the needed resources for a new vlan
+ * batadv_softif_create_vlan - allocate the needed resources for a new vlan
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier
*
@@ -868,13 +885,14 @@ static int batadv_softif_slave_add(struct net_device *dev,
struct net_device *slave_dev)
{
struct batadv_hard_iface *hard_iface;
+ struct net *net = dev_net(dev);
int ret = -EINVAL;
hard_iface = batadv_hardif_get_by_netdev(slave_dev);
if (!hard_iface || hard_iface->soft_iface)
goto out;
- ret = batadv_hardif_enable_interface(hard_iface, dev->name);
+ ret = batadv_hardif_enable_interface(hard_iface, net, dev->name);
out:
if (hard_iface)
@@ -955,7 +973,7 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->destructor = batadv_softif_free;
- dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
dev->priv_flags |= IFF_NO_QUEUE;
/* can't call min_mtu, because the needed variables
@@ -971,7 +989,7 @@ static void batadv_softif_init_early(struct net_device *dev)
memset(priv, 0, sizeof(*priv));
}
-struct net_device *batadv_softif_create(const char *name)
+struct net_device *batadv_softif_create(struct net *net, const char *name)
{
struct net_device *soft_iface;
int ret;
@@ -981,6 +999,8 @@ struct net_device *batadv_softif_create(const char *name)
if (!soft_iface)
return NULL;
+ dev_net_set(soft_iface, net);
+
soft_iface->rtnl_link_ops = &batadv_link_ops;
ret = register_netdevice(soft_iface);
@@ -1025,12 +1045,12 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
unregister_netdevice_queue(soft_iface, head);
}
-int batadv_softif_is_valid(const struct net_device *net_dev)
+bool batadv_softif_is_valid(const struct net_device *net_dev)
{
if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
- return 1;
+ return true;
- return 0;
+ return false;
}
struct rtnl_link_ops batadv_link_ops __read_mostly = {
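/* Illustrative sketch, not from the patch above: how a receive handler is
 * expected to call the reworked batadv_interface_rx() now that the pull
 * check lives in the caller. recv_if, orig_node and hdr_size stand in for
 * the caller's local variables.
 */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
	goto free_skb;

/* skb->data still points at the batman-adv header of hdr_size bytes */
batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node);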
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 9ae265703d23..ec303ddbf647 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -20,18 +20,20 @@
#include "main.h"
+#include <linux/types.h>
#include <net/rtnetlink.h>
struct net_device;
+struct net;
struct sk_buff;
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
void batadv_interface_rx(struct net_device *soft_iface,
- struct sk_buff *skb, struct batadv_hard_iface *recv_if,
- int hdr_size, struct batadv_orig_node *orig_node);
-struct net_device *batadv_softif_create(const char *name);
+ struct sk_buff *skb, int hdr_size,
+ struct batadv_orig_node *orig_node);
+struct net_device *batadv_softif_create(struct net *net, const char *name);
void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
-int batadv_softif_is_valid(const struct net_device *net_dev);
+bool batadv_softif_is_valid(const struct net_device *net_dev);
extern struct rtnl_link_ops batadv_link_ops;
int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e7cf51333a36..414b2074165f 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -116,11 +116,13 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
static char *batadv_uev_action_str[] = {
"add",
"del",
- "change"
+ "change",
+ "loopdetect",
};
static char *batadv_uev_type_str[] = {
- "gw"
+ "gw",
+ "bla",
};
/* Use this, if you have customized show and store functions for vlan attrs */
@@ -830,6 +832,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
size_t count)
{
struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+ struct net *net = dev_net(net_dev);
struct batadv_hard_iface *hard_iface;
int status_tmp = -1;
int ret = count;
@@ -873,7 +876,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
batadv_hardif_disable_interface(hard_iface,
BATADV_IF_CLEANUP_AUTO);
- ret = batadv_hardif_enable_interface(hard_iface, buff);
+ ret = batadv_hardif_enable_interface(hard_iface, net, buff);
unlock:
rtnl_unlock();
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0b43e86328a5..feaf492b01ca 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -43,7 +43,6 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
-#include <net/net_namespace.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
@@ -76,9 +75,9 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
*
* Compare the MAC address and the VLAN ID of the two TT entries and check if
* they are the same TT client.
- * Return: 1 if the two TT clients are the same, 0 otherwise
+ * Return: true if the two TT clients are the same, false otherwise
*/
-static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
+static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_tt_common_entry,
hash_entry);
@@ -215,6 +214,8 @@ static void batadv_tt_local_entry_release(struct kref *ref)
tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
common.refcount);
+ batadv_softif_vlan_put(tt_local_entry->vlan);
+
kfree_rcu(tt_local_entry, common.rcu);
}
@@ -583,6 +584,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_tt_local_entry *tt_local;
struct batadv_tt_global_entry *tt_global = NULL;
+ struct net *net = dev_net(soft_iface);
struct batadv_softif_vlan *vlan;
struct net_device *in_dev = NULL;
struct hlist_head *head;
@@ -594,7 +596,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
u32 match_mark;
if (ifindex != BATADV_NULL_IFINDEX)
- in_dev = dev_get_by_index(&init_net, ifindex);
+ in_dev = dev_get_by_index(net, ifindex);
tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
@@ -673,6 +675,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
kref_get(&tt_local->common.refcount);
tt_local->last_seen = jiffies;
tt_local->common.added_at = tt_local->last_seen;
+ tt_local->vlan = vlan;
/* the batman interface mac and multicast addresses should never be
* purged
@@ -991,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
struct batadv_hard_iface *primary_if;
- struct batadv_softif_vlan *vlan;
struct hlist_head *head;
unsigned short vid;
u32 i;
@@ -1008,8 +1010,8 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
net_dev->name, (u8)atomic_read(&bat_priv->tt.vn));
- seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID",
- "Flags", "Last seen", "CRC");
+ seq_puts(seq,
+ " Client VID Flags Last seen (CRC )\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1027,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
last_seen_msecs = last_seen_msecs % 1000;
no_purge = tt_common_entry->flags & np_flag;
-
- vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan) {
- seq_printf(seq, "Cannot retrieve VLAN %d\n",
- BATADV_PRINT_VID(vid));
- continue;
- }
-
seq_printf(seq,
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
@@ -1052,9 +1046,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
no_purge ? 0 : last_seen_secs,
no_purge ? 0 : last_seen_msecs,
- vlan->tt.crc);
-
- batadv_softif_vlan_put(vlan);
+ tt_local->vlan->tt.crc);
}
rcu_read_unlock();
}
@@ -1099,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
{
struct batadv_tt_local_entry *tt_local_entry;
u16 flags, curr_flags = BATADV_NO_FLAGS;
- struct batadv_softif_vlan *vlan;
void *tt_entry_exists;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
@@ -1139,14 +1130,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
/* extra call to free the local tt entry */
batadv_tt_local_entry_put(tt_local_entry);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan)
- goto out;
-
- batadv_softif_vlan_put(vlan);
- batadv_softif_vlan_put(vlan);
-
out:
if (tt_local_entry)
batadv_tt_local_entry_put(tt_local_entry);
@@ -1219,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
spinlock_t *list_lock; /* protects write access to the hash lists */
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
- struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
u32 i;
@@ -1241,14 +1223,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
struct batadv_tt_local_entry,
common);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv,
- tt_common_entry->vid);
- if (vlan) {
- batadv_softif_vlan_put(vlan);
- batadv_softif_vlan_put(vlan);
- }
-
batadv_tt_local_entry_put(tt_local);
}
spin_unlock_bh(list_lock);
@@ -1706,9 +1680,8 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Globally announced TT entries received via the mesh %s\n",
net_dev->name);
- seq_printf(seq, " %-13s %s %s %-15s %s (%-10s) %s\n",
- "Client", "VID", "(TTVN)", "Originator", "(Curr TTVN)",
- "CRC", "Flags");
+ seq_puts(seq,
+ " Client VID (TTVN) Originator (Curr TTVN) (CRC ) Flags\n");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2388,19 +2361,19 @@ unlock:
* @entry_ptr: to be checked local tt entry
* @data_ptr: not used but definition required to satisfy the callback prototype
*
- * Return: 1 if the entry is a valid, 0 otherwise.
+ * Return: true if the entry is valid, false otherwise.
*/
-static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
+static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW)
- return 0;
- return 1;
+ return false;
+ return true;
}
-static int batadv_tt_global_valid(const void *entry_ptr,
- const void *data_ptr)
+static bool batadv_tt_global_valid(const void *entry_ptr,
+ const void *data_ptr)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
const struct batadv_tt_global_entry *tt_global_entry;
@@ -2408,7 +2381,7 @@ static int batadv_tt_global_valid(const void *entry_ptr,
if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ||
tt_common_entry->flags & BATADV_TT_CLIENT_TEMP)
- return 0;
+ return false;
tt_global_entry = container_of(tt_common_entry,
struct batadv_tt_global_entry,
@@ -2430,7 +2403,8 @@ static int batadv_tt_global_valid(const void *entry_ptr,
static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash,
void *tvlv_buff, u16 tt_len,
- int (*valid_cb)(const void *, const void *),
+ bool (*valid_cb)(const void *,
+ const void *),
void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
@@ -2579,11 +2553,11 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
*
* Return: true if the TT Request was sent, false otherwise
*/
-static int batadv_send_tt_request(struct batadv_priv *bat_priv,
- struct batadv_orig_node *dst_orig_node,
- u8 ttvn,
- struct batadv_tvlv_tt_vlan_data *tt_vlan,
- u16 num_vlan, bool full_table)
+static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *dst_orig_node,
+ u8 ttvn,
+ struct batadv_tvlv_tt_vlan_data *tt_vlan,
+ u16 num_vlan, bool full_table)
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_tt_req_node *tt_req_node = NULL;
@@ -3227,7 +3201,7 @@ static void batadv_tt_purge(struct work_struct *work)
struct batadv_priv_tt *priv_tt;
struct batadv_priv *bat_priv;
- delayed_work = container_of(work, struct delayed_work, work);
+ delayed_work = to_delayed_work(work);
priv_tt = container_of(delayed_work, struct batadv_priv_tt, work);
bat_priv = container_of(priv_tt, struct batadv_priv, tt);
@@ -3309,7 +3283,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
- struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3339,13 +3312,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_tt_local_entry,
common);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
- if (vlan) {
- batadv_softif_vlan_put(vlan);
- batadv_softif_vlan_put(vlan);
- }
-
batadv_tt_local_entry_put(tt_local);
}
spin_unlock_bh(list_lock);
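/* Illustrative sketch, not from the patch above: the ownership change in a
 * generic form. Instead of looking the vlan up again at every removal site,
 * the entry keeps the reference taken at creation time and drops it exactly
 * once in its kref release function. All names here are placeholders.
 */
static void entry_release(struct kref *ref)
{
	struct entry *e = container_of(ref, struct entry, refcount);

	vlan_put(e->vlan);	/* reference taken when the entry was created */
	kfree_rcu(e, rcu);
}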
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 9abfb3e73c34..6a577f4f8ba7 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -433,6 +433,7 @@ struct batadv_hardif_neigh_node {
* @ifinfo_lock: lock protecting private ifinfo members and list
* @if_incoming: pointer to incoming hard-interface
* @last_seen: when last packet via this neighbor was received
+ * @hardif_neigh: hardif_neigh of this neighbor
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
*/
@@ -444,6 +445,7 @@ struct batadv_neigh_node {
spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
struct batadv_hard_iface *if_incoming;
unsigned long last_seen;
+ struct batadv_hardif_neigh_node *hardif_neigh;
struct kref refcount;
struct rcu_head rcu;
};
@@ -655,6 +657,9 @@ struct batadv_priv_tt {
* @num_requests: number of bla requests in flight
* @claim_hash: hash table containing mesh nodes this host has claimed
* @backbone_hash: hash table containing all detected backbone gateways
+ * @loopdetect_addr: MAC address used for own loopdetection frames
+ * @loopdetect_lasttime: time when the loopdetection frames were sent
+ * @loopdetect_next: how many periods to wait for the next loopdetect process
* @bcast_duplist: recently received broadcast packets array (for broadcast
* duplicate suppression)
* @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist
@@ -666,6 +671,9 @@ struct batadv_priv_bla {
atomic_t num_requests;
struct batadv_hashtable *claim_hash;
struct batadv_hashtable *backbone_hash;
+ u8 loopdetect_addr[ETH_ALEN];
+ unsigned long loopdetect_lasttime;
+ atomic_t loopdetect_next;
struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
int bcast_duplist_curr;
/* protects bcast_duplist & bcast_duplist_curr */
@@ -1010,6 +1018,7 @@ struct batadv_socket_packet {
* resolved
* @crc: crc16 checksum over all claims
* @crc_lock: lock protecting crc
+ * @report_work: work struct for reporting detected loops
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
*/
@@ -1023,6 +1032,7 @@ struct batadv_bla_backbone_gw {
atomic_t request_sent;
u16 crc;
spinlock_t crc_lock; /* protects crc */
+ struct work_struct report_work;
struct kref refcount;
struct rcu_head rcu;
};
@@ -1073,10 +1083,12 @@ struct batadv_tt_common_entry {
* struct batadv_tt_local_entry - translation table local entry data
* @common: general translation table data
* @last_seen: timestamp used for purging stale tt local entries
+ * @vlan: soft-interface vlan of the entry
*/
struct batadv_tt_local_entry {
struct batadv_tt_common_entry common;
unsigned long last_seen;
+ struct batadv_softif_vlan *vlan;
};
/**
@@ -1250,6 +1262,8 @@ struct batadv_forw_packet {
* struct batadv_algo_ops - mesh algorithm callbacks
* @list: list node for the batadv_algo_list
* @name: name of the algorithm
+ * @bat_iface_activate: start routing mechanisms when hard-interface is brought
+ * up
* @bat_iface_enable: init routing info when hard-interface is enabled
* @bat_iface_disable: de-init routing info when hard-interface is disabled
* @bat_iface_update_mac: (re-)init mac addresses of the protocol information
@@ -1277,6 +1291,7 @@ struct batadv_forw_packet {
struct batadv_algo_ops {
struct hlist_node list;
char *name;
+ void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 8a4cc2f7f0db..780089d75915 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -68,7 +68,7 @@ struct lowpan_peer {
struct in6_addr peer_addr;
};
-struct lowpan_dev {
+struct lowpan_btle_dev {
struct list_head list;
struct hci_dev *hdev;
@@ -80,18 +80,21 @@ struct lowpan_dev {
struct delayed_work notify_peers;
};
-static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev)
+static inline struct lowpan_btle_dev *
+lowpan_btle_dev(const struct net_device *netdev)
{
- return (struct lowpan_dev *)lowpan_priv(netdev)->priv;
+ return (struct lowpan_btle_dev *)lowpan_dev(netdev)->priv;
}
-static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer)
+static inline void peer_add(struct lowpan_btle_dev *dev,
+ struct lowpan_peer *peer)
{
list_add_rcu(&peer->list, &dev->peers);
atomic_inc(&dev->peer_count);
}
-static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer)
+static inline bool peer_del(struct lowpan_btle_dev *dev,
+ struct lowpan_peer *peer)
{
list_del_rcu(&peer->list);
kfree_rcu(peer, rcu);
@@ -106,7 +109,7 @@ static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer)
return false;
}
-static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev,
+static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_btle_dev *dev,
bdaddr_t *ba, __u8 type)
{
struct lowpan_peer *peer;
@@ -134,8 +137,8 @@ static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev,
return NULL;
}
-static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev,
- struct l2cap_chan *chan)
+static inline struct lowpan_peer *
+__peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan)
{
struct lowpan_peer *peer;
@@ -147,8 +150,8 @@ static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev,
return NULL;
}
-static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev,
- struct l2cap_conn *conn)
+static inline struct lowpan_peer *
+__peer_lookup_conn(struct lowpan_btle_dev *dev, struct l2cap_conn *conn)
{
struct lowpan_peer *peer;
@@ -160,7 +163,7 @@ static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev,
return NULL;
}
-static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
+static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
struct in6_addr *daddr,
struct sk_buff *skb)
{
@@ -220,7 +223,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn)
{
- struct lowpan_dev *entry;
+ struct lowpan_btle_dev *entry;
struct lowpan_peer *peer = NULL;
rcu_read_lock();
@@ -236,10 +239,10 @@ static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn)
return peer;
}
-static struct lowpan_dev *lookup_dev(struct l2cap_conn *conn)
+static struct lowpan_btle_dev *lookup_dev(struct l2cap_conn *conn)
{
- struct lowpan_dev *entry;
- struct lowpan_dev *dev = NULL;
+ struct lowpan_btle_dev *entry;
+ struct lowpan_btle_dev *dev = NULL;
rcu_read_lock();
@@ -270,10 +273,10 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
struct l2cap_chan *chan)
{
const u8 *saddr, *daddr;
- struct lowpan_dev *dev;
+ struct lowpan_btle_dev *dev;
struct lowpan_peer *peer;
- dev = lowpan_dev(netdev);
+ dev = lowpan_btle_dev(netdev);
rcu_read_lock();
peer = __peer_lookup_chan(dev, chan);
@@ -375,7 +378,7 @@ drop:
/* Packet from BT LE device */
static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct lowpan_dev *dev;
+ struct lowpan_btle_dev *dev;
struct lowpan_peer *peer;
int err;
@@ -431,15 +434,18 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
bdaddr_t *peer_addr, u8 *peer_addr_type)
{
struct in6_addr ipv6_daddr;
- struct lowpan_dev *dev;
+ struct ipv6hdr *hdr;
+ struct lowpan_btle_dev *dev;
struct lowpan_peer *peer;
bdaddr_t addr, *any = BDADDR_ANY;
u8 *daddr = any->b;
int err, status = 0;
- dev = lowpan_dev(netdev);
+ hdr = ipv6_hdr(skb);
+
+ dev = lowpan_btle_dev(netdev);
- memcpy(&ipv6_daddr, &lowpan_cb(skb)->addr, sizeof(ipv6_daddr));
+ memcpy(&ipv6_daddr, &hdr->daddr, sizeof(ipv6_daddr));
if (ipv6_addr_is_multicast(&ipv6_daddr)) {
lowpan_cb(skb)->chan = NULL;
@@ -489,15 +495,9 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev,
unsigned short type, const void *_daddr,
const void *_saddr, unsigned int len)
{
- struct ipv6hdr *hdr;
-
if (type != ETH_P_IPV6)
return -EINVAL;
- hdr = ipv6_hdr(skb);
-
- memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr));
-
return 0;
}
@@ -543,19 +543,19 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
{
struct sk_buff *local_skb;
- struct lowpan_dev *entry;
+ struct lowpan_btle_dev *entry;
int err = 0;
rcu_read_lock();
list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) {
struct lowpan_peer *pentry;
- struct lowpan_dev *dev;
+ struct lowpan_btle_dev *dev;
if (entry->netdev != netdev)
continue;
- dev = lowpan_dev(entry->netdev);
+ dev = lowpan_btle_dev(entry->netdev);
list_for_each_entry_rcu(pentry, &dev->peers, list) {
int ret;
@@ -723,8 +723,8 @@ static void ifdown(struct net_device *netdev)
static void do_notify_peers(struct work_struct *work)
{
- struct lowpan_dev *dev = container_of(work, struct lowpan_dev,
- notify_peers.work);
+ struct lowpan_btle_dev *dev = container_of(work, struct lowpan_btle_dev,
+ notify_peers.work);
netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */
}
@@ -766,7 +766,7 @@ static void set_ip_addr_bits(u8 addr_type, u8 *addr)
}
static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
- struct lowpan_dev *dev)
+ struct lowpan_btle_dev *dev)
{
struct lowpan_peer *peer;
@@ -803,12 +803,12 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
return peer->chan;
}
-static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
+static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
{
struct net_device *netdev;
int err = 0;
- netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev)),
+ netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)),
IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN,
netdev_setup);
if (!netdev)
@@ -820,7 +820,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
SET_NETDEV_DEVTYPE(netdev, &bt_type);
- *dev = lowpan_dev(netdev);
+ *dev = lowpan_btle_dev(netdev);
(*dev)->netdev = netdev;
(*dev)->hdev = chan->conn->hcon->hdev;
INIT_LIST_HEAD(&(*dev)->peers);
@@ -853,7 +853,7 @@ out:
static inline void chan_ready_cb(struct l2cap_chan *chan)
{
- struct lowpan_dev *dev;
+ struct lowpan_btle_dev *dev;
dev = lookup_dev(chan->conn);
@@ -890,8 +890,9 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan)
static void delete_netdev(struct work_struct *work)
{
- struct lowpan_dev *entry = container_of(work, struct lowpan_dev,
- delete_netdev);
+ struct lowpan_btle_dev *entry = container_of(work,
+ struct lowpan_btle_dev,
+ delete_netdev);
lowpan_unregister_netdev(entry->netdev);
@@ -900,8 +901,8 @@ static void delete_netdev(struct work_struct *work)
static void chan_close_cb(struct l2cap_chan *chan)
{
- struct lowpan_dev *entry;
- struct lowpan_dev *dev = NULL;
+ struct lowpan_btle_dev *entry;
+ struct lowpan_btle_dev *dev = NULL;
struct lowpan_peer *peer;
int err = -ENOENT;
bool last = false, remove = true;
@@ -921,7 +922,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
spin_lock(&devices_lock);
list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) {
- dev = lowpan_dev(entry->netdev);
+ dev = lowpan_btle_dev(entry->netdev);
peer = __peer_lookup_chan(dev, chan);
if (peer) {
last = peer_del(dev, peer);
@@ -1131,7 +1132,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
static void disconnect_all_peers(void)
{
- struct lowpan_dev *entry;
+ struct lowpan_btle_dev *entry;
struct lowpan_peer *peer, *tmp_peer, *new_peer;
struct list_head peers;
@@ -1291,7 +1292,7 @@ static ssize_t lowpan_control_write(struct file *fp,
static int lowpan_control_show(struct seq_file *f, void *ptr)
{
- struct lowpan_dev *entry;
+ struct lowpan_btle_dev *entry;
struct lowpan_peer *peer;
spin_lock(&devices_lock);
@@ -1322,7 +1323,7 @@ static const struct file_operations lowpan_control_fops = {
static void disconnect_devices(void)
{
- struct lowpan_dev *entry, *tmp, *new_dev;
+ struct lowpan_btle_dev *entry, *tmp, *new_dev;
struct list_head devices;
INIT_LIST_HEAD(&devices);
@@ -1360,7 +1361,7 @@ static int device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- struct lowpan_dev *entry;
+ struct lowpan_btle_dev *entry;
if (netdev->type != ARPHRD_6LOWPAN)
return NOTIFY_DONE;
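/* Illustrative sketch, not from the patch above: how the two private areas
 * nest after the rename. netdev_priv() holds the generic 6LoWPAN state
 * (reached via lowpan_dev(netdev)), and the Bluetooth-specific
 * struct lowpan_btle_dev lives in its ->priv tail, which is what
 * lowpan_btle_dev(netdev) returns. IFACE_NAME_TEMPLATE and netdev_setup
 * come from this file.
 */
struct net_device *netdev;
struct lowpan_btle_dev *btle;

netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)),
		      IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN, netdev_setup);
if (netdev)
	btle = lowpan_btle_dev(netdev);	/* == lowpan_dev(netdev)->priv */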
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 6ceb5d36a32b..f4fcb4a9d5c1 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -188,7 +188,7 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb,
* So we have to queue them and wake up session thread which is sleeping
* on the sk_sleep(sk).
*/
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
skb_queue_tail(&sk->sk_write_queue, skb);
wake_up_interruptible(sk_sleep(sk));
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index f8fc6241469a..d99b2009771a 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
#include <asm/uaccess.h>
#include "br_private.h"
-/* called with RTNL */
static int get_bridge_ifindices(struct net *net, int *indices, int num)
{
struct net_device *dev;
int i = 0;
- for_each_netdev(net, dev) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
if (i >= num)
break;
if (dev->priv_flags & IFF_EBRIDGE)
indices[i++] = dev->ifindex;
}
+ rcu_read_unlock();
return i;
}
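/* Illustrative sketch, not from the patch above: the RCU variant of a netdev
 * walk, as get_bridge_ifindices() uses now. The iteration itself is safe
 * inside rcu_read_lock(); a device pointer kept past the unlock would need
 * its own reference via dev_hold(). count is a made-up local.
 */
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
	if (dev->priv_flags & IFF_EBRIDGE)
		count++;
}
rcu_read_unlock();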
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 253bc77eda3b..7dbc80d01eb0 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -61,6 +61,19 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
e->flags |= MDB_FLAGS_OFFLOAD;
}
+static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
+{
+ memset(ip, 0, sizeof(struct br_ip));
+ ip->vid = entry->vid;
+ ip->proto = entry->addr.proto;
+ if (ip->proto == htons(ETH_P_IP))
+ ip->u.ip4 = entry->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ ip->u.ip6 = entry->addr.u.ip6;
+#endif
+}
+
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
@@ -243,9 +256,45 @@ static inline size_t rtnl_mdb_nlmsg_size(void)
+ nla_total_size(sizeof(struct br_mdb_entry));
}
-static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
- int type, struct net_bridge_port_group *pg)
+struct br_mdb_complete_info {
+ struct net_bridge_port *port;
+ struct br_ip ip;
+};
+
+static void br_mdb_complete(struct net_device *dev, int err, void *priv)
{
+ struct br_mdb_complete_info *data = priv;
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_mdb_entry *mp;
+ struct net_bridge_port *port = data->port;
+ struct net_bridge *br = port->br;
+
+ if (err)
+ goto err;
+
+ spin_lock_bh(&br->multicast_lock);
+ mdb = mlock_dereference(br->mdb, br);
+ mp = br_mdb_ip_get(mdb, &data->ip);
+ if (!mp)
+ goto out;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
+ if (p->port != port)
+ continue;
+ p->flags |= MDB_PG_FLAGS_OFFLOAD;
+ }
+out:
+ spin_unlock_bh(&br->multicast_lock);
+err:
+ kfree(priv);
+}
+
+static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
+ struct br_mdb_entry *entry, int type)
+{
+ struct br_mdb_complete_info *complete_info;
struct switchdev_obj_port_mdb mdb = {
.obj = {
.id = SWITCHDEV_OBJ_ID_PORT_MDB,
@@ -268,9 +317,14 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
mdb.obj.orig_dev = port_dev;
if (port_dev && type == RTM_NEWMDB) {
- err = switchdev_port_obj_add(port_dev, &mdb.obj);
- if (!err && pg)
- pg->flags |= MDB_PG_FLAGS_OFFLOAD;
+ complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
+ if (complete_info) {
+ complete_info->port = p;
+ __mdb_entry_to_br_ip(entry, &complete_info->ip);
+ mdb.obj.complete_priv = complete_info;
+ mdb.obj.complete = br_mdb_complete;
+ switchdev_port_obj_add(port_dev, &mdb.obj);
+ }
} else if (port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
}
@@ -291,21 +345,21 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
- int type)
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
+ struct br_ip *group, int type, u8 flags)
{
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.ifindex = pg->port->dev->ifindex;
- entry.addr.proto = pg->addr.proto;
- entry.addr.u.ip4 = pg->addr.u.ip4;
+ entry.ifindex = port->dev->ifindex;
+ entry.addr.proto = group->proto;
+ entry.addr.u.ip4 = group->u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- entry.addr.u.ip6 = pg->addr.u.ip6;
+ entry.addr.u.ip6 = group->u.ip6;
#endif
- entry.vid = pg->addr.vid;
- __mdb_entry_fill_flags(&entry, pg->flags);
- __br_mdb_notify(dev, &entry, type, pg);
+ entry.vid = group->vid;
+ __mdb_entry_fill_flags(&entry, flags);
+ __br_mdb_notify(dev, port, &entry, type);
}
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
@@ -450,8 +504,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, unsigned char state,
- struct net_bridge_port_group **pg)
+ struct br_ip *group, unsigned char state)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
@@ -482,7 +535,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
if (unlikely(!p))
return -ENOMEM;
rcu_assign_pointer(*pp, p);
- *pg = p;
if (state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -490,8 +542,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
}
static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct br_mdb_entry *entry,
- struct net_bridge_port_group **pg)
+ struct br_mdb_entry *entry)
{
struct br_ip ip;
struct net_device *dev;
@@ -509,18 +560,10 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
if (!p || p->br != br || p->state == BR_STATE_DISABLED)
return -EINVAL;
- memset(&ip, 0, sizeof(ip));
- ip.vid = entry->vid;
- ip.proto = entry->addr.proto;
- if (ip.proto == htons(ETH_P_IP))
- ip.u.ip4 = entry->addr.u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- else
- ip.u.ip6 = entry->addr.u.ip6;
-#endif
+ __mdb_entry_to_br_ip(entry, &ip);
spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, &ip, entry->state, pg);
+ ret = br_mdb_add_group(br, p, &ip, entry->state);
spin_unlock_bh(&br->multicast_lock);
return ret;
}
@@ -528,7 +571,6 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- struct net_bridge_port_group *pg;
struct net_bridge_vlan_group *vg;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
@@ -558,15 +600,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
if (br_vlan_enabled(br) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_add(net, br, entry, &pg);
+ err = __br_mdb_add(net, br, entry);
if (err)
break;
- __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
+ __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
}
} else {
- err = __br_mdb_add(net, br, entry, &pg);
+ err = __br_mdb_add(net, br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
+ __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
}
return err;
@@ -584,15 +626,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (!netif_running(br->dev) || br->multicast_disabled)
return -EINVAL;
- memset(&ip, 0, sizeof(ip));
- ip.vid = entry->vid;
- ip.proto = entry->addr.proto;
- if (ip.proto == htons(ETH_P_IP))
- ip.u.ip4 = entry->addr.u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- else
- ip.u.ip6 = entry->addr.u.ip6;
-#endif
+ __mdb_entry_to_br_ip(entry, &ip);
spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);
@@ -662,12 +696,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
entry->vid = v->vid;
err = __br_mdb_del(br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
+ __br_mdb_notify(dev, p, entry, RTM_DELMDB);
}
} else {
err = __br_mdb_del(br, entry);
if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
+ __br_mdb_notify(dev, p, entry, RTM_DELMDB);
}
return err;
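/* Illustrative sketch, not from the patch above: the deferred-completion
 * pattern br_mdb_complete() implements, reduced to its shape. The caller
 * allocates a small context, hangs it off the switchdev object and lets the
 * driver call back once the (possibly asynchronous) offload has finished.
 * my_ctx, my_complete and mark_offloaded are made-up names.
 */
static void my_complete(struct net_device *dev, int err, void *priv)
{
	struct my_ctx *ctx = priv;

	if (!err)
		mark_offloaded(ctx);
	kfree(ctx);
}

/* at the call site, before handing the object to the driver: */
obj.complete_priv = ctx;
obj.complete = my_complete;
switchdev_port_obj_add(port_dev, &obj);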
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a4c15df2b792..6852f3c7009c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -283,7 +283,8 @@ static void br_multicast_del_pg(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
- br_mdb_notify(br->dev, p, RTM_DELMDB);
+ br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
+ p->flags);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
if (!mp->ports && !mp->mglist &&
@@ -705,7 +706,7 @@ static int br_multicast_add_group(struct net_bridge *br,
if (unlikely(!p))
goto err;
rcu_assign_pointer(*pp, p);
- br_mdb_notify(br->dev, p, RTM_NEWMDB);
+ br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0);
found:
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -1278,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
+ unsigned int offset = skb_transport_offset(skb);
__be32 group;
int err = 0;
@@ -1288,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
group = ih->group;
- if (skb->len == sizeof(*ih)) {
+ if (skb->len == offset + sizeof(*ih)) {
max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
if (!max_delay) {
max_delay = 10 * HZ;
group = 0;
}
- } else if (skb->len >= sizeof(*ih3)) {
+ } else if (skb->len >= offset + sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs)
goto out;
@@ -1356,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
+ unsigned int offset = skb_transport_offset(skb);
const struct in6_addr *group = NULL;
bool is_general_query;
int err = 0;
@@ -1365,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- if (skb->len == sizeof(*mld)) {
- if (!pskb_may_pull(skb, sizeof(*mld))) {
+ if (skb->len == offset + sizeof(*mld)) {
+ if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
err = -EINVAL;
goto out;
}
@@ -1375,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (max_delay)
group = &mld->mld_mca;
} else {
- if (!pskb_may_pull(skb, sizeof(*mld2q))) {
+ if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
err = -EINVAL;
goto out;
}
@@ -1461,7 +1464,8 @@ br_multicast_leave_group(struct net_bridge *br,
hlist_del_init(&p->mglist);
del_timer(&p->timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- br_mdb_notify(br->dev, p, RTM_DELMDB);
+ br_mdb_notify(br->dev, port, group, RTM_DELMDB,
+ p->flags);
if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
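/* Illustrative sketch, not from the patch above: why the offset shows up in
 * the length checks. skb->len is counted from skb->data (the network header
 * at this point), while the IGMP/MLD query starts at the transport header,
 * so every size comparison has to add the gap. Assuming the MLD query is
 * parsed as struct mld_msg, as br_ip6_multicast_query() does:
 */
unsigned int offset = skb_transport_offset(skb);

if (!pskb_may_pull(skb, offset + sizeof(struct mld_msg)))
	return -EINVAL;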
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 44114a94c576..2d25979273a6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
len = ntohs(iph->tot_len);
if (skb->len < len) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
if (pskb_trim_rcsum(skb, len)) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
return 0;
inhdr_error:
- IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
return -1;
}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index d61f56efc8dc..5e59a8457e7b 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + ip6h_len > skb->len) {
- IP6_INC_STATS_BH(net, idev,
- IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP6_INC_STATS(net, idev,
+ IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
- IP6_INC_STATS_BH(net, idev,
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev,
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
@@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
return 0;
inhdr_error:
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
drop:
return -1;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6bae1125e36d..a5343c7232bf 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -850,6 +850,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 },
[IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 },
[IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
+ [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -921,6 +922,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (data[IFLA_BR_VLAN_STATS_ENABLED]) {
+ __u8 vlan_stats = nla_get_u8(data[IFLA_BR_VLAN_STATS_ENABLED]);
+
+ err = br_vlan_set_stats(br, vlan_stats);
+ if (err)
+ return err;
+ }
#endif
if (data[IFLA_BR_GROUP_FWD_MASK]) {
@@ -1082,6 +1091,7 @@ static size_t br_get_size(const struct net_device *brdev)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */
nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_ENABLED */
#endif
nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */
nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */
@@ -1167,7 +1177,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) ||
- nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid))
+ nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) ||
+ nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br->vlan_stats_enabled))
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
@@ -1223,6 +1234,69 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
return 0;
}
+static size_t br_get_linkxstats_size(const struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ int numvls = 0;
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
+
+ /* we need to count all, even placeholder entries */
+ list_for_each_entry(v, &vg->vlan_list, vlist)
+ numvls++;
+
+ /* account for the vlans and the link xstats type nest attribute */
+ return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
+ nla_total_size(0);
+}
+
+static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
+ int *prividx)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct nlattr *nest;
+ int vl_idx = 0;
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ goto out;
+ nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
+ if (!nest)
+ return -EMSGSIZE;
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ struct bridge_vlan_xstats vxi;
+ struct br_vlan_stats stats;
+
+ if (vl_idx++ < *prividx)
+ continue;
+ memset(&vxi, 0, sizeof(vxi));
+ vxi.vid = v->vid;
+ br_vlan_get_stats(v, &stats);
+ vxi.rx_bytes = stats.rx_bytes;
+ vxi.rx_packets = stats.rx_packets;
+ vxi.tx_bytes = stats.tx_bytes;
+ vxi.tx_packets = stats.tx_packets;
+
+ if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ *prividx = 0;
+out:
+ return 0;
+
+nla_put_failure:
+ nla_nest_end(skb, nest);
+ *prividx = vl_idx;
+
+ return -EMSGSIZE;
+}
static struct rtnl_af_ops br_af_ops __read_mostly = {
.family = AF_BRIDGE,
@@ -1241,6 +1315,8 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
.dellink = br_dev_delete,
.get_size = br_get_size,
.fill_info = br_fill_info,
+ .fill_linkxstats = br_fill_linkxstats,
+ .get_linkxstats_size = br_get_linkxstats_size,
.slave_maxtype = IFLA_BRPORT_MAX,
.slave_policy = br_port_policy,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1b5d145dfcbf..c7fb5d7a7218 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -77,12 +77,21 @@ struct bridge_mcast_querier {
};
#endif
+struct br_vlan_stats {
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 tx_packets;
+ struct u64_stats_sync syncp;
+};
+
/**
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
+ * @stats: per-cpu VLAN statistics
* @br: if MASTER flag set, this points to a bridge struct
* @port: if MASTER flag unset, this points to a port struct
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
@@ -100,6 +109,7 @@ struct net_bridge_vlan {
struct rhash_head vnode;
u16 vid;
u16 flags;
+ struct br_vlan_stats __percpu *stats;
union {
struct net_bridge *br;
struct net_bridge_port *port;
@@ -342,6 +352,7 @@ struct net_bridge
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
u8 vlan_enabled;
+ u8 vlan_stats_enabled;
__be16 vlan_proto;
u16 default_pvid;
#endif
@@ -560,8 +571,8 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
unsigned char flags);
void br_mdb_init(void);
void br_mdb_uninit(void);
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
- int type);
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
+ struct br_ip *group, int type, u8 flags);
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
@@ -691,6 +702,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
+int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
@@ -699,6 +711,8 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
int nbp_vlan_init(struct net_bridge_port *port);
int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
+void br_vlan_get_stats(const struct net_bridge_vlan *v,
+ struct br_vlan_stats *stats);
static inline struct net_bridge_vlan_group *br_vlan_group(
const struct net_bridge *br)
@@ -881,6 +895,10 @@ static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu(
return NULL;
}
+static inline void br_vlan_get_stats(const struct net_bridge_vlan *v,
+ struct br_vlan_stats *stats)
+{
+}
#endif
struct nf_br_ops {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 70bddfd0f3e9..beb47071e38d 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -731,6 +731,22 @@ static ssize_t default_pvid_store(struct device *d,
return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid);
}
static DEVICE_ATTR_RW(default_pvid);
+
+static ssize_t vlan_stats_enabled_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%u\n", br->vlan_stats_enabled);
+}
+
+static ssize_t vlan_stats_enabled_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_vlan_set_stats);
+}
+static DEVICE_ATTR_RW(vlan_stats_enabled);
#endif
static struct attribute *bridge_attrs[] = {
@@ -778,6 +794,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_vlan_filtering.attr,
&dev_attr_vlan_protocol.attr,
&dev_attr_default_pvid.attr,
+ &dev_attr_vlan_stats_enabled.attr,
#endif
NULL
};
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index e001152d6ad1..b6de4f457161 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -162,6 +162,17 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
return masterv;
}
+static void br_master_vlan_rcu_free(struct rcu_head *rcu)
+{
+ struct net_bridge_vlan *v;
+
+ v = container_of(rcu, struct net_bridge_vlan, rcu);
+ WARN_ON(!br_vlan_is_master(v));
+ free_percpu(v->stats);
+ v->stats = NULL;
+ kfree(v);
+}
+
static void br_vlan_put_master(struct net_bridge_vlan *masterv)
{
struct net_bridge_vlan_group *vg;
@@ -174,7 +185,7 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
rhashtable_remove_fast(&vg->vlan_hash,
&masterv->vnode, br_vlan_rht_params);
__vlan_del_list(masterv);
- kfree_rcu(masterv, rcu);
+ call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
}
}
@@ -230,6 +241,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
if (!masterv)
goto out_filt;
v->brvlan = masterv;
+ v->stats = masterv->stats;
}
/* Add the dev mac and count the vlan only if it's usable */
@@ -329,6 +341,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb)
{
+ struct br_vlan_stats *stats;
struct net_bridge_vlan *v;
u16 vid;
@@ -355,18 +368,27 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
return NULL;
}
}
+ if (br->vlan_stats_enabled) {
+ stats = this_cpu_ptr(v->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ u64_stats_update_end(&stats->syncp);
+ }
+
if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
skb->vlan_tci = 0;
-
out:
return skb;
}
/* Called under RCU */
-static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
+static bool __allowed_ingress(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg,
struct sk_buff *skb, u16 *vid)
{
- const struct net_bridge_vlan *v;
+ struct br_vlan_stats *stats;
+ struct net_bridge_vlan *v;
bool tagged;
BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
@@ -375,7 +397,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
* HW accelerated vlan tag.
*/
if (unlikely(!skb_vlan_tag_present(skb) &&
- skb->protocol == proto)) {
+ skb->protocol == br->vlan_proto)) {
skb = skb_vlan_untag(skb);
if (unlikely(!skb))
return false;
@@ -383,7 +405,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
if (!br_vlan_get_tag(skb, vid)) {
/* Tagged frame */
- if (skb->vlan_proto != proto) {
+ if (skb->vlan_proto != br->vlan_proto) {
/* Protocol-mismatch, empty out vlan_tci for new tag */
skb_push(skb, ETH_HLEN);
skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
@@ -419,7 +441,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
*vid = pvid;
if (likely(!tagged))
/* Untagged Frame. */
- __vlan_hwaccel_put_tag(skb, proto, pvid);
+ __vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid);
else
/* Priority-tagged Frame.
* At this point, We know that skb->vlan_tci had
@@ -428,13 +450,24 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
*/
skb->vlan_tci |= pvid;
- return true;
+ /* if stats are disabled we can avoid the lookup */
+ if (!br->vlan_stats_enabled)
+ return true;
}
-
- /* Frame had a valid vlan tag. See if vlan is allowed */
v = br_vlan_find(vg, *vid);
- if (v && br_vlan_should_use(v))
- return true;
+ if (!v || !br_vlan_should_use(v))
+ goto drop;
+
+ if (br->vlan_stats_enabled) {
+ stats = this_cpu_ptr(v->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_bytes += skb->len;
+ stats->rx_packets++;
+ u64_stats_update_end(&stats->syncp);
+ }
+
+ return true;
+
drop:
kfree_skb(skb);
return false;
@@ -452,7 +485,7 @@ bool br_allowed_ingress(const struct net_bridge *br,
return true;
}
- return __allowed_ingress(vg, br->vlan_proto, skb, vid);
+ return __allowed_ingress(br, vg, skb, vid);
}
/* Called under RCU. */
@@ -542,6 +575,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
if (!vlan)
return -ENOMEM;
+ vlan->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats);
+ if (!vlan->stats) {
+ kfree(vlan);
+ return -ENOMEM;
+ }
vlan->vid = vid;
vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER;
vlan->flags &= ~BRIDGE_VLAN_INFO_PVID;
@@ -549,8 +587,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
if (flags & BRIDGE_VLAN_INFO_BRENTRY)
atomic_set(&vlan->refcnt, 1);
ret = __vlan_add(vlan, flags);
- if (ret)
+ if (ret) {
+ free_percpu(vlan->stats);
kfree(vlan);
+ }
return ret;
}
@@ -711,6 +751,20 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
return __br_vlan_set_proto(br, htons(val));
}
+int br_vlan_set_stats(struct net_bridge *br, unsigned long val)
+{
+ switch (val) {
+ case 0:
+ case 1:
+ br->vlan_stats_enabled = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
struct net_bridge_vlan *v;
@@ -1000,3 +1054,30 @@ void nbp_vlan_flush(struct net_bridge_port *port)
synchronize_rcu();
__vlan_group_free(vg);
}
+
+void br_vlan_get_stats(const struct net_bridge_vlan *v,
+ struct br_vlan_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(i) {
+ u64 rxpackets, rxbytes, txpackets, txbytes;
+ struct br_vlan_stats *cpu_stats;
+ unsigned int start;
+
+ cpu_stats = per_cpu_ptr(v->stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ rxpackets = cpu_stats->rx_packets;
+ rxbytes = cpu_stats->rx_bytes;
+ txbytes = cpu_stats->tx_bytes;
+ txpackets = cpu_stats->tx_packets;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->rx_packets += rxpackets;
+ stats->rx_bytes += rxbytes;
+ stats->tx_bytes += txbytes;
+ stats->tx_packets += txpackets;
+ }
+}
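
The per-VLAN counters introduced in the br_vlan.c hunks above follow the usual per-CPU pattern: each CPU bumps only its own counter block inside a u64_stats section, and a reader sums every CPU's block on demand. A minimal stand-alone C sketch of that aggregation step, assuming a fixed CPU count and plain reads in place of the kernel's seqcount (all names below are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct vlan_stats {
	uint64_t rx_bytes;
	uint64_t rx_packets;
	uint64_t tx_bytes;
	uint64_t tx_packets;
};

/* one counter block per CPU; each writer only touches its own slot */
static struct vlan_stats percpu[NR_CPUS];

static struct vlan_stats vlan_stats_sum(void)
{
	struct vlan_stats sum = {0};

	for (int i = 0; i < NR_CPUS; i++) {
		/* the kernel brackets these reads with a u64_stats seqcount;
		 * plain reads are enough for this single-threaded sketch */
		sum.rx_bytes   += percpu[i].rx_bytes;
		sum.rx_packets += percpu[i].rx_packets;
		sum.tx_bytes   += percpu[i].tx_bytes;
		sum.tx_packets += percpu[i].tx_packets;
	}
	return sum;
}

int main(void)
{
	percpu[0].rx_packets = 3;
	percpu[2].rx_packets = 5;
	printf("rx_packets=%llu\n",
	       (unsigned long long)vlan_stats_sum().rx_packets);
	return 0;
}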
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 6b923bcaa2a4..2bc5965fdd1e 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -293,13 +293,9 @@ int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
}
EXPORT_SYMBOL(ceph_auth_create_authorizer);
-void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
- struct ceph_authorizer *a)
+void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
{
- mutex_lock(&ac->mutex);
- if (ac->ops && ac->ops->destroy_authorizer)
- ac->ops->destroy_authorizer(ac, a);
- mutex_unlock(&ac->mutex);
+ a->destroy(a);
}
EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 8c93fa8d81bc..5f836f02ae36 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac)
struct ceph_auth_none_info *xi = ac->private;
xi->starting = true;
- xi->built_authorizer = false;
}
static void destroy(struct ceph_auth_client *ac)
@@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac)
return xi->starting;
}
+static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
+ struct ceph_none_authorizer *au)
+{
+ void *p = au->buf;
+ void *const end = p + sizeof(au->buf);
+ int ret;
+
+ ceph_encode_8_safe(&p, end, 1, e_range);
+ ret = ceph_entity_name_encode(ac->name, &p, end);
+ if (ret < 0)
+ return ret;
+
+ ceph_encode_64_safe(&p, end, ac->global_id, e_range);
+ au->buf_len = p - (void *)au->buf;
+ dout("%s built authorizer len %d\n", __func__, au->buf_len);
+ return 0;
+
+e_range:
+ return -ERANGE;
+}
+
static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
{
return 0;
@@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
return result;
}
+static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a)
+{
+ kfree(a);
+}
+
/*
- * build an 'authorizer' with our entity_name and global_id. we can
- * reuse a single static copy since it is identical for all services
- * we connect to.
+ * build an 'authorizer' with our entity_name and global_id. it is
+ * identical for all services we connect to.
*/
static int ceph_auth_none_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth)
{
- struct ceph_auth_none_info *ai = ac->private;
- struct ceph_none_authorizer *au = &ai->au;
- void *p, *end;
+ struct ceph_none_authorizer *au;
int ret;
- if (!ai->built_authorizer) {
- p = au->buf;
- end = p + sizeof(au->buf);
- ceph_encode_8(&p, 1);
- ret = ceph_entity_name_encode(ac->name, &p, end - 8);
- if (ret < 0)
- goto bad;
- ceph_decode_need(&p, end, sizeof(u64), bad2);
- ceph_encode_64(&p, ac->global_id);
- au->buf_len = p - (void *)au->buf;
- ai->built_authorizer = true;
- dout("built authorizer len %d\n", au->buf_len);
+ au = kmalloc(sizeof(*au), GFP_NOFS);
+ if (!au)
+ return -ENOMEM;
+
+ au->base.destroy = ceph_auth_none_destroy_authorizer;
+
+ ret = ceph_auth_none_build_authorizer(ac, au);
+ if (ret) {
+ kfree(au);
+ return ret;
}
auth->authorizer = (struct ceph_authorizer *) au;
@@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer(
auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
return 0;
-
-bad2:
- ret = -ERANGE;
-bad:
- return ret;
-}
-
-static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
- struct ceph_authorizer *a)
-{
- /* nothing to do */
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
@@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.build_request = build_request,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
- .destroy_authorizer = ceph_auth_none_destroy_authorizer,
};
int ceph_auth_none_init(struct ceph_auth_client *ac)
@@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac)
return -ENOMEM;
xi->starting = true;
- xi->built_authorizer = false;
ac->protocol = CEPH_AUTH_NONE;
ac->private = xi;
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 059a3ce4b53f..62021535ae4a 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -12,6 +12,7 @@
*/
struct ceph_none_authorizer {
+ struct ceph_authorizer base;
char buf[128];
int buf_len;
char reply_buf[0];
@@ -19,8 +20,6 @@ struct ceph_none_authorizer {
struct ceph_auth_none_info {
bool starting;
- bool built_authorizer;
- struct ceph_none_authorizer au; /* we only need one; it's static */
};
int ceph_auth_none_init(struct ceph_auth_client *ac);
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 9e43a315e662..a0905f04bd13 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -565,6 +565,14 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
+static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+
+ ceph_x_authorizer_cleanup(au);
+ kfree(au);
+}
+
static int ceph_x_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth)
@@ -581,6 +589,8 @@ static int ceph_x_create_authorizer(
if (!au)
return -ENOMEM;
+ au->base.destroy = ceph_x_destroy_authorizer;
+
ret = ceph_x_build_authorizer(ac, th, au);
if (ret) {
kfree(au);
@@ -643,16 +653,6 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
return ret;
}
-static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
- struct ceph_authorizer *a)
-{
- struct ceph_x_authorizer *au = (void *)a;
-
- ceph_x_authorizer_cleanup(au);
- kfree(au);
-}
-
-
static void ceph_x_reset(struct ceph_auth_client *ac)
{
struct ceph_x_info *xi = ac->private;
@@ -770,7 +770,6 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.create_authorizer = ceph_x_create_authorizer,
.update_authorizer = ceph_x_update_authorizer,
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
- .destroy_authorizer = ceph_x_destroy_authorizer,
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
.destroy = ceph_x_destroy,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 40b1a3cf7397..21a5af904bae 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
struct ceph_x_authorizer {
+ struct ceph_authorizer base;
struct ceph_crypto_key session_key;
struct ceph_buffer *buf;
unsigned int service;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 32355d9d0103..40a53a70efdf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1087,10 +1087,8 @@ static void put_osd(struct ceph_osd *osd)
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
if (atomic_dec_and_test(&osd->o_ref)) {
- struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
-
if (osd->o_auth.authorizer)
- ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
+ ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
kfree(osd);
}
}
@@ -2984,7 +2982,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_auth_handshake *auth = &o->o_auth;
if (force_new && auth->authorizer) {
- ceph_auth_destroy_authorizer(ac, auth->authorizer);
+ ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
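
The ceph hunks above move authorizer destruction out of ceph_auth_client_ops and into a destroy callback embedded in each authorizer, so callers such as osd_client can free one without reaching back into the auth client. A hedged sketch of that embed-a-base-with-a-destroy-hook pattern, with every name invented for the example:

#include <stdlib.h>

struct authorizer {
	void (*destroy)(struct authorizer *a);
};

struct none_authorizer {
	struct authorizer base;	/* first member, so the cast below is safe */
	char buf[128];
};

static void none_destroy(struct authorizer *a)
{
	/* base is the first member of none_authorizer */
	free((struct none_authorizer *)a);
}

static struct authorizer *none_create(void)
{
	struct none_authorizer *au = calloc(1, sizeof(*au));

	if (!au)
		return NULL;
	au->base.destroy = none_destroy;
	return &au->base;
}

int main(void)
{
	struct authorizer *a = none_create();

	if (a)
		a->destroy(a);	/* mirrors ceph_auth_destroy_authorizer() */
	return 0;
}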
diff --git a/net/core/dev.c b/net/core/dev.c
index 6324bc9267f7..12436d1312ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1741,7 +1741,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
__net_timestamp(SKB); \
} \
-bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
+bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
{
unsigned int len;
@@ -1850,7 +1850,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
* taps currently in use.
*/
-static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
struct sk_buff *skb2 = NULL;
@@ -1907,6 +1907,7 @@ out_unlock:
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
/**
* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
@@ -2815,7 +2816,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, type)) {
- features &= ~NETIF_F_CSUM_MASK;
+ features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
@@ -3469,6 +3470,7 @@ u32 rps_cpu_mask __read_mostly;
EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
+EXPORT_SYMBOL(rps_needed);
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -3955,9 +3957,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
break;
case TC_ACT_SHOT:
qdisc_qstats_cpu_drop(cl->q);
+ kfree_skb(skb);
+ return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
- kfree_skb(skb);
+ consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
/* skb_mac_header check was done by cls/act_bpf, so
@@ -4982,8 +4986,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
netpoll_poll_unlock(have);
}
if (rc > 0)
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ __NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
if (rc == LL_FLUSH_FAILED)
@@ -6720,6 +6724,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_TSO6;
}
+ /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
+ if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
+ features &= ~NETIF_F_TSO_MANGLEID;
+
/* TSO ECN requires that TSO is present as well. */
if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
features &= ~NETIF_F_TSO_ECN;
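
The netdev_fix_features() change above is a dependency fixup: a feature that needs another feature is cleared when the prerequisite is missing from the mask. A small stand-alone sketch of the same mask reduction, using made-up flag bits rather than the real NETIF_F_* values:

#include <stdint.h>
#include <stdio.h>

#define F_TSO		(1u << 0)
#define F_TSO_MANGLEID	(1u << 1)
#define F_TSO_ECN	(1u << 2)

static uint32_t fix_features(uint32_t features)
{
	/* TSO with IPv4 ID mangling requires plain TSO */
	if ((features & F_TSO_MANGLEID) && !(features & F_TSO))
		features &= ~F_TSO_MANGLEID;
	/* TSO ECN requires TSO as well */
	if ((features & F_TSO_ECN) && !(features & F_TSO))
		features &= ~F_TSO_ECN;
	return features;
}

int main(void)
{
	/* both dependent flags are dropped because F_TSO is absent */
	printf("%#x\n", (unsigned int)fix_features(F_TSO_MANGLEID | F_TSO_ECN));
	return 0;
}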
diff --git a/net/core/filter.c b/net/core/filter.c
index 218e5de8c402..71c2a1f473ad 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1344,6 +1344,21 @@ struct bpf_scratchpad {
static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
+static inline int bpf_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err;
+
+ if (!skb_cloned(skb))
+ return 0;
+ if (skb_clone_writable(skb, write_len))
+ return 0;
+ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (!err)
+ bpf_compute_data_end(skb);
+ return err;
+}
+
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
@@ -1366,7 +1381,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
*/
if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + len)))
+ if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, sp->buff);
@@ -1444,7 +1459,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1499,7 +1514,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1699,12 +1714,15 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
__be16 vlan_proto = (__force __be16) r2;
+ int ret;
if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
vlan_proto != htons(ETH_P_8021AD)))
vlan_proto = htons(ETH_P_8021Q);
- return skb_vlan_push(skb, vlan_proto, vlan_tci);
+ ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+ bpf_compute_data_end(skb);
+ return ret;
}
const struct bpf_func_proto bpf_skb_vlan_push_proto = {
@@ -1720,8 +1738,11 @@ EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ int ret;
- return skb_vlan_pop(skb);
+ ret = skb_vlan_pop(skb);
+ bpf_compute_data_end(skb);
+ return ret;
}
const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
@@ -2066,8 +2087,12 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
- if (off == offsetof(struct __sk_buff, tc_classid))
+ switch (off) {
+ case offsetof(struct __sk_buff, tc_classid):
+ case offsetof(struct __sk_buff, data):
+ case offsetof(struct __sk_buff, data_end):
return false;
+ }
if (type == BPF_WRITE) {
switch (off) {
@@ -2215,6 +2240,20 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
break;
+ case offsetof(struct __sk_buff, data):
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, data));
+ break;
+
+ case offsetof(struct __sk_buff, data_end):
+ ctx_off -= offsetof(struct __sk_buff, data_end);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct bpf_skb_data_end, data_end);
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)),
+ dst_reg, src_reg, ctx_off);
+ break;
+
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
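
bpf_try_make_writable() above is a copy-on-write guard: a helper may write into the skb only after confirming the data is private (or unsharing it first), and the cached data_end pointer is recomputed afterwards. A simplified, non-kernel model of the unshare-before-write check; the buffer type and its fields are invented for the example:

#include <stdlib.h>
#include <string.h>

struct buf {
	unsigned char *data;
	size_t len;
	int shared;		/* nonzero when data may be seen by others */
};

static int try_make_writable(struct buf *b, size_t write_len)
{
	unsigned char *copy;

	if (write_len > b->len)
		return -1;	/* would write past the end */
	if (!b->shared)
		return 0;	/* already private, cheap path */

	copy = malloc(b->len);	/* unshare before the write */
	if (!copy)
		return -1;
	memcpy(copy, b->data, b->len);
	b->data = copy;
	b->shared = 0;
	return 0;
}

int main(void)
{
	unsigned char backing[] = "abcdef";
	struct buf b = { backing, sizeof(backing) - 1, 1 /* shared */ };

	if (try_make_writable(&b, 3) == 0) {
		b.data[0] = 'X';	/* safe: data is now a private copy */
		free(b.data);
	}
	return 0;
}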
diff --git a/net/core/flow.c b/net/core/flow.c
index 1033725be40b..3937b1b68d5b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work)
list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
spin_unlock_bh(&xfrm->flow_cache_gc_lock);
- list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+ list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
flow_entry_kill(fce, xfrm);
+ atomic_dec(&xfrm->flow_cache_gc_count);
+ WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0);
+ }
}
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
@@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
struct netns_xfrm *xfrm)
{
if (deleted) {
+ atomic_add(deleted, &xfrm->flow_cache_gc_count);
fcp->hash_count -= deleted;
spin_lock_bh(&xfrm->flow_cache_gc_lock);
list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
@@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fcp->hash_count > fc->high_watermark)
flow_cache_shrink(fc, fcp);
+ if (fcp->hash_count > 2 * fc->high_watermark ||
+ atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) {
+ atomic_inc(&net->xfrm.flow_cache_genid);
+ flo = ERR_PTR(-ENOBUFS);
+ goto ret_object;
+ }
+
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
fle->net = net;
@@ -446,6 +457,7 @@ int flow_cache_init(struct net *net)
INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
mutex_init(&net->xfrm.flow_flush_sem);
+ atomic_set(&net->xfrm.flow_cache_gc_count, 0);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
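
The flow-cache change above adds backpressure: when too many dead entries are still queued for the GC worker, new insertions are refused with -ENOBUFS rather than growing the table further. A tiny model of that admission check, with an arbitrary watermark:

#include <stdio.h>

#define HIGH_WATERMARK 1024

static int can_insert(unsigned int hash_count, unsigned int pending_gc)
{
	/* grow only while the table and the GC backlog are both bounded */
	if (hash_count > 2 * HIGH_WATERMARK || pending_gc > HIGH_WATERMARK)
		return 0;	/* caller should fail with -ENOBUFS */
	return 1;
}

int main(void)
{
	printf("%d\n", can_insert(100, 2000));	/* 0: GC backlog too large */
	printf("%d\n", can_insert(100, 10));	/* 1: ok to insert */
	return 0;
}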
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index e640462ea8bf..f96ee8b9478d 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -25,9 +25,9 @@
static inline int
-gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
+gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
{
- if (nla_put(d->skb, type, size, buf))
+ if (nla_put_64bit(d->skb, type, size, buf, padattr))
goto nla_put_failure;
return 0;
@@ -59,7 +59,8 @@ nla_put_failure:
*/
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
- int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+ int xstats_type, spinlock_t *lock,
+ struct gnet_dump *d, int padattr)
__acquires(lock)
{
memset(d, 0, sizeof(*d));
@@ -71,16 +72,17 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
+ d->padattr = padattr;
if (d->tail)
- return gnet_stats_copy(d, type, NULL, 0);
+ return gnet_stats_copy(d, type, NULL, 0, padattr);
return 0;
}
EXPORT_SYMBOL(gnet_stats_start_copy_compat);
/**
- * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * gnet_stats_start_copy - start dumping procedure in compatibility mode
* @skb: socket buffer to put statistics TLVs into
* @type: TLV type for top level statistic TLV
* @lock: statistics lock
@@ -94,9 +96,9 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat);
*/
int
gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
- struct gnet_dump *d)
+ struct gnet_dump *d, int padattr)
{
- return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
+ return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d, padattr);
}
EXPORT_SYMBOL(gnet_stats_start_copy);
@@ -169,7 +171,8 @@ gnet_stats_copy_basic(struct gnet_dump *d,
memset(&sb, 0, sizeof(sb));
sb.bytes = bstats.bytes;
sb.packets = bstats.packets;
- return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
+ return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb),
+ TCA_STATS_PAD);
}
return 0;
}
@@ -208,11 +211,13 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
}
if (d->tail) {
- res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+ res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est),
+ TCA_STATS_PAD);
if (res < 0 || est.bps == r->bps)
return res;
/* emit 64bit stats only if needed */
- return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+ return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r),
+ TCA_STATS_PAD);
}
return 0;
@@ -286,7 +291,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
if (d->tail)
return gnet_stats_copy(d, TCA_STATS_QUEUE,
- &qstats, sizeof(qstats));
+ &qstats, sizeof(qstats),
+ TCA_STATS_PAD);
return 0;
}
@@ -316,7 +322,8 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
}
if (d->tail)
- return gnet_stats_copy(d, TCA_STATS_APP, st, len);
+ return gnet_stats_copy(d, TCA_STATS_APP, st, len,
+ TCA_STATS_PAD);
return 0;
@@ -347,12 +354,12 @@ gnet_stats_finish_copy(struct gnet_dump *d)
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
- sizeof(d->tc_stats)) < 0)
+ sizeof(d->tc_stats), d->padattr) < 0)
return -1;
if (d->compat_xstats && d->xstats) {
if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
- d->xstats_len) < 0)
+ d->xstats_len, d->padattr) < 0)
return -1;
}
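
Threading TCA_STATS_PAD (and IFLA_PAD, NDTA_PAD in the later hunks) through these dumps lets nla_put_64bit() keep 64-bit payloads 8-byte aligned inside the netlink message by emitting a zero-length pad attribute when the current offset is misaligned. A small illustration of the padding arithmetic only; the constant is a stand-in, not the netlink implementation:

#include <stdio.h>

static unsigned int pad_needed(unsigned int offset)
{
	/* bytes of zero padding before an 8-byte aligned payload */
	return (8 - (offset & 7)) & 7;
}

int main(void)
{
	printf("offset 20 needs %u pad bytes\n", pad_needed(20));
	printf("offset 24 needs %u pad bytes\n", pad_needed(24));
	return 0;
}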
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6a395d440228..29dd8cc22bbf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1857,7 +1857,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
ndst.ndts_table_fulls += st->table_fulls;
}
- if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
+ if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
+ NDTA_PAD))
goto nla_put_failure;
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 2bf83299600a..14d09345f00d 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -162,7 +162,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- sd->cpu_collision, sd->received_rps, flow_limit_count);
+ 0, /* was cpu_collision */
+ sd->received_rps, flow_limit_count);
return 0;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 20999aa596dd..8604ae245960 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3472,7 +3472,6 @@ xmit_more:
pkt_dev->odevname, ret);
pkt_dev->errors++;
/* fallthru */
- case NETDEV_TX_LOCKED:
case NETDEV_TX_BUSY:
/* Retry it next time */
atomic_dec(&(pkt_dev->skb->users));
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9efc1f34ef3b..d69c4644f8f2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -876,7 +876,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
+ nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
- + nla_total_size(sizeof(struct rtnl_link_ifmap))
+ + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap))
+ nla_total_size(sizeof(struct rtnl_link_stats))
+ nla_total_size_64bit(sizeof(struct rtnl_link_stats64))
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
@@ -1173,15 +1173,17 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
- struct rtnl_link_ifmap map = {
- .mem_start = dev->mem_start,
- .mem_end = dev->mem_end,
- .base_addr = dev->base_addr,
- .irq = dev->irq,
- .dma = dev->dma,
- .port = dev->if_port,
- };
- if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+ struct rtnl_link_ifmap map;
+
+ memset(&map, 0, sizeof(map));
+ map.mem_start = dev->mem_start;
+ map.mem_end = dev->mem_end;
+ map.base_addr = dev->base_addr;
+ map.irq = dev->irq;
+ map.dma = dev->dma;
+ map.port = dev->if_port;
+
+ if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
return 0;
@@ -3444,13 +3446,21 @@ out:
return err;
}
+static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
+{
+ return (mask & IFLA_STATS_FILTER_BIT(attrid)) &&
+ (!idxattr || idxattr == attrid);
+}
+
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
- unsigned int flags, unsigned int filter_mask)
+ unsigned int flags, unsigned int filter_mask,
+ int *idxattr, int *prividx)
{
struct if_stats_msg *ifsm;
struct nlmsghdr *nlh;
struct nlattr *attr;
+ int s_prividx = *prividx;
ASSERT_RTNL();
@@ -3462,7 +3472,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
ifsm->ifindex = dev->ifindex;
ifsm->filter_mask = filter_mask;
- if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64)) {
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) {
struct rtnl_link_stats64 *sp;
attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
@@ -3475,12 +3485,36 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
dev_get_stats(dev, sp);
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) {
+ const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+
+ if (ops && ops->fill_linkxstats) {
+ int err;
+
+ *idxattr = IFLA_STATS_LINK_XSTATS;
+ attr = nla_nest_start(skb,
+ IFLA_STATS_LINK_XSTATS);
+ if (!attr)
+ goto nla_put_failure;
+
+ err = ops->fill_linkxstats(skb, dev, prividx);
+ nla_nest_end(skb, attr);
+ if (err)
+ goto nla_put_failure;
+ *idxattr = 0;
+ }
+ }
+
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
- nlmsg_cancel(skb, nlh);
+ /* not a multi-part message, or no progress at all, means a real error */
+ if (!(flags & NLM_F_MULTI) || s_prividx == *prividx)
+ nlmsg_cancel(skb, nlh);
+ else
+ nlmsg_end(skb, nlh);
return -EMSGSIZE;
}
@@ -3494,17 +3528,28 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
{
size_t size = 0;
- if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64))
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) {
+ const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+
+ if (ops && ops->get_linkxstats_size) {
+ size += nla_total_size(ops->get_linkxstats_size(dev));
+ /* for IFLA_STATS_LINK_XSTATS */
+ size += nla_total_size(0);
+ }
+ }
+
return size;
}
static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- struct if_stats_msg *ifsm;
struct net_device *dev = NULL;
+ int idxattr = 0, prividx = 0;
+ struct if_stats_msg *ifsm;
struct sk_buff *nskb;
u32 filter_mask;
int err;
@@ -3528,7 +3573,7 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- 0, filter_mask);
+ 0, filter_mask, &idxattr, &prividx);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
WARN_ON(err == -EMSGSIZE);
@@ -3542,18 +3587,19 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ int h, s_h, err, s_idx, s_idxattr, s_prividx;
struct net *net = sock_net(skb->sk);
+ unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
struct hlist_head *head;
- unsigned int flags = NLM_F_MULTI;
+ struct net_device *dev;
u32 filter_mask = 0;
- int err;
+ int idx = 0;
s_h = cb->args[0];
s_idx = cb->args[1];
+ s_idxattr = cb->args[2];
+ s_prividx = cb->args[3];
cb->seq = net->dev_base_seq;
@@ -3571,7 +3617,8 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
- flags, filter_mask);
+ flags, filter_mask,
+ &s_idxattr, &s_prividx);
/* If we ran out of room on the first message,
* we're in trouble
*/
@@ -3579,13 +3626,16 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
goto out;
-
+ s_prividx = 0;
+ s_idxattr = 0;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
out:
+ cb->args[3] = s_prividx;
+ cb->args[2] = s_idxattr;
cb->args[1] = idx;
cb->args[0] = h;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7ff7788b0151..f2b77e549c03 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3080,8 +3080,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
unsigned int headroom;
unsigned int len = head_skb->len;
__be16 proto;
- bool csum;
- int sg = !!(features & NETIF_F_SG);
+ bool csum, sg;
int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
@@ -3093,15 +3092,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
+ sg = !!(features & NETIF_F_SG);
csum = !!can_checksum_protocol(features, proto);
/* GSO partial only requires that we trim off any excess that
* doesn't fit into an MSS sized block, so take care of that
* now.
*/
- if (features & NETIF_F_GSO_PARTIAL) {
+ if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) {
partial_segs = len / mss;
- mss *= partial_segs;
+ if (partial_segs > 1)
+ mss *= partial_segs;
+ else
+ partial_segs = 0;
}
headroom = skb_headroom(head_skb);
@@ -4622,3 +4625,239 @@ failure:
return NULL;
}
EXPORT_SYMBOL(alloc_skb_with_frags);
+
+/* carve out the first off bytes from skb when off < headlen */
+static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
+ const int headlen, gfp_t gfp_mask)
+{
+ int i;
+ int size = skb_end_offset(skb);
+ int new_hlen = headlen - off;
+ u8 *data;
+
+ size = SKB_DATA_ALIGN(size);
+
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+ data = kmalloc_reserve(size +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+ gfp_mask, NUMA_NO_NODE, NULL);
+ if (!data)
+ return -ENOMEM;
+
+ size = SKB_WITH_OVERHEAD(ksize(data));
+
+ /* Copy real data, and all frags */
+ skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
+ skb->len -= off;
+
+ memcpy((struct skb_shared_info *)(data + size),
+ skb_shinfo(skb),
+ offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ if (skb_cloned(skb)) {
+ /* drop the old head gracefully */
+ if (skb_orphan_frags(skb, gfp_mask)) {
+ kfree(data);
+ return -ENOMEM;
+ }
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ skb_frag_ref(skb, i);
+ if (skb_has_frag_list(skb))
+ skb_clone_fraglist(skb);
+ skb_release_data(skb);
+ } else {
+ /* we can reuse the existing refcount - all we did was
+ * relocate values
+ */
+ skb_free_head(skb);
+ }
+
+ skb->head = data;
+ skb->data = data;
+ skb->head_frag = 0;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->end = size;
+#else
+ skb->end = skb->head + size;
+#endif
+ skb_set_tail_pointer(skb, skb_headlen(skb));
+ skb_headers_offset_update(skb, 0);
+ skb->cloned = 0;
+ skb->hdr_len = 0;
+ skb->nohdr = 0;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
+
+ return 0;
+}
+
+static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
+
+/* carve out the first eat bytes from skb's frag_list. May recurse into
+ * pskb_carve()
+ */
+static int pskb_carve_frag_list(struct sk_buff *skb,
+ struct skb_shared_info *shinfo, int eat,
+ gfp_t gfp_mask)
+{
+ struct sk_buff *list = shinfo->frag_list;
+ struct sk_buff *clone = NULL;
+ struct sk_buff *insp = NULL;
+
+ do {
+ if (!list) {
+ pr_err("Not enough bytes to eat. Want %d\n", eat);
+ return -EFAULT;
+ }
+ if (list->len <= eat) {
+ /* Eaten as whole. */
+ eat -= list->len;
+ list = list->next;
+ insp = list;
+ } else {
+ /* Eaten partially. */
+ if (skb_shared(list)) {
+ clone = skb_clone(list, gfp_mask);
+ if (!clone)
+ return -ENOMEM;
+ insp = list->next;
+ list = clone;
+ } else {
+ /* This may be pulled without problems. */
+ insp = list;
+ }
+ if (pskb_carve(list, eat, gfp_mask) < 0) {
+ kfree_skb(clone);
+ return -ENOMEM;
+ }
+ break;
+ }
+ } while (eat);
+
+ /* Free pulled out fragments. */
+ while ((list = shinfo->frag_list) != insp) {
+ shinfo->frag_list = list->next;
+ kfree_skb(list);
+ }
+ /* And insert new clone at head. */
+ if (clone) {
+ clone->next = list;
+ shinfo->frag_list = clone;
+ }
+ return 0;
+}
+
+/* carve off first len bytes from skb. Split line (off) is in the
+ * non-linear part of skb
+ */
+static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
+ int pos, gfp_t gfp_mask)
+{
+ int i, k = 0;
+ int size = skb_end_offset(skb);
+ u8 *data;
+ const int nfrags = skb_shinfo(skb)->nr_frags;
+ struct skb_shared_info *shinfo;
+
+ size = SKB_DATA_ALIGN(size);
+
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+ data = kmalloc_reserve(size +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+ gfp_mask, NUMA_NO_NODE, NULL);
+ if (!data)
+ return -ENOMEM;
+
+ size = SKB_WITH_OVERHEAD(ksize(data));
+
+ memcpy((struct skb_shared_info *)(data + size),
+ skb_shinfo(skb), offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ if (skb_orphan_frags(skb, gfp_mask)) {
+ kfree(data);
+ return -ENOMEM;
+ }
+ shinfo = (struct skb_shared_info *)(data + size);
+ for (i = 0; i < nfrags; i++) {
+ int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (pos + fsize > off) {
+ shinfo->frags[k] = skb_shinfo(skb)->frags[i];
+
+ if (pos < off) {
+ /* Split frag.
+ * We have two variants in this case:
+ * 1. Move all the frag to the second
+ * part, if it is possible. F.e.
+ * this approach is mandatory for TUX,
+ * where splitting is expensive.
+ * 2. Split accurately at the offset. This is what we do here.
+ */
+ shinfo->frags[0].page_offset += off - pos;
+ skb_frag_size_sub(&shinfo->frags[0], off - pos);
+ }
+ skb_frag_ref(skb, i);
+ k++;
+ }
+ pos += fsize;
+ }
+ shinfo->nr_frags = k;
+ if (skb_has_frag_list(skb))
+ skb_clone_fraglist(skb);
+
+ if (k == 0) {
+ /* split line is in frag list */
+ pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
+ }
+ skb_release_data(skb);
+
+ skb->head = data;
+ skb->head_frag = 0;
+ skb->data = data;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->end = size;
+#else
+ skb->end = skb->head + size;
+#endif
+ skb_reset_tail_pointer(skb);
+ skb_headers_offset_update(skb, 0);
+ skb->cloned = 0;
+ skb->hdr_len = 0;
+ skb->nohdr = 0;
+ skb->len -= off;
+ skb->data_len = skb->len;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
+ return 0;
+}
+
+/* remove len bytes from the beginning of the skb */
+static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
+{
+ int headlen = skb_headlen(skb);
+
+ if (len < headlen)
+ return pskb_carve_inside_header(skb, len, headlen, gfp);
+ else
+ return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
+}
+
+/* Extract to_copy bytes starting at off from skb, and return this in
+ * a new skb
+ */
+struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
+ int to_copy, gfp_t gfp)
+{
+ struct sk_buff *clone = skb_clone(skb, gfp);
+
+ if (!clone)
+ return NULL;
+
+ if (pskb_carve(clone, off, gfp) < 0 ||
+ pskb_trim(clone, to_copy)) {
+ kfree_skb(clone);
+ return NULL;
+ }
+ return clone;
+}
+EXPORT_SYMBOL(pskb_extract);
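
pskb_extract() added above clones the skb, carves off bytes from the front and trims the tail down to to_copy. Stripped of all skb machinery, the extract-a-range-by-copy idea looks roughly like this plain-C sketch (illustrative only, not skb code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* returns a newly allocated copy of buf[off .. off+to_copy), or NULL */
static unsigned char *extract(const unsigned char *buf, size_t len,
			      size_t off, size_t to_copy)
{
	unsigned char *out;

	if (off > len || to_copy > len - off)
		return NULL;	/* requested range falls outside the buffer */
	out = malloc(to_copy);
	if (!out)
		return NULL;
	memcpy(out, buf + off, to_copy);
	return out;
}

int main(void)
{
	const unsigned char msg[] = "headerpayloadtrailer";
	unsigned char *payload = extract(msg, sizeof(msg) - 1, 6, 7);

	if (payload) {
		printf("%.7s\n", (char *)payload);	/* prints "payload" */
		free(payload);
	}
	return 0;
}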
diff --git a/net/core/sock.c b/net/core/sock.c
index e16a5db853c6..08bf97eceeb3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1655,6 +1655,17 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+/* This variant of sock_wfree() is used by TCP,
+ * since it sets SOCK_USE_WRITE_QUEUE.
+ */
+void __sock_wfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+ __sk_free(sk);
+}
+
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
@@ -1677,8 +1688,21 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
}
EXPORT_SYMBOL(skb_set_owner_w);
+/* This helper is used by netem, as it can hold packets in its
+ * delay queue. We want to allow the owner socket to send more
+ * packets, as if they were already TX completed by a typical driver.
+ * But we also want to keep skb->sk set because some packet schedulers
+ * rely on it (sch_fq for example). So we set skb->truesize to a small
+ * amount (1) and decrease sk_wmem_alloc accordingly.
+ */
void skb_orphan_partial(struct sk_buff *skb)
{
+ /* If this skb is a TCP pure ACK or already went here,
+ * we have nothing to do. 2 is already a very small truesize.
+ */
+ if (skb->truesize <= 2)
+ return;
+
/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
* so we do not completely orphan skb, but transfer all
* accounted bytes but one, to avoid unexpected reorders.
@@ -2019,33 +2043,27 @@ static void __release_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
{
- struct sk_buff *skb = sk->sk_backlog.head;
+ struct sk_buff *skb, *next;
- do {
+ while ((skb = sk->sk_backlog.head) != NULL) {
sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
- bh_unlock_sock(sk);
- do {
- struct sk_buff *next = skb->next;
+ spin_unlock_bh(&sk->sk_lock.slock);
+ do {
+ next = skb->next;
prefetch(next);
WARN_ON_ONCE(skb_dst_is_noref(skb));
skb->next = NULL;
sk_backlog_rcv(sk, skb);
- /*
- * We are in process context here with softirqs
- * disabled, use cond_resched_softirq() to preempt.
- * This is safe to do because we've taken the backlog
- * queue private:
- */
- cond_resched_softirq();
+ cond_resched();
skb = next;
} while (skb != NULL);
- bh_lock_sock(sk);
- } while ((skb = sk->sk_backlog.head) != NULL);
+ spin_lock_bh(&sk->sk_lock.slock);
+ }
/*
* Doing the zeroing here guarantee we can not loop forever
@@ -2054,6 +2072,13 @@ static void __release_sock(struct sock *sk)
sk->sk_backlog.len = 0;
}
+void __sk_flush_backlog(struct sock *sk)
+{
+ spin_lock_bh(&sk->sk_lock.slock);
+ __release_sock(sk);
+ spin_unlock_bh(&sk->sk_lock.slock);
+}
+
/**
* sk_wait_data - wait for data to arrive at sk_receive_queue
* @sk: sock to wait on
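
__release_sock() above now detaches the whole backlog while holding the socket spinlock, drops the lock to process the detached list (with cond_resched() between packets), and retakes it to look for newly queued skbs. A single-threaded model of that detach-and-drain loop, with invented types and the locking reduced to comments:

#include <stdio.h>

struct pkt {
	struct pkt *next;
	int id;
};

struct sock_model {
	struct pkt *backlog_head;
};

static void release_backlog(struct sock_model *sk)
{
	struct pkt *skb;

	/* lock(sk) would be held on entry */
	while ((skb = sk->backlog_head) != NULL) {
		sk->backlog_head = NULL;	/* detach the whole list */
		/* unlock(sk): processing happens without the lock */
		while (skb) {
			struct pkt *next = skb->next;

			printf("process pkt %d\n", skb->id);
			skb = next;
		}
		/* lock(sk) again before checking for newly queued packets */
	}
}

int main(void)
{
	struct pkt b = { NULL, 2 }, a = { &b, 1 };
	struct sock_model sk = { &a };

	release_backlog(&sk);
	return 0;
}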
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index ca9e35bbe13c..6b10573cc9fa 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -120,7 +120,7 @@ static size_t sock_diag_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+ nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
- + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
+ + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b0e28d24e1a7..0c55ffb859bf 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -198,9 +198,9 @@ struct dccp_mib {
};
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
-#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
-#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
-#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
+#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
+#define __DCCP_INC_STATS(field) __SNMP_INC_STATS(dccp_statistics, field)
+#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
/*
* Checksumming routines
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3bd14e885396..ba347184bda9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -359,7 +359,7 @@ send_sync:
goto discard;
}
- DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
+ DCCP_INC_STATS(DCCP_MIB_INERRS);
discard:
__kfree_skb(skb);
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f6d183f8f332..5c7e413a3ae4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
* socket here.
*/
if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
} else {
/*
* Still in RESPOND, just remove it silently.
@@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
if (skb->len < offset + sizeof(*dh) ||
skb->len < offset + __dccp_basic_hdr_len(dh)) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
iph->saddr, ntohs(dh->dccph_sport),
inet_iif(skb));
if (!sk) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
@@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == DCCP_CLOSED)
goto out;
@@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
dp = dccp_sk(sk);
if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
!between48(seq, dp->dccps_awl, dp->dccps_awh)) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
case DCCP_REQUESTING:
case DCCP_RESPOND:
if (!sock_owned_by_user(sk)) {
- DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk);
@@ -431,11 +431,11 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
return newsk;
exit_overflow:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
dst_release(dst);
exit:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
inet_csk_prepare_forced_close(newsk);
@@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+ __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
bh_unlock_sock(ctl_sk);
if (net_xmit_eval(err) == 0) {
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+ DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+ DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
}
out:
dst_release(dst);
@@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
drop_and_free:
reqsk_free(req);
drop:
- DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
return -1;
}
EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8ceb3cebcad4..d176f4e66369 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (skb->len < offset + sizeof(*dh) ||
skb->len < offset + __dccp_basic_hdr_len(dh)) {
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
@@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
inet6_iif(skb));
if (!sk) {
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
@@ -106,7 +106,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == DCCP_CLOSED)
goto out;
@@ -114,7 +114,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
dp = dccp_sk(sk);
if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
!between48(seq, dp->dccps_awl, dp->dccps_awh)) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case DCCP_RESPOND: /* Cannot happen.
It can, if SYNs are crossed. --ANK */
if (!sock_owned_by_user(sk)) {
- DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
/*
* Wake people up to see the error
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+ DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+ DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
}
@@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
drop_and_free:
reqsk_free(req);
drop:
- DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
return -1;
}
@@ -527,11 +527,11 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
return newsk;
out_overflow:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
dst_release(dst);
out:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
}
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 1994f8af646b..53eddf99e4f6 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
}
dccp_init_xmit_timers(newsk);
- DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
+ __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS);
}
return newsk;
}
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9bce31886bda..74d29c56c367 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -253,7 +253,7 @@ out_nonsensical_length:
return 0;
out_invalid_option:
- DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
+ DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
rc = DCCP_RESET_CODE_OPTION_ERROR;
out_featneg_failed:
DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3ef7acef3ce8..3a2c34027758 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -28,7 +28,7 @@ static void dccp_write_err(struct sock *sk)
dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_done(sk);
- DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
+ __DCCP_INC_STATS(DCCP_MIB_ABORTONTIMEOUT);
}
/* A write timeout has occurred. Process the after effects. */
@@ -100,7 +100,7 @@ static void dccp_retransmit_timer(struct sock *sk)
* total number of retransmissions of clones of original packets.
*/
if (icsk->icsk_retransmits == 0)
- DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
+ __DCCP_INC_STATS(DCCP_MIB_TIMEOUTS);
if (dccp_retransmit_skb(sk) != 0) {
/*
@@ -179,7 +179,7 @@ static void dccp_delack_timer(unsigned long data)
if (sock_owned_by_user(sk)) {
/* Try again later. */
icsk->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer,
jiffies + TCP_DELACK_MIN);
goto out;
@@ -209,7 +209,7 @@ static void dccp_delack_timer(unsigned long data)
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
dccp_send_ack(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
out:
bh_unlock_sock(sk);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d61ceed912be..eff5dfc2e33f 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -182,7 +182,7 @@ __ATTRIBUTE_GROUPS(dsa_hwmon);
/* basic switch operations **************************************************/
static int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct net_device *master)
{
- struct dsa_chip_data *cd = ds->pd;
+ struct dsa_chip_data *cd = ds->cd;
struct device_node *port_dn;
struct phy_device *phydev;
int ret, port, mode;
@@ -219,7 +219,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
{
struct dsa_switch_driver *drv = ds->drv;
struct dsa_switch_tree *dst = ds->dst;
- struct dsa_chip_data *pd = ds->pd;
+ struct dsa_chip_data *cd = ds->cd;
bool valid_name_found = false;
int index = ds->index;
int i, ret;
@@ -230,7 +230,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
for (i = 0; i < DSA_MAX_PORTS; i++) {
char *name;
- name = pd->port_names[i];
+ name = cd->port_names[i];
if (name == NULL)
continue;
@@ -328,10 +328,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
if (!(ds->enabled_port_mask & (1 << i)))
continue;
- ret = dsa_slave_create(ds, parent, i, pd->port_names[i]);
+ ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
if (ret < 0) {
netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
- index, i, pd->port_names[i], ret);
+ index, i, cd->port_names[i], ret);
ret = 0;
}
}
@@ -379,7 +379,7 @@ static struct dsa_switch *
dsa_switch_setup(struct dsa_switch_tree *dst, int index,
struct device *parent, struct device *host_dev)
{
- struct dsa_chip_data *pd = dst->pd->chip + index;
+ struct dsa_chip_data *cd = dst->pd->chip + index;
struct dsa_switch_driver *drv;
struct dsa_switch *ds;
int ret;
@@ -389,7 +389,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
/*
* Probe for switch model.
*/
- drv = dsa_switch_probe(parent, host_dev, pd->sw_addr, &name, &priv);
+ drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
if (drv == NULL) {
netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
index);
@@ -408,10 +408,10 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
ds->dst = dst;
ds->index = index;
- ds->pd = pd;
+ ds->cd = cd;
ds->drv = drv;
ds->priv = priv;
- ds->master_dev = host_dev;
+ ds->dev = parent;
ret = dsa_switch_setup_one(ds, parent);
if (ret)
@@ -424,7 +424,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
{
struct device_node *port_dn;
struct phy_device *phydev;
- struct dsa_chip_data *cd = ds->pd;
+ struct dsa_chip_data *cd = ds->cd;
int port;
#ifdef CONFIG_NET_DSA_HWMON
@@ -659,9 +659,6 @@ static int dsa_of_probe(struct device *dev)
const char *port_name;
int chip_index, port_index;
const unsigned int *sw_addr, *port_reg;
- int gpio;
- enum of_gpio_flags of_flags;
- unsigned long flags;
u32 eeprom_len;
int ret;
@@ -740,19 +737,6 @@ static int dsa_of_probe(struct device *dev)
put_device(cd->host_dev);
cd->host_dev = &mdio_bus_switch->dev;
}
- gpio = of_get_named_gpio_flags(child, "reset-gpios", 0,
- &of_flags);
- if (gpio_is_valid(gpio)) {
- flags = (of_flags == OF_GPIO_ACTIVE_LOW ?
- GPIOF_ACTIVE_LOW : 0);
- ret = devm_gpio_request_one(dev, gpio, flags,
- "switch_reset");
- if (ret)
- goto out_free_chip;
-
- cd->reset = gpio_to_desc(gpio);
- gpiod_direction_output(cd->reset, 0);
- }
for_each_available_child_of_node(child, port) {
port_reg = of_get_property(port, "reg", NULL);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3b6750f5e68b..152436cdab30 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -50,8 +50,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
ds->slave_mii_bus->read = dsa_slave_phy_read;
ds->slave_mii_bus->write = dsa_slave_phy_write;
snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
- ds->index, ds->pd->sw_addr);
- ds->slave_mii_bus->parent = ds->master_dev;
+ ds->index, ds->cd->sw_addr);
+ ds->slave_mii_bus->parent = ds->dev;
ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
}
@@ -615,8 +615,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->pd->eeprom_len)
- return ds->pd->eeprom_len;
+ if (ds->cd->eeprom_len)
+ return ds->cd->eeprom_len;
if (ds->drv->get_eeprom_len)
return ds->drv->get_eeprom_len(ds);
@@ -666,6 +666,78 @@ static void dsa_slave_get_strings(struct net_device *dev,
}
}
+static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds = dst->ds[0];
+ s8 cpu_port = dst->cpu_port;
+ int count = 0;
+
+ if (dst->master_ethtool_ops.get_sset_count) {
+ count = dst->master_ethtool_ops.get_sset_count(dev,
+ ETH_SS_STATS);
+ dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data);
+ }
+
+ if (ds->drv->get_ethtool_stats)
+ ds->drv->get_ethtool_stats(ds, cpu_port, data + count);
+}
+
+static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds = dst->ds[0];
+ int count = 0;
+
+ if (dst->master_ethtool_ops.get_sset_count)
+ count += dst->master_ethtool_ops.get_sset_count(dev, sset);
+
+ if (sset == ETH_SS_STATS && ds->drv->get_sset_count)
+ count += ds->drv->get_sset_count(ds);
+
+ return count;
+}
+
+static void dsa_cpu_port_get_strings(struct net_device *dev,
+ uint32_t stringset, uint8_t *data)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds = dst->ds[0];
+ s8 cpu_port = dst->cpu_port;
+ int len = ETH_GSTRING_LEN;
+ int mcount = 0, count;
+ unsigned int i;
+ uint8_t pfx[4];
+ uint8_t *ndata;
+
+ snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
+ /* We do not want this to be NUL-terminated, since it is a prefix */
+ pfx[sizeof(pfx) - 1] = '_';
+
+ if (dst->master_ethtool_ops.get_sset_count) {
+ mcount = dst->master_ethtool_ops.get_sset_count(dev,
+ ETH_SS_STATS);
+ dst->master_ethtool_ops.get_strings(dev, stringset, data);
+ }
+
+ if (stringset == ETH_SS_STATS && ds->drv->get_strings) {
+ ndata = data + mcount * len;
+ /* This function copies ETH_GSTRING_LEN bytes per string; we mangle
+ * the output afterwards to prepend the CPU port prefix we
+ * constructed earlier
+ */
+ ds->drv->get_strings(ds, cpu_port, ndata);
+ count = ds->drv->get_sset_count(ds);
+ for (i = 0; i < count; i++) {
+ memmove(ndata + (i * len + sizeof(pfx)),
+ ndata + i * len, len - sizeof(pfx));
+ memcpy(ndata + i * len, pfx, sizeof(pfx));
+ }
+ }
+}
+
static void dsa_slave_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats,
uint64_t *data)
@@ -821,6 +893,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eee = dsa_slave_get_eee,
};
+static struct ethtool_ops dsa_cpu_port_ethtool_ops;
+
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
@@ -925,7 +999,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
struct net_device *slave_dev)
{
struct dsa_switch *ds = p->parent;
- struct dsa_chip_data *cd = ds->pd;
+ struct dsa_chip_data *cd = ds->cd;
struct device_node *phy_dn, *port_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
@@ -1038,6 +1112,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
int port, char *name)
{
struct net_device *master = ds->dst->master_netdev;
+ struct dsa_switch_tree *dst = ds->dst;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
int ret;
@@ -1049,6 +1124,19 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->features = master->vlan_features;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
+ if (master->ethtool_ops != &dsa_cpu_port_ethtool_ops) {
+ memcpy(&dst->master_ethtool_ops, master->ethtool_ops,
+ sizeof(struct ethtool_ops));
+ memcpy(&dsa_cpu_port_ethtool_ops, &dst->master_ethtool_ops,
+ sizeof(struct ethtool_ops));
+ dsa_cpu_port_ethtool_ops.get_sset_count =
+ dsa_cpu_port_get_sset_count;
+ dsa_cpu_port_ethtool_ops.get_ethtool_stats =
+ dsa_cpu_port_get_ethtool_stats;
+ dsa_cpu_port_ethtool_ops.get_strings =
+ dsa_cpu_port_get_strings;
+ master->ethtool_ops = &dsa_cpu_port_ethtool_ops;
+ }
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
@@ -1059,7 +1147,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
NULL);
SET_NETDEV_DEV(slave_dev, parent);
- slave_dev->dev.of_node = ds->pd->port_dn[port];
+ slave_dev->dev.of_node = ds->cd->port_dn[port];
slave_dev->vlan_features = master->vlan_features;
p = netdev_priv(slave_dev);
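
dsa_cpu_port_get_strings() above prefixes every switch statistic name with "pNN_" by shifting each fixed-width string slot right and writing the prefix over the head. The same slot-rewriting trick on a tiny array, with widths and names chosen purely for the example:

#include <stdio.h>
#include <string.h>

#define SLOT_LEN 16
#define PFX_LEN  4

int main(void)
{
	char slots[2][SLOT_LEN] = { "rx_packets", "tx_packets" };
	const char pfx[PFX_LEN] = { 'p', '0', '2', '_' };	/* not NUL-terminated */

	for (int i = 0; i < 2; i++) {
		/* make room, then drop the prefix in front of the name */
		memmove(slots[i] + PFX_LEN, slots[i], SLOT_LEN - PFX_LEN);
		memcpy(slots[i], pfx, PFX_LEN);
		slots[i][SLOT_LEN - 1] = '\0';
		printf("%s\n", slots[i]);	/* p02_rx_packets, p02_tx_packets */
	}
	return 0;
}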
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index b4e17a7c0df0..5ac778962e4e 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -41,24 +41,12 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
return (((__force u64)a->extended_addr) >> 32) ^
(((__force u64)a->extended_addr) & 0xffffffff);
case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr);
+ return (__force u32)(a->short_addr + (a->pan_id << 16));
default:
return 0;
}
}
-/* private device info */
-struct lowpan_dev_info {
- struct net_device *wdev; /* wpan device ptr */
- u16 fragment_tag;
-};
-
-static inline struct
-lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
-{
- return (struct lowpan_dev_info *)lowpan_priv(dev)->priv;
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 0023c9048812..dd085db8580e 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -148,7 +148,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
return -EBUSY;
}
- lowpan_dev_info(ldev)->wdev = wdev;
+ lowpan_802154_dev(ldev)->wdev = wdev;
/* Set the lowpan hardware address to the wpan hardware address. */
memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN);
/* We need headroom for possible wpan_dev_hard_header call and tailroom
@@ -173,7 +173,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
{
- struct net_device *wdev = lowpan_dev_info(ldev)->wdev;
+ struct net_device *wdev = lowpan_802154_dev(ldev)->wdev;
ASSERT_RTNL();
@@ -184,7 +184,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
.kind = "lowpan",
- .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev_info)),
+ .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)),
.setup = lowpan_setup,
.newlink = lowpan_newlink,
.dellink = lowpan_dellink,
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index d4353faced35..e459afd16bb3 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -84,7 +84,7 @@ static struct sk_buff*
lowpan_alloc_frag(struct sk_buff *skb, int size,
const struct ieee802154_hdr *master_hdr, bool frag1)
{
- struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev;
+ struct net_device *wdev = lowpan_802154_dev(skb->dev)->wdev;
struct sk_buff *frag;
int rc;
@@ -148,8 +148,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev,
int frag_cap, frag_len, payload_cap, rc;
int skb_unprocessed, skb_offset;
- frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag);
- lowpan_dev_info(ldev)->fragment_tag++;
+ frag_tag = htons(lowpan_802154_dev(ldev)->fragment_tag);
+ lowpan_802154_dev(ldev)->fragment_tag++;
frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
frag_hdr[1] = dgram_size & 0xff;
@@ -208,7 +208,7 @@ err:
static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
u16 *dgram_size, u16 *dgram_offset)
{
- struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr;
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
struct ieee802154_addr sa, da;
struct ieee802154_mac_cb *cb = mac_cb_init(skb);
struct lowpan_addr_info info;
@@ -248,8 +248,8 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
cb->ackreq = wpan_dev->ackreq;
}
- return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa,
- 0);
+ return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da,
+ &sa, 0);
}
netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
@@ -283,7 +283,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
max_single = ieee802154_max_payload(&wpan_hdr);
if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) {
- skb->dev = lowpan_dev_info(ldev)->wdev;
+ skb->dev = lowpan_802154_dev(ldev)->wdev;
ldev->stats.tx_packets++;
ldev->stats.tx_bytes += dgram_size;
return dev_queue_xmit(skb);
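All the lowpan_dev_info() call sites above now go through lowpan_802154_dev(), which returns the 802.15.4-specific block stored behind the generic 6LoWPAN private area (sized with LOWPAN_PRIV_SIZE in the rtnl_link_ops hunk). A rough userspace sketch of that priv-behind-priv layout, with stand-in struct names and fields rather than the kernel ones:

    #include <stdlib.h>

    struct lowpan_priv_stub {               /* generic 6LoWPAN per-device state */
            int lltype;
            unsigned char priv[];           /* link-layer specific part follows */
    };

    struct lowpan_802154_stub {             /* 802.15.4-specific state */
            void *wdev;
            unsigned short fragment_tag;
    };

    #define LOWPAN_PRIV_SIZE_STUB(ll_size) \
            (sizeof(struct lowpan_priv_stub) + (ll_size))

    static struct lowpan_802154_stub *
    lowpan_802154_stub(struct lowpan_priv_stub *p)
    {
            return (struct lowpan_802154_stub *)p->priv;
    }

    int main(void)
    {
            struct lowpan_priv_stub *p =
                    calloc(1, LOWPAN_PRIV_SIZE_STUB(sizeof(struct lowpan_802154_stub)));

            if (!p)
                    return 1;
            lowpan_802154_stub(p)->fragment_tag++;  /* e.g. per-datagram tag */
            free(p);
            return 0;
    }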
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 8035c93dd527..ca207dbf673b 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1078,6 +1078,11 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
if (netif_running(dev))
return -EBUSY;
+ if (wpan_dev->lowpan_dev) {
+ if (netif_running(wpan_dev->lowpan_dev))
+ return -EBUSY;
+ }
+
/* don't change address fields on monitor */
if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
!info->attrs[NL802154_ATTR_PAN_ID])
@@ -1109,6 +1114,11 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info)
if (netif_running(dev))
return -EBUSY;
+ if (wpan_dev->lowpan_dev) {
+ if (netif_running(wpan_dev->lowpan_dev))
+ return -EBUSY;
+ }
+
/* don't change address fields on monitor */
if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
!info->attrs[NL802154_ATTR_SHORT_ADDR])
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c34c7544d1db..89a8cac4726a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -436,7 +436,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
if (IS_ERR(rt))
return 1;
if (rt->dst.dev != dev) {
- NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
+ __NET_INC_STATS(net, LINUX_MIB_ARPFILTER);
flag = 1;
}
ip_rt_put(rt);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8a9246deccfe..ef2ebeb89d0f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -110,6 +110,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
return tb;
}
+EXPORT_SYMBOL_GPL(fib_new_table);
/* caller must hold either rtnl or rcu read lock */
struct fib_table *fib_get_table(struct net *net, u32 id)
@@ -904,7 +905,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
if (!prim) {
- pr_warn("%s: bug: prim == NULL\n", __func__);
+ /* if the device has been deleted, we don't perform
+ * address promotion
+ */
+ if (!in_dev->dead)
+ pr_warn("%s: bug: prim == NULL\n", __func__);
return;
}
if (iprim && iprim != prim) {
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 7ac5ec87b004..eeec7d60e5fd 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -227,8 +227,6 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int err = -ENOSYS;
const struct net_offload **offloads;
- udp_tunnel_gro_complete(skb, nhoff);
-
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
@@ -237,6 +235,8 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
err = ops->callbacks.gro_complete(skb, nhoff);
+ skb_set_inner_mac_header(skb, nhoff);
+
out_unlock:
rcu_read_unlock();
@@ -412,6 +412,8 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
+ skb_set_inner_mac_header(skb, nhoff + guehlen);
+
out_unlock:
rcu_read_unlock();
return err;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index d9c552a721fc..d78e2eefc0f7 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -60,6 +60,67 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
+/* Fills in tpi and returns header length to be pulled. */
+int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err)
+{
+ const struct gre_base_hdr *greh;
+ __be32 *options;
+ int hdr_len;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
+
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ hdr_len = gre_calc_hlen(tpi->flags);
+
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ tpi->proto = greh->protocol;
+
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (skb_checksum_simple_validate(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+
+ skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
+ options++;
+ }
+
+ if (greh->flags & GRE_KEY) {
+ tpi->key = *options;
+ options++;
+ } else {
+ tpi->key = 0;
+ }
+ if (unlikely(greh->flags & GRE_SEQ)) {
+ tpi->seq = *options;
+ options++;
+ } else {
+ tpi->seq = 0;
+ }
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ tpi->proto = htons(ETH_P_IP);
+ if ((*(u8 *)options & 0xF0) != 0x40)
+ hdr_len += 4;
+ }
+ return hdr_len;
+}
+EXPORT_SYMBOL(gre_parse_header);
+
static int gre_rcv(struct sk_buff *skb)
{
const struct gre_protocol *proto;
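gre_parse_header() fills in the tnl_ptk_info and returns the number of GRE header bytes the caller still has to pull (or a negative error, with *csum_err telling checksum failures apart). The length follows the usual GRE layout, the same arithmetic as the ip_gre_calc_hlen() helper this shared version replaces further down in ip_gre.c. A stand-alone sketch of that calculation, with stand-in flag values:

    #include <stdint.h>
    #include <stdio.h>

    #define TUNNEL_CSUM_STUB 0x01
    #define TUNNEL_KEY_STUB  0x02
    #define TUNNEL_SEQ_STUB  0x04

    static int gre_calc_hlen_stub(uint16_t flags)
    {
            int hlen = 4;                   /* base GRE header */

            if (flags & TUNNEL_CSUM_STUB)   /* checksum + reserved word */
                    hlen += 4;
            if (flags & TUNNEL_KEY_STUB)    /* key field */
                    hlen += 4;
            if (flags & TUNNEL_SEQ_STUB)    /* sequence number */
                    hlen += 4;
            return hlen;
    }

    int main(void)
    {
            /* csum + key present -> 12 bytes of GRE header to pull */
            printf("%d\n", gre_calc_hlen_stub(TUNNEL_CSUM_STUB | TUNNEL_KEY_STUB));
            return 0;
    }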
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6333489771ed..38abe70e595f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
ipc, rt, MSG_DONTWAIT) < 0) {
- ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
+ __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
ip_flush_pending_frames(sk);
} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
struct icmphdr *icmph = icmp_hdr(skb);
@@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
* avoid additional coding at protocol handlers.
*/
if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
- ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
return;
}
@@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb)
out:
return true;
out_err:
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return false;
}
@@ -877,7 +877,7 @@ out_err:
static bool icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
- ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
return false;
}
@@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb)
return true;
out_err:
- ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
return false;
}
@@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
if (skb_checksum_simple_validate(skb))
goto csum_error;
@@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
icmph = icmp_hdr(skb);
- ICMPMSGIN_INC_STATS_BH(net, icmph->type);
+ ICMPMSGIN_INC_STATS(net, icmph->type);
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
@@ -1052,9 +1052,9 @@ drop:
kfree_skb(skb);
return 0;
csum_error:
- ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
goto drop;
}
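The ICMP_INC_STATS_BH()-style macros in this file (and across the IPv4 paths below) become the double-underscore variants. As far as the renamed convention goes, the plain macros stay usable from any context while the __ forms assume the caller already runs with BH/preemption disabled and can use the cheaper non-atomic per-CPU increment; the real macros wrap this_cpu_inc()/__this_cpu_inc() on per-CPU MIB arrays. A toy stand-in of the split, compilable in userspace:

    #include <stdio.h>

    static unsigned long mib_counter;

    /* stand-ins: the kernel's plain form guards against preemption itself,
     * the __ form trusts the caller's context (e.g. softirq RX path) */
    #define MY_INC_STATS(c)         ((c)++)
    #define __MY_INC_STATS(c)       ((c)++)

    int main(void)
    {
            __MY_INC_STATS(mib_counter);    /* packet-processing path */
            MY_INC_STATS(mib_counter);      /* process-context error path */
            printf("%lu\n", mib_counter);
            return 0;
    }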
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ab69da2d2a77..fa8c39804bdb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
route_err:
ip_rt_put(rt);
no_route:
- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+ __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);
@@ -466,7 +466,7 @@ route_err:
ip_rt_put(rt);
no_route:
rcu_read_unlock();
- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+ __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
@@ -706,7 +706,9 @@ void inet_csk_destroy_sock(struct sock *sk)
sk_refcnt_debug_release(sk);
+ local_bh_disable();
percpu_counter_dec(sk->sk_prot->orphan_count);
+ local_bh_enable();
sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ad7956fa659a..25af1243649b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -220,8 +220,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
}
if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
- attr = nla_reserve(skb, INET_DIAG_INFO,
- handler->idiag_info_size);
+ attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
+ handler->idiag_info_size,
+ INET_DIAG_PAD);
if (!attr)
goto errout;
@@ -1078,7 +1079,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
attr = handler->idiag_info_size
- ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size)
+ ? nla_reserve_64bit(skb, INET_DIAG_INFO,
+ handler->idiag_info_size,
+ INET_DIAG_PAD)
: NULL;
if (attr)
info = nla_data(attr);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index b76b0d7e59c1..77c20a489218 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -360,7 +360,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
sk_nulls_del_node_init_rcu((struct sock *)tw);
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -438,6 +438,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
const struct sock *sk2,
bool match_wildcard))
{
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -446,6 +447,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
sk2->sk_family == sk->sk_family &&
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+ inet_csk(sk2)->icsk_bind_hash == tb &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
saddr_same(sk, sk2, false))
return reuseport_add_sock(sk, sk2);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c67f9bd7699c..206581674806 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -94,7 +94,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
}
/*
- * Enter the time wait state. This is called with locally disabled BH.
+ * Enter the time wait state.
* Essentially we whip up a timewait bucket, copy the relevant info into it
* from the SK, and mess with hash chains and list linkage.
*/
@@ -112,7 +112,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
- spin_lock(&bhead->lock);
+ spin_lock_bh(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -138,7 +138,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- spin_unlock(lock);
+ spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -147,9 +147,9 @@ static void tw_timer_handler(unsigned long data)
struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
if (tw->tw_kill)
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+ __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
else
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+ __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
inet_twsk_kill(tw);
}
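The hashdance now takes its locks with the _bh variants instead of relying on callers to have BH disabled around the call (the "called with locally disabled BH" note in the comment goes away accordingly). A trivial stand-in sketch of that refactor, pushing the context handling into the callee; the lock helpers below are illustrative only:

    #include <stdio.h>

    /* stand-ins: the _bh variant disables softirqs itself, so callers
     * no longer need local_bh_disable()/enable() around the call */
    static void spin_lock_bh_stub(void)   { puts("lock + disable BH here"); }
    static void spin_unlock_bh_stub(void) { puts("unlock + re-enable BH"); }

    static void twsk_hashdance_stub(void)
    {
            spin_lock_bh_stub();    /* previously a plain lock with BH off outside */
            /* ... move the socket's bind/ehash linkage over to the tw sock ... */
            spin_unlock_bh_stub();
    }

    int main(void)
    {
            twsk_hashdance_stub();  /* now callable from process context too */
            return 0;
    }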
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index af18f1e4889e..cbfb1808fcc4 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -65,8 +65,8 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
{
struct ip_options *opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
+ __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+ __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -157,7 +157,7 @@ sr_failed:
too_many_hops:
/* Tell the sender its packet died... */
- IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index efbd47d1a531..bbe7f72db9c1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg)
goto out;
ipq_kill(qp);
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
if (!inet_frag_evicting(&qp->q)) {
struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph;
int err;
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
goto out;
@@ -291,7 +291,7 @@ static int ip_frag_too_far(struct ipq *qp)
struct net *net;
net = container_of(qp->q.net, struct net, ipv4.frags);
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
}
return rc;
@@ -635,7 +635,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
ip_send_check(iph);
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
qp->q.fragments_tail = NULL;
return 0;
@@ -647,7 +647,7 @@ out_nomem:
out_oversize:
net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
out_fail:
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
}
@@ -658,7 +658,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
int vif = l3mdev_master_ifindex_rcu(dev);
struct ipq *qp;
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
skb_orphan(skb);
/* Lookup (or create) queue header */
@@ -675,7 +675,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
return ret;
}
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -ENOMEM;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index eedd829a2f87..2b267e71ebf5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -122,125 +122,6 @@ static int ipgre_tunnel_init(struct net_device *dev);
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
-static int ip_gre_calc_hlen(__be16 o_flags)
-{
- int addend = 4;
-
- if (o_flags & TUNNEL_CSUM)
- addend += 4;
- if (o_flags & TUNNEL_KEY)
- addend += 4;
- if (o_flags & TUNNEL_SEQ)
- addend += 4;
- return addend;
-}
-
-static __be16 gre_flags_to_tnl_flags(__be16 flags)
-{
- __be16 tflags = 0;
-
- if (flags & GRE_CSUM)
- tflags |= TUNNEL_CSUM;
- if (flags & GRE_ROUTING)
- tflags |= TUNNEL_ROUTING;
- if (flags & GRE_KEY)
- tflags |= TUNNEL_KEY;
- if (flags & GRE_SEQ)
- tflags |= TUNNEL_SEQ;
- if (flags & GRE_STRICT)
- tflags |= TUNNEL_STRICT;
- if (flags & GRE_REC)
- tflags |= TUNNEL_REC;
- if (flags & GRE_VERSION)
- tflags |= TUNNEL_VERSION;
-
- return tflags;
-}
-
-static __be16 tnl_flags_to_gre_flags(__be16 tflags)
-{
- __be16 flags = 0;
-
- if (tflags & TUNNEL_CSUM)
- flags |= GRE_CSUM;
- if (tflags & TUNNEL_ROUTING)
- flags |= GRE_ROUTING;
- if (tflags & TUNNEL_KEY)
- flags |= GRE_KEY;
- if (tflags & TUNNEL_SEQ)
- flags |= GRE_SEQ;
- if (tflags & TUNNEL_STRICT)
- flags |= GRE_STRICT;
- if (tflags & TUNNEL_REC)
- flags |= GRE_REC;
- if (tflags & TUNNEL_VERSION)
- flags |= GRE_VERSION;
-
- return flags;
-}
-
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err)
-{
- const struct gre_base_hdr *greh;
- __be32 *options;
- int hdr_len;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
- return -EINVAL;
-
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
- hdr_len = ip_gre_calc_hlen(tpi->flags);
-
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- tpi->proto = greh->protocol;
-
- options = (__be32 *)(greh + 1);
- if (greh->flags & GRE_CSUM) {
- if (skb_checksum_simple_validate(skb)) {
- *csum_err = true;
- return -EINVAL;
- }
-
- skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
- null_compute_pseudo);
- options++;
- }
-
- if (greh->flags & GRE_KEY) {
- tpi->key = *options;
- options++;
- } else {
- tpi->key = 0;
- }
- if (unlikely(greh->flags & GRE_SEQ)) {
- tpi->seq = *options;
- options++;
- } else {
- tpi->seq = 0;
- }
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
- tpi->proto = htons(ETH_P_IP);
- if ((*(u8 *)options & 0xF0) != 0x40) {
- hdr_len += 4;
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
- }
- }
- return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
-}
-
static void ipgre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
{
@@ -341,7 +222,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
struct tnl_ptk_info tpi;
bool csum_err = false;
- if (parse_gre_header(skb, &tpi, &csum_err)) {
+ if (gre_parse_header(skb, &tpi, &csum_err) < 0) {
if (!csum_err) /* ignore csum errors. */
return;
}
@@ -379,24 +260,22 @@ static __be32 tunnel_id_to_key(__be64 x)
#endif
}
-static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
- struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
- struct ip_tunnel_net *itn;
const struct iphdr *iph;
struct ip_tunnel *tunnel;
- if (tpi->proto == htons(ETH_P_TEB))
- itn = net_generic(net, gre_tap_net_id);
- else
- itn = net_generic(net, ipgre_net_id);
-
iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
+ if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
+ raw_proto, false) < 0)
+ goto drop;
+
skb_pop_mac_header(skb);
if (tunnel->collect_md) {
__be16 flags;
@@ -412,13 +291,41 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
}
- return PACKET_REJECT;
+ return PACKET_NEXT;
+
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn;
+ int res;
+
+ if (tpi->proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
+ if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
+ /* ipgre tunnels in collect metadata mode should receive
+ * also ETH_P_TEB traffic.
+ */
+ itn = net_generic(net, ipgre_net_id);
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
+ }
+ return res;
}
static int gre_rcv(struct sk_buff *skb)
{
struct tnl_ptk_info tpi;
bool csum_err = false;
+ int hdr_len;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
@@ -428,10 +335,11 @@ static int gre_rcv(struct sk_buff *skb)
}
#endif
- if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err);
+ if (hdr_len < 0)
goto drop;
- if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
+ if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -440,49 +348,6 @@ drop:
return 0;
}
-static __sum16 gre_checksum(struct sk_buff *skb)
-{
- __wsum csum;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- csum = lco_csum(skb);
- else
- csum = skb_checksum(skb, 0, skb->len, 0);
- return csum_fold(csum);
-}
-
-static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
- __be16 proto, __be32 key, __be32 seq)
-{
- struct gre_base_hdr *greh;
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- greh = (struct gre_base_hdr *)skb->data;
- greh->flags = tnl_flags_to_gre_flags(flags);
- greh->protocol = proto;
-
- if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- if (flags & TUNNEL_SEQ) {
- *ptr = seq;
- ptr--;
- }
- if (flags & TUNNEL_KEY) {
- *ptr = key;
- ptr--;
- }
- if (flags & TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
- *ptr = 0;
- *(__sum16 *)ptr = gre_checksum(skb);
- }
- }
-}
-
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params,
__be16 proto)
@@ -493,8 +358,9 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->o_seqno++;
/* Push GRE header. */
- build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
- proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ gre_build_header(skb, tunnel->tun_hlen,
+ tunnel->parms.o_flags, proto, tunnel->parms.o_key,
+ htonl(tunnel->o_seqno));
skb_set_inner_protocol(skb, proto);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
@@ -522,7 +388,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
@@ -552,7 +419,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
fl.saddr);
}
- tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
+ tunnel_hlen = gre_calc_hlen(key->tun_flags);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr);
@@ -571,8 +438,8 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_rt;
flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
- build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
- tunnel_id_to_key(tun_info->key.tun_id), 0);
+ gre_build_header(skb, tunnel_hlen, flags, proto,
+ tunnel_id_to_key(tun_info->key.tun_id), 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -612,7 +479,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
const struct iphdr *tnl_params;
if (tunnel->collect_md) {
- gre_fb_xmit(skb, dev);
+ gre_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
}
@@ -654,7 +521,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
if (tunnel->collect_md) {
- gre_fb_xmit(skb, dev);
+ gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
return NETDEV_TX_OK;
}
@@ -694,8 +561,8 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
if (err)
return err;
- p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
- p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
+ p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
+ p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
return -EFAULT;
@@ -739,7 +606,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
greh = (struct gre_base_hdr *)(iph+1);
- greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
+ greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
greh->protocol = htons(type);
memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
@@ -840,7 +707,7 @@ static void __gre_tunnel_init(struct net_device *dev)
int t_hlen;
tunnel = netdev_priv(dev);
- tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
@@ -885,7 +752,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
netif_keep_dst(dev);
dev->addr_len = 4;
- if (iph->daddr) {
+ if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
if (!iph->saddr)
@@ -894,8 +761,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
dev->header_ops = &ipgre_header_ops;
}
#endif
- } else
+ } else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ }
return ip_tunnel_init(dev);
}
@@ -938,6 +806,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
if (flags & (GRE_VERSION|GRE_ROUTING))
return -EINVAL;
+ if (data[IFLA_GRE_COLLECT_METADATA] &&
+ data[IFLA_GRE_ENCAP_TYPE] &&
+ nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
+ return -EINVAL;
+
return 0;
}
@@ -1155,8 +1028,10 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_parm *p = &t->parms;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+ gre_tnl_flags_to_gre_flags(p->o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
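The receive path in this file now looks the tunnel up in the gretap (ETH_P_TEB) table first and, when __ipgre_rcv() reports PACKET_NEXT for a TEB frame, retries the plain ipgre table in raw-protocol mode so collect-metadata devices also receive TEB traffic. A compact userspace sketch of that two-step fallback, with stand-in values and a dummy lookup in place of the real tunnel tables:

    #include <stdbool.h>
    #include <stdio.h>

    enum { PKT_RCVD, PKT_REJECT, PKT_NEXT };

    /* stand-in for __ipgre_rcv(): pretend only table 1 owns the tunnel */
    static int try_table(int table_id, bool raw_proto)
    {
            (void)raw_proto;
            return table_id == 1 ? PKT_RCVD : PKT_NEXT;
    }

    static int dispatch_teb_frame(void)
    {
            int res = try_table(0, false);          /* gretap table first */

            if (res == PKT_NEXT)
                    res = try_table(1, true);       /* fall back to ipgre table */
            return res;
    }

    int main(void)
    {
            printf("%s\n", dispatch_teb_frame() == PKT_RCVD ? "received" : "rejected");
            return 0;
    }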
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e3d782746d9d..4b351af3e67b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -218,17 +218,17 @@ static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_b
protocol = -ret;
goto resubmit;
}
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
} else {
if (!raw) {
if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_PROT_UNREACH, 0);
}
kfree_skb(skb);
} else {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
}
}
@@ -273,7 +273,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
--ANK (980813)
*/
if (skb_cow(skb, skb_headroom(skb))) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+ __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -282,7 +282,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
if (ip_options_compile(dev_net(dev), opt, skb)) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
+ __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
goto drop;
}
@@ -313,6 +313,13 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip_rcv(skb);
+ if (!skb)
+ return NET_RX_SUCCESS;
+
if (net->ipv4.sysctl_ip_early_demux &&
!skb_dst(skb) &&
!skb->sk &&
@@ -337,7 +344,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
iph->tos, skb->dev);
if (unlikely(err)) {
if (err == -EXDEV)
- NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER);
+ __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
goto drop;
}
}
@@ -358,9 +365,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
rt = skb_rtable(skb);
if (rt->rt_type == RTN_MULTICAST) {
- IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
+ __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
} else if (rt->rt_type == RTN_BROADCAST) {
- IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
+ __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
} else if (skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) {
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
@@ -409,11 +416,11 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
net = dev_net(dev);
- IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
+ __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto out;
}
@@ -439,9 +446,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
- IP_ADD_STATS_BH(net,
- IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
- max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+ __IP_ADD_STATS(net,
+ IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
@@ -453,7 +460,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
len = ntohs(iph->tot_len);
if (skb->len < len) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
@@ -463,7 +470,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
if (pskb_trim_rcsum(skb, len)) {
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -471,6 +478,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
/* Remove any debris in the socket control block */
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ IPCB(skb)->iif = skb->skb_iif;
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
@@ -480,9 +488,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
ip_rcv_finish);
csum_error:
- IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS);
+ __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
- IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
kfree_skb(skb);
out:
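The l3mdev_ip_rcv() call added to ip_rcv_finish() follows a consume-or-return contract: when the ingress device is enslaved to an L3 master, the handler may take ownership of the skb and return NULL, and the caller then stops without an error. A hedged stand-in sketch of that contract (the enslavement test and names are illustrative only):

    #include <stdlib.h>

    struct pkt_stub { int ifindex; };

    /* stand-in for l3mdev_ip_rcv(): NULL means the packet was handed off
     * (and possibly freed) by the master device's handler */
    static struct pkt_stub *l3mdev_rcv_stub(struct pkt_stub *p)
    {
            if (p->ifindex == 42) {         /* pretend ifindex 42 is enslaved */
                    free(p);
                    return NULL;
            }
            return p;
    }

    static int rcv_finish_stub(struct pkt_stub *p)
    {
            p = l3mdev_rcv_stub(p);
            if (!p)
                    return 0;               /* consumed: stop, report success */
            /* ... normal early-demux / routing decision would follow ... */
            free(p);
            return 0;
    }

    int main(void)
    {
            struct pkt_stub *p = malloc(sizeof(*p));

            if (!p)
                    return 1;
            p->ifindex = 42;
            return rcv_finish_stub(p);
    }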
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index bdb222c0c6a2..5805762d7fc7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1193,7 +1193,12 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
ipv6_sk_rxinfo(sk);
if (prepare && skb_rtable(skb)) {
- pktinfo->ipi_ifindex = inet_iif(skb);
+ /* skb->cb is overloaded: prior to this point it is IP{6}CB
+ * which has interface index (iif) as the first member of the
+ * underlying inet{6}_skb_parm struct. This code then overlays
+ * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
+ * element so the iif is picked up from the prior IPCB
+ */
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
} else {
pktinfo->ipi_ifindex = 0;
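Dropping the explicit ipi_ifindex assignment leans on the layout assumption spelled out in the new comment: the inet_skb_parm view and the in_pktinfo view of skb->cb both keep the interface index as their first member, so the value stored on receive is already where pktinfo expects it. A small compile-time check of that assumption with stand-in structs:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    struct inet_skb_parm_stub { int iif; /* ... */ };
    struct in_pktinfo_stub    { int ipi_ifindex; /* ... */ };

    union cb_stub {                         /* two views of the same cb bytes */
            struct inet_skb_parm_stub ipcb;
            struct in_pktinfo_stub pktinfo;
            char raw[48];
    };

    static_assert(offsetof(struct inet_skb_parm_stub, iif) ==
                  offsetof(struct in_pktinfo_stub, ipi_ifindex),
                  "iif must stay the first member in both views");

    int main(void)
    {
            union cb_stub cb = { .ipcb = { .iif = 3 } };

            /* the pktinfo view picks up the ingress ifindex left by receive */
            printf("%d\n", cb.pktinfo.ipi_ifindex);
            return 0;
    }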
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6aad0192443d..a69ed94bda1b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
- fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
dev->flags |= IFF_POINTOPOINT;
+
+ dst_cache_reset(&tunnel->dst_cache);
}
if (!tdev && tunnel->parms.link)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 786fa7ca28e0..9118b0e640ba 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -157,7 +157,7 @@ int iptunnel_handle_offloads(struct sk_buff *skb,
}
if (skb_is_gso(skb)) {
- err = skb_unclone(skb, GFP_ATOMIC);
+ err = skb_header_unclone(skb, GFP_ATOMIC);
if (unlikely(err))
return err;
skb_shinfo(skb)->gso_type |= gso_type_mask;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5cf10b777b7e..a917903d5e97 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
int err;
+ int mtu;
if (!dst) {
dev->stats.tx_carrier_errors++;
@@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_count = 0;
}
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ } else {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ }
+
+ dst_release(dst);
+ goto tx_error;
+ }
+
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
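The new length check follows the usual path-MTU notification split: an IPv4 sender gets ICMP fragmentation-needed carrying the path MTU, an IPv6 sender gets packet-too-big with the MTU clamped up to the IPv6 minimum of 1280 bytes. A stand-alone sketch of that decision (constants and printouts are illustrative stand-ins):

    #include <stdio.h>

    #define IPV6_MIN_MTU_STUB 1280

    static void notify_too_big(int is_ipv4, int pkt_len, int mtu)
    {
            if (pkt_len <= mtu)
                    return;                         /* fits: nothing to do */
            if (is_ipv4) {
                    printf("ICMP_FRAG_NEEDED, mtu=%d\n", mtu);
            } else {
                    if (mtu < IPV6_MIN_MTU_STUB)
                            mtu = IPV6_MIN_MTU_STUB;
                    printf("ICMPV6_PKT_TOOBIG, mtu=%d\n", mtu);
            }
    }

    int main(void)
    {
            notify_too_big(1, 1500, 1400);
            notify_too_big(0, 1500, 1200);
            return 0;
    }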
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 60f5161abcb4..2033f929aa66 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -34,27 +34,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
MODULE_DESCRIPTION("arptables core");
-/*#define DEBUG_ARP_TABLES*/
-/*#define DEBUG_ARP_TABLES_USER*/
-
-#ifdef DEBUG_ARP_TABLES
-#define dprintf(format, args...) pr_debug(format, ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_ARP_TABLES_USER
-#define duprintf(format, args...) pr_debug(format, ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-#ifdef CONFIG_NETFILTER_DEBUG
-#define ARP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define ARP_NF_ASSERT(x)
-#endif
-
void *arpt_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(arpt, ARPT);
@@ -113,36 +92,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
- ARPT_INV_ARPOP)) {
- dprintf("ARP operation field mismatch.\n");
- dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
- arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+ ARPT_INV_ARPOP))
return 0;
- }
if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
- ARPT_INV_ARPHRD)) {
- dprintf("ARP hardware address format mismatch.\n");
- dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
- arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+ ARPT_INV_ARPHRD))
return 0;
- }
if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
- ARPT_INV_ARPPRO)) {
- dprintf("ARP protocol address format mismatch.\n");
- dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
- arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+ ARPT_INV_ARPPRO))
return 0;
- }
if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
- ARPT_INV_ARPHLN)) {
- dprintf("ARP hardware address length mismatch.\n");
- dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
- arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+ ARPT_INV_ARPHLN))
return 0;
- }
src_devaddr = arpptr;
arpptr += dev->addr_len;
@@ -155,49 +118,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
ARPT_INV_SRCDEVADDR) ||
FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
- ARPT_INV_TGTDEVADDR)) {
- dprintf("Source or target device address mismatch.\n");
-
+ ARPT_INV_TGTDEVADDR))
return 0;
- }
if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
ARPT_INV_SRCIP) ||
FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
- ARPT_INV_TGTIP)) {
- dprintf("Source or target IP address mismatch.\n");
-
- dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
- &src_ipaddr,
- &arpinfo->smsk.s_addr,
- &arpinfo->src.s_addr,
- arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
- dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n",
- &tgt_ipaddr,
- &arpinfo->tmsk.s_addr,
- &arpinfo->tgt.s_addr,
- arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+ ARPT_INV_TGTIP))
return 0;
- }
/* Look for ifname matches. */
ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, arpinfo->iniface,
- arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : "");
+ if (FWINV(ret != 0, ARPT_INV_VIA_IN))
return 0;
- }
ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, arpinfo->outiface,
- arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : "");
+ if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
return 0;
- }
return 1;
#undef FWINV
@@ -205,16 +144,10 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
static inline int arp_checkentry(const struct arpt_arp *arp)
{
- if (arp->flags & ~ARPT_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- arp->flags & ~ARPT_F_MASK);
+ if (arp->flags & ~ARPT_F_MASK)
return 0;
- }
- if (arp->invflags & ~ARPT_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- arp->invflags & ~ARPT_INV_MASK);
+ if (arp->invflags & ~ARPT_INV_MASK)
return 0;
- }
return 1;
}
@@ -406,11 +339,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
= (void *)arpt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
- pr_notice("arptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_ARP_NUMHOOKS))
return 0;
- }
+
e->comefrom
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
@@ -423,12 +354,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
* big jump.
@@ -462,8 +389,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
e = (struct arpt_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
@@ -480,8 +405,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
@@ -489,7 +413,6 @@ next:
static inline int check_target(struct arpt_entry *e, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
- int ret;
struct xt_tgchk_param par = {
.table = name,
.entryinfo = e,
@@ -499,13 +422,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
.family = NFPROTO_ARP,
};
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
- if (ret < 0) {
- duprintf("arp_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
}
static inline int
@@ -513,17 +430,18 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
{
struct xt_entry_target *t;
struct xt_target *target;
+ unsigned long pcnt;
int ret;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto out;
}
@@ -569,17 +487,12 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
(unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
if (!arp_checkentry(&e->arp))
return -EINVAL;
@@ -596,12 +509,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
@@ -646,7 +556,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
@@ -663,31 +572,21 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
return ret;
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
+ if (i != repl->num_entries)
return -EINVAL;
- }
/* Check hooks all assigned */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
- }
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -895,11 +794,8 @@ static int get_info(struct net *net, void __user *user,
struct xt_table *t;
int ret;
- if (*len != sizeof(struct arpt_getinfo)) {
- duprintf("length %u != %Zu\n", *len,
- sizeof(struct arpt_getinfo));
+ if (*len != sizeof(struct arpt_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
@@ -955,33 +851,25 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
struct arpt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct arpt_get_entries) + get.size) {
- duprintf("get_entries: %u != %Zu\n", *len,
- sizeof(struct arpt_get_entries) + get.size);
+ if (*len != sizeof(struct arpt_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n",
- private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
@@ -1019,8 +907,6 @@ static int __do_replace(struct net *net, const char *name,
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
@@ -1030,8 +916,6 @@ static int __do_replace(struct net *net, const char *name,
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
@@ -1101,8 +985,6 @@ static int do_replace(struct net *net, const void __user *user,
if (ret != 0)
goto free_newinfo;
- duprintf("arp_tables: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
@@ -1200,20 +1082,14 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
unsigned int entry_offset;
int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_arpt_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
return -EINVAL;
- }
if (!arp_checkentry(&e->arp))
return -EINVAL;
@@ -1230,8 +1106,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto out;
}
@@ -1301,7 +1175,6 @@ static int translate_compat_table(struct xt_table_info **pinfo,
size = compatr->size;
info->number = compatr->num_entries;
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
@@ -1316,11 +1189,8 @@ static int translate_compat_table(struct xt_table_info **pinfo,
}
ret = -EINVAL;
- if (j != compatr->num_entries) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, compatr->num_entries);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
@@ -1411,8 +1281,6 @@ static int compat_do_replace(struct net *net, void __user *user,
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
@@ -1445,7 +1313,6 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
break;
default:
- duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1528,17 +1395,13 @@ static int compat_get_entries(struct net *net,
struct compat_arpt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_arpt_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(NFPROTO_ARP);
@@ -1547,16 +1410,13 @@ static int compat_get_entries(struct net *net,
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size) {
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ } else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(NFPROTO_ARP);
module_put(t->me);
xt_table_unlock(t);
@@ -1608,7 +1468,6 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
break;
default:
- duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1651,7 +1510,6 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
}
default:
- duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1696,7 +1554,6 @@ int arpt_register_table(struct net *net,
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(newinfo, loc_cpu_entry, repl);
- duprintf("arpt_register_table: translate table gives %d\n", ret);
if (ret != 0)
goto out_free;
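find_check_entry() now performs the IS_ERR_VALUE() test on a local unsigned long before storing the per-CPU offset into the wider u64 counters.pcnt field, keeping the check on the pointer-sized type the macro is written for (the same pattern appears in ip_tables.c below). A minimal userspace sketch of the errno-in-value encoding the check relies on (stand-in macro, not the kernel one):

    #include <stdio.h>

    #define MAX_ERRNO_STUB 4095UL
    /* error codes occupy the last 4095 values of the unsigned long range */
    #define IS_ERR_VALUE_STUB(x) \
            ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO_STUB)

    int main(void)
    {
            unsigned long ok  = 0x1000;             /* a valid per-CPU offset */
            unsigned long bad = (unsigned long)-12; /* -ENOMEM encoded */

            printf("ok  -> %d\n", IS_ERR_VALUE_STUB(ok));   /* 0 */
            printf("bad -> %d\n", IS_ERR_VALUE_STUB(bad));  /* 1 */
            return 0;
    }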
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 735d1ee8c1ab..54906e0e8e0c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,34 +35,12 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
-/*#define DEBUG_IP_FIREWALL*/
-/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) pr_info(format , ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) pr_info(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
-#if 0
-/* All the better to debug you with... */
-#define static
-#define inline
-#endif
-
void *ipt_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ipt, IPT);
@@ -85,52 +63,28 @@ ip_packet_match(const struct iphdr *ip,
if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
IPT_INV_SRCIP) ||
FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
- IPT_INV_DSTIP)) {
- dprintf("Source or dest mismatch.\n");
-
- dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
- &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
- ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
- dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
- &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
- ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+ IPT_INV_DSTIP))
return false;
- }
ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, ipinfo->iniface,
- ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
+ if (FWINV(ret != 0, IPT_INV_VIA_IN))
return false;
- }
ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, ipinfo->outiface,
- ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
+ if (FWINV(ret != 0, IPT_INV_VIA_OUT))
return false;
- }
/* Check specific protocol */
if (ipinfo->proto &&
- FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
- dprintf("Packet protocol %hi does not match %hi.%s\n",
- ip->protocol, ipinfo->proto,
- ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
+ FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
return false;
- }
/* If we have a fragment rule but the packet is not a fragment
* then we return zero */
- if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
- dprintf("Fragment rule but not fragment.%s\n",
- ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+ if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
return false;
- }
return true;
}
@@ -138,16 +92,10 @@ ip_packet_match(const struct iphdr *ip,
static bool
ip_checkentry(const struct ipt_ip *ip)
{
- if (ip->flags & ~IPT_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- ip->flags & ~IPT_F_MASK);
+ if (ip->flags & ~IPT_F_MASK)
return false;
- }
- if (ip->invflags & ~IPT_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- ip->invflags & ~IPT_INV_MASK);
+ if (ip->invflags & ~IPT_INV_MASK)
return false;
- }
return true;
}
@@ -346,10 +294,6 @@ ipt_do_table(struct sk_buff *skb,
e = get_entry(table_base, private->hook_entry[hook]);
- pr_debug("Entering %s(hook %u), UF %p\n",
- table->name, hook,
- get_entry(table_base, private->underflow[hook]));
-
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
@@ -396,22 +340,15 @@ ipt_do_table(struct sk_buff *skb,
if (stackidx == 0) {
e = get_entry(table_base,
private->underflow[hook]);
- pr_debug("Underflow (this is normal) "
- "to %p\n", e);
} else {
e = jumpstack[--stackidx];
- pr_debug("Pulled %p out from pos %u\n",
- e, stackidx);
e = ipt_next_entry(e);
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
- !(e->ip.flags & IPT_F_GOTO)) {
+ !(e->ip.flags & IPT_F_GOTO))
jumpstack[stackidx++] = e;
- pr_debug("Pushed %p into pos %u\n",
- e, stackidx - 1);
- }
e = get_entry(table_base, v);
continue;
@@ -429,18 +366,13 @@ ipt_do_table(struct sk_buff *skb,
/* Verdict */
break;
} while (!acpar.hotdrop);
- pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
xt_write_recseq_end(addend);
local_bh_enable();
-#ifdef DEBUG_ALLOW_ALL
- return NF_ACCEPT;
-#else
if (acpar.hotdrop)
return NF_DROP;
else return verdict;
-#endif
}
static bool find_jump_target(const struct xt_table_info *t,
@@ -480,11 +412,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
= (void *)ipt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
- pr_err("iptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS))
return 0;
- }
+
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
@@ -496,26 +426,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
big jump. */
do {
e->comefrom ^= (1<<NF_INET_NUMHOOKS);
-#ifdef DEBUG_IP_FIREWALL_USER
- if (e->comefrom
- & (1 << NF_INET_NUMHOOKS)) {
- duprintf("Back unset "
- "on hook %u "
- "rule %u\n",
- hook, pos);
- }
-#endif
oldpos = pos;
pos = e->counters.pcnt;
e->counters.pcnt = 0;
@@ -543,8 +460,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
e = (struct ipt_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
@@ -561,8 +476,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
@@ -584,18 +498,12 @@ static int
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
- int ret;
par->match = m->u.kernel.match;
par->matchinfo = m->data;
- ret = xt_check_match(par, m->u.match_size - sizeof(*m),
- ip->proto, ip->invflags & IPT_INV_PROTO);
- if (ret < 0) {
- duprintf("check failed for `%s'.\n", par->match->name);
- return ret;
- }
- return 0;
+ return xt_check_match(par, m->u.match_size - sizeof(*m),
+ ip->proto, ip->invflags & IPT_INV_PROTO);
}
static int
@@ -606,10 +514,8 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
m->u.kernel.match = match;
ret = check_match(m, par);
@@ -634,16 +540,9 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
.hook_mask = e->comefrom,
.family = NFPROTO_IPV4,
};
- int ret;
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
- e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
- if (ret < 0) {
- duprintf("check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
}
static int
@@ -656,10 +555,12 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
@@ -678,7 +579,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto cleanup_matches;
}
@@ -732,17 +632,12 @@ check_entry_size_and_hooks(struct ipt_entry *e,
if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
(unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
if (!ip_checkentry(&e->ip))
return -EINVAL;
@@ -759,12 +654,9 @@ check_entry_size_and_hooks(struct ipt_entry *e,
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
@@ -816,7 +708,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -833,27 +724,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
++newinfo->stacksize;
}
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
+ if (i != repl->num_entries)
return -EINVAL;
- }
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
- }
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -1081,11 +963,8 @@ static int get_info(struct net *net, void __user *user,
struct xt_table *t;
int ret;
- if (*len != sizeof(struct ipt_getinfo)) {
- duprintf("length %u != %zu\n", *len,
- sizeof(struct ipt_getinfo));
+ if (*len != sizeof(struct ipt_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
@@ -1143,31 +1022,23 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
struct ipt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct ipt_get_entries) + get.size) {
- duprintf("get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct ipt_get_entries) + get.size)
return -EINVAL;
- }
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET, get.name);
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
@@ -1203,8 +1074,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
@@ -1214,8 +1083,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
@@ -1284,8 +1151,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (ret != 0)
goto free_newinfo;
- duprintf("Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
@@ -1411,11 +1276,9 @@ compat_find_calc_match(struct xt_entry_match *m,
match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("compat_check_calc_match: `%s' not found\n",
- m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
*size += xt_compat_match_offset(match);
return 0;
@@ -1447,20 +1310,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
unsigned int j;
int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_ipt_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
return -EINVAL;
- }
if (!ip_checkentry(&e->ip))
return -EINVAL;
@@ -1484,8 +1341,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto release_matches;
}
@@ -1567,7 +1422,6 @@ translate_compat_table(struct net *net,
size = compatr->size;
info->number = compatr->num_entries;
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
xt_compat_init_offsets(AF_INET, compatr->num_entries);
@@ -1582,11 +1436,8 @@ translate_compat_table(struct net *net,
}
ret = -EINVAL;
- if (j != compatr->num_entries) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, compatr->num_entries);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
@@ -1683,8 +1534,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
@@ -1718,7 +1567,6 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
break;
default:
- duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1768,19 +1616,15 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
struct compat_ipt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(AF_INET);
@@ -1788,16 +1632,13 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
- if (!ret && get.size == info.size) {
+ if (!ret && get.size == info.size)
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(AF_INET);
module_put(t->me);
xt_table_unlock(t);
@@ -1850,7 +1691,6 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
break;
default:
- duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1902,7 +1742,6 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
}
default:
- duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -2004,7 +1843,6 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil ICMP tinygram.\n");
par->hotdrop = true;
return false;
}
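
Note on the find_check_entry() hunk above: the percpu counter allocation is now tested in an unsigned long local before being published into e->counters.pcnt. A minimal userspace sketch of that check-then-store pattern follows; the IS_ERR_VALUE() stand-in and the fake allocator are simplifications for illustration, not the kernel definitions.

#include <stdio.h>

#define MAX_ERRNO	4095
/* simplified stand-in for the kernel's IS_ERR_VALUE() */
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

struct counters { unsigned long pcnt; };

/* fake allocator: returns an address, or -ENOMEM encoded as an error value */
static unsigned long fake_counter_alloc(int fail)
{
	return fail ? (unsigned long)-12 : 0x1000UL;
}

int main(void)
{
	struct counters c = { 0 };
	unsigned long pcnt = fake_counter_alloc(0);

	if (IS_ERR_VALUE(pcnt))		/* reject before storing into the struct */
		return 1;
	c.pcnt = pcnt;
	printf("stored %#lx\n", c.pcnt);
	return 0;
}
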
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index e3c46e8e2762..ae1a71a97132 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -360,7 +360,7 @@ static int ipv4_init_net(struct net *net)
in->ctl_table[0].data = &nf_conntrack_max;
in->ctl_table[1].data = &net->ct.count;
- in->ctl_table[2].data = &net->ct.htable_size;
+ in->ctl_table[2].data = &nf_conntrack_htable_size;
in->ctl_table[3].data = &net->ct.sysctl_checksum;
in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f0dfe92a00d6..c6f3c406f707 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -31,15 +31,14 @@ struct ct_iter_state {
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < net->ct.htable_size;
+ st->bucket < nf_conntrack_htable_size;
st->bucket++) {
n = rcu_dereference(
- hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -49,17 +48,16 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct hlist_nulls_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= net->ct.htable_size)
+ if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
}
head = rcu_dereference(
- hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
}
return head;
}
@@ -114,6 +112,23 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+static bool ct_seq_should_skip(const struct nf_conn *ct,
+ const struct net *net,
+ const struct nf_conntrack_tuple_hash *hash)
+{
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ return true;
+
+ if (nf_ct_l3num(ct) != AF_INET)
+ return true;
+
+ if (!net_eq(nf_ct_net(ct), net))
+ return true;
+
+ return false;
+}
+
static int ct_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_tuple_hash *hash = v;
@@ -123,14 +138,15 @@ static int ct_seq_show(struct seq_file *s, void *v)
int ret = 0;
NF_CT_ASSERT(ct);
- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ if (ct_seq_should_skip(ct, seq_file_net(s), hash))
return 0;
+ if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ return 0;
- /* we only want to print DIR_ORIGINAL */
- if (NF_CT_DIRECTION(hash))
- goto release;
- if (nf_ct_l3num(ct) != AF_INET)
+ /* check if we raced w. object reuse */
+ if (!nf_ct_is_confirmed(ct) ||
+ ct_seq_should_skip(ct, seq_file_net(s), hash))
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
@@ -220,13 +236,12 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
n = rcu_dereference(
- hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
if (n)
return n;
}
@@ -236,7 +251,6 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(hlist_next_rcu(head));
@@ -244,7 +258,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
head = rcu_dereference(
- hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
}
return head;
}
@@ -285,6 +299,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
+ if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
+ return 0;
+
if (exp->tuple.src.l3num != AF_INET)
return 0;
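
The two conntrack seq-file diffs above stop walking the per-netns hashes (net->ct.hash, net->ct.expect_hash) and instead iterate the shared nf_conntrack_hash / nf_ct_expect_hash tables, filtering each entry by its owning namespace (ct_seq_should_skip(), the net_eq() check in exp_seq_show()). A standalone sketch of that iterate-shared-table-and-filter idea; the types and names below are illustrative only.

#include <stddef.h>
#include <stdio.h>

struct netns { int id; };
struct conn { struct netns *owner; const char *desc; };

/* analogue of !net_eq(nf_ct_net(ct), seq_file_net(s)) */
static int should_skip(const struct conn *c, const struct netns *viewer)
{
	return c->owner != viewer;
}

int main(void)
{
	struct netns init_net = { 0 }, other = { 1 };
	struct conn table[] = {
		{ &init_net, "tcp 10.0.0.1 -> 10.0.0.2" },
		{ &other,    "tcp 192.168.0.1 -> 192.168.0.2" },
	};

	/* one shared table, one namespace's view of it */
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (!should_skip(&table[i], &init_net))
			puts(table[i].desc);
	return 0;
}
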
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 60398a9370e7..a1f2830d8110 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -915,11 +915,11 @@ static int ip_error(struct sk_buff *skb)
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
case EHOSTUNREACH:
- IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
+ __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
break;
case ENETUNREACH:
- IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
+ __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
}
goto out;
@@ -934,7 +934,7 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
- IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
+ __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
code = ICMP_PKT_FILTERED;
@@ -2146,6 +2146,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
+ int master_idx;
int orig_oif;
int err = -ENETUNREACH;
@@ -2155,6 +2156,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
orig_oif = fl4->flowi4_oif;
+ master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
+ if (master_idx)
+ fl4->flowi4_oif = master_idx;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
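
From here on the diff renames the SNMP/MIB helpers: the old *_STATS_BH() calls become __*_STATS(), and the old *_STATS()/*_STATS_USER() calls become plain *_STATS(). Judging from the hunks that add local_bh_disable() around the double-underscore variants (for example tcp_v4_send_reset() later in this diff), the underscored form assumes the caller is already in a context where the per-cpu update is safe, while the plain form may be used from process context. A rough userspace analogy of that two-variant convention, with a pthread mutex standing in for BH protection, purely as an illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long mib[1];

/* __mib_inc(): caller must already hold stats_lock
 * (analogue of the __*_STATS() helpers, which expect the update
 * context to be safe already). */
static void __mib_inc(int field)
{
	mib[field]++;
}

/* mib_inc(): protects itself (analogue of the plain *_STATS() helpers). */
static void mib_inc(int field)
{
	pthread_mutex_lock(&stats_lock);
	__mib_inc(field);
	pthread_mutex_unlock(&stats_lock);
}

int main(void)
{
	mib_inc(0);				/* safe from any caller */

	pthread_mutex_lock(&stats_lock);	/* like local_bh_disable() */
	__mib_inc(0);
	pthread_mutex_unlock(&stats_lock);	/* like local_bh_enable() */

	printf("mib[0] = %lu\n", mib[0]);
	return 0;
}
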
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4c04f09338e3..e3c4043c27de 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -312,11 +312,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
if (mss == 0) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d73858991af..5c7ed147449c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -430,14 +430,15 @@ EXPORT_SYMBOL(tcp_init_sock);
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
{
- if (sk->sk_tsflags || tsflags) {
+ if (tsflags) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
- if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
+ if (tsflags & SOF_TIMESTAMPING_TX_ACK)
+ tcb->txstamp_ack = 1;
+ if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
- tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP);
}
}
@@ -908,7 +909,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
int copy, i;
bool can_coalesce;
- if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
+ if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+ !tcp_skb_can_collapse_to(skb)) {
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -1082,6 +1084,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
struct sockcm_cookie sockc;
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
+ bool process_backlog = false;
bool sg;
long timeo;
@@ -1134,11 +1137,12 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
/* This should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- mss_now = tcp_send_mss(sk, &size_goal, flags);
-
/* Ok commence sending. */
copied = 0;
+restart:
+ mss_now = tcp_send_mss(sk, &size_goal, flags);
+
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
@@ -1156,7 +1160,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
copy = max - skb->len;
}
- if (copy <= 0) {
+ if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
new_segment:
/* Allocate new segment. If the interface is SG,
* allocate skb fitting to single page.
@@ -1164,6 +1168,10 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
+ if (process_backlog && sk_flush_backlog(sk)) {
+ process_backlog = false;
+ goto restart;
+ }
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg),
sk->sk_allocation,
@@ -1171,6 +1179,7 @@ new_segment:
if (!skb)
goto wait_for_memory;
+ process_backlog = true;
/*
* Check whether we can use HW checksum.
*/
@@ -1250,6 +1259,8 @@ new_segment:
copied += copy;
if (!msg_data_left(msg)) {
tcp_tx_timestamp(sk, sockc.tsflags, skb);
+ if (unlikely(flags & MSG_EOR))
+ TCP_SKB_CB(skb)->eor = 1;
goto out;
}
@@ -1443,14 +1454,10 @@ static void tcp_prequeue_process(struct sock *sk)
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
- NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
- /* RX process wants to run with disabled BHs, though it is not
- * necessary */
- local_bh_disable();
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb);
- local_bh_enable();
/* Clear memory counter. */
tp->ucopy.memory = 0;
@@ -1777,7 +1784,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
chunk = len - tp->ucopy.len;
if (chunk != 0) {
- NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
len -= chunk;
copied += chunk;
}
@@ -1789,7 +1796,7 @@ do_prequeue:
chunk = len - tp->ucopy.len;
if (chunk != 0) {
- NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
}
@@ -1875,7 +1882,7 @@ skip_copy:
tcp_prequeue_process(sk);
if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
}
@@ -2065,13 +2072,13 @@ void tcp_close(struct sock *sk, long timeout)
sk->sk_prot->disconnect(sk, 0);
} else if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
- NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, sk->sk_allocation);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
- NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
} else if (tcp_close_state(sk)) {
/* We FIN if the application ate all the data before
* zapping the connection.
@@ -2148,7 +2155,7 @@ adjudge_to_death:
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(sock_net(sk),
+ __NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
const int tmo = tcp_fin_time(sk);
@@ -2167,7 +2174,7 @@ adjudge_to_death:
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(sock_net(sk),
+ __NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
}
}
@@ -3091,7 +3098,7 @@ void tcp_done(struct sock *sk)
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
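
The tcp_sendmsg()/do_tcp_sendpages() hunks above start honouring MSG_EOR: the flag sets TCP_SKB_CB(skb)->eor on the last skb of the write, and the new tcp_skb_can_collapse_to() checks keep later data from being merged into that skb. A userspace sketch of how a sender would mark a record boundary with the flag; this is a helper only and assumes fd is an already-connected TCP socket.

#include <sys/types.h>
#include <sys/socket.h>
#include <stddef.h>

/* Send one application-level record and mark its end with MSG_EOR so
 * that a following write is not coalesced into the same skb. */
static ssize_t send_record(int fd, const void *buf, size_t len)
{
	return send(fd, buf, len, MSG_EOR);
}

Without the flag, these hunks leave the existing behaviour in place: tail data may still be appended to the previous unsent skb.
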
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index fd1405d37c14..36087bca9f48 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -197,15 +197,15 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
+static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_state == TCP_CA_Open) {
struct bictcp *ca = inet_csk_ca(sk);
- cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
- ca->delayed_ack += cnt;
+ ca->delayed_ack += sample->pkts_acked -
+ (ca->delayed_ack >> ACK_RATIO_SHIFT);
}
}
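
This and the following congestion-control modules (CDG, CUBIC, HTCP, Illinois, LP) are all converted from a pkts_acked(sk, cnt, rtt_us) hook to one taking a const struct ack_sample *, whose pkts_acked and rtt_us members carry the same information; the tcp_input.c hunk later in this diff builds the sample. A standalone sketch of the new callback shape; the struct layout here is a simplification for illustration, not the kernel definition.

#include <stdio.h>

struct ack_sample {
	unsigned int pkts_acked;
	int rtt_us;		/* negative when the ACK carried no RTT sample */
};

/* converted hook shape: one aggregate sample instead of two scalars */
static void demo_pkts_acked(const struct ack_sample *sample)
{
	if (sample->rtt_us < 0)
		return;		/* duplicate ACK, nothing to measure */
	printf("acked %u packets, rtt %d us\n",
	       sample->pkts_acked, sample->rtt_us);
}

int main(void)
{
	struct ack_sample s = { .pkts_acked = 2, .rtt_us = 1500 };

	demo_pkts_acked(&s);
	return 0;
}
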
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 167b6a3e1b98..03725b294286 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
ca->last_ack = now_us;
if (after(now_us, ca->round_start + base_owd)) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTTRAINDETECT);
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
return;
}
@@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
125U);
if (ca->rtt.min > thresh) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTDELAYDETECT);
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
@@ -294,12 +294,12 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
}
-static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
+static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample)
{
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- if (rtt_us <= 0)
+ if (sample->rtt_us <= 0)
return;
/* A heuristic for filtering delayed ACKs, adapted from:
@@ -307,20 +307,20 @@ static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
* delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010.
*/
if (tp->sacked_out == 0) {
- if (num_acked == 1 && ca->delack) {
+ if (sample->pkts_acked == 1 && ca->delack) {
/* A delayed ACK is only used for the minimum if it is
* provenly lower than an existing non-zero minimum.
*/
- ca->rtt.min = min(ca->rtt.min, rtt_us);
+ ca->rtt.min = min(ca->rtt.min, sample->rtt_us);
ca->delack--;
return;
- } else if (num_acked > 1 && ca->delack < 5) {
+ } else if (sample->pkts_acked > 1 && ca->delack < 5) {
ca->delack++;
}
}
- ca->rtt.min = min_not_zero(ca->rtt.min, rtt_us);
- ca->rtt.max = max(ca->rtt.max, rtt_us);
+ ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us);
+ ca->rtt.max = max(ca->rtt.max, sample->rtt_us);
}
static u32 tcp_cdg_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 448c2615fece..c99230efcd52 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->last_ack = now;
if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
ca->found |= HYSTART_ACK_TRAIN;
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTTRAINDETECT);
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
@@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay)
if (ca->curr_rtt > ca->delay_min +
HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
ca->found |= HYSTART_DELAY;
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTDELAYDETECT);
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
@@ -437,21 +437,21 @@ static void hystart_update(struct sock *sk, u32 delay)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
+static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
u32 delay;
/* Some calls are for duplicates without timestamps */
- if (rtt_us < 0)
+ if (sample->rtt_us < 0)
return;
/* Discard delay samples right after fast recovery */
if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
return;
- delay = (rtt_us << 3) / USEC_PER_MSEC;
+ delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
if (delay == 0)
delay = 1;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index cffd8f9ed1a9..54d9f9b0120f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -255,9 +255,9 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
spin_lock(&fastopenq->lock);
req1 = fastopenq->rskq_rst_head;
if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
+ __NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
spin_unlock(&fastopenq->lock);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
return false;
}
fastopenq->rskq_rst_head = req1->dl_next;
@@ -282,7 +282,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct sock *child;
if (foc->len == 0) /* Client requests a cookie */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
@@ -311,13 +311,13 @@ fastopen:
child = tcp_fastopen_create_child(sk, skb, dst, req);
if (child) {
foc->len = -1;
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVE);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVE);
return child;
}
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
} else if (foc->len > 0) /* Client presents an invalid cookie */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
valid_foc.exp = foc->exp;
*foc = valid_foc;
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 82f0d9ed60f5..4a4d8e76738f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -99,7 +99,7 @@ static inline void measure_rtt(struct sock *sk, u32 srtt)
}
static void measure_achieved_throughput(struct sock *sk,
- u32 pkts_acked, s32 rtt)
+ const struct ack_sample *sample)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
@@ -107,10 +107,10 @@ static void measure_achieved_throughput(struct sock *sk,
u32 now = tcp_time_stamp;
if (icsk->icsk_ca_state == TCP_CA_Open)
- ca->pkts_acked = pkts_acked;
+ ca->pkts_acked = sample->pkts_acked;
- if (rtt > 0)
- measure_rtt(sk, usecs_to_jiffies(rtt));
+ if (sample->rtt_us > 0)
+ measure_rtt(sk, usecs_to_jiffies(sample->rtt_us));
if (!use_bandwidth_switch)
return;
@@ -122,7 +122,7 @@ static void measure_achieved_throughput(struct sock *sk,
return;
}
- ca->packetcount += pkts_acked;
+ ca->packetcount += sample->pkts_acked;
if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
now - ca->lasttime >= ca->minRTT &&
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 2ab9bbb6faff..c8e6d86be114 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -82,30 +82,31 @@ static void tcp_illinois_init(struct sock *sk)
}
/* Measure RTT for each ack. */
-static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt)
+static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample)
{
struct illinois *ca = inet_csk_ca(sk);
+ s32 rtt_us = sample->rtt_us;
- ca->acked = pkts_acked;
+ ca->acked = sample->pkts_acked;
/* dup ack, no rtt sample */
- if (rtt < 0)
+ if (rtt_us < 0)
return;
/* ignore bogus values, this prevents wraparound in alpha math */
- if (rtt > RTT_MAX)
- rtt = RTT_MAX;
+ if (rtt_us > RTT_MAX)
+ rtt_us = RTT_MAX;
/* keep track of minimum RTT seen so far */
- if (ca->base_rtt > rtt)
- ca->base_rtt = rtt;
+ if (ca->base_rtt > rtt_us)
+ ca->base_rtt = rtt_us;
/* and max */
- if (ca->max_rtt < rtt)
- ca->max_rtt = rtt;
+ if (ca->max_rtt < rtt_us)
+ ca->max_rtt = rtt_us;
++ca->cnt_rtt;
- ca->sum_rtt += rtt;
+ ca->sum_rtt += rtt_us;
}
/* Maximum queuing delay */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 967520dbe0bf..d6c8f4cd0800 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -869,7 +869,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
else
mib_idx = LINUX_MIB_TCPSACKREORDER;
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1062,7 +1062,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
dup_sack = true;
tcp_dsack_seen(tp);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
} else if (num_sacks > 1) {
u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1071,7 +1071,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
!before(start_seq_0, start_seq_1)) {
dup_sack = true;
tcp_dsack_seen(tp);
- NET_INC_STATS_BH(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPDSACKOFORECV);
}
}
@@ -1289,7 +1289,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
if (skb->len > 0) {
BUG_ON(!tcp_skb_pcount(skb));
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
return false;
}
@@ -1303,6 +1303,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
}
TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+ TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
TCP_SKB_CB(prev)->end_seq++;
@@ -1313,7 +1314,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
return true;
}
@@ -1368,6 +1369,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
+ if (!tcp_skb_can_collapse_to(prev))
+ goto fallback;
+
in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
!before(end_seq, TCP_SKB_CB(skb)->end_seq);
@@ -1469,7 +1473,7 @@ noop:
return skb;
fallback:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
return NULL;
}
@@ -1657,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
mib_idx = LINUX_MIB_TCPSACKDISCARD;
}
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
if (i == 0)
first_sack_index = -1;
continue;
@@ -1909,7 +1913,7 @@ void tcp_enter_loss(struct sock *sk)
skb = tcp_write_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
tp->fackets_out = 0;
}
@@ -2395,7 +2399,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
else
mib_idx = LINUX_MIB_TCPFULLUNDO;
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
@@ -2417,7 +2421,7 @@ static bool tcp_try_undo_dsack(struct sock *sk)
if (tp->undo_marker && !tp->undo_retrans) {
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
return true;
}
return false;
@@ -2432,10 +2436,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
tcp_undo_cwnd_reduction(sk, true);
DBGUNDO(sk, "partial loss");
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
if (frto_undo)
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPSPURIOUSRTOS);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
if (frto_undo || tcp_is_sack(tp))
tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2559,7 +2563,7 @@ static void tcp_mtup_probe_failed(struct sock *sk)
icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
icsk->icsk_mtup.probe_size = 0;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
}
static void tcp_mtup_probe_success(struct sock *sk)
@@ -2579,7 +2583,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
icsk->icsk_mtup.probe_size = 0;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
/* Do a simple retransmit without using the backoff mechanisms in
@@ -2643,7 +2647,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
else
mib_idx = LINUX_MIB_TCPSACKRECOVERY;
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
tp->prior_ssthresh = 0;
tcp_init_undo(tp);
@@ -2736,7 +2740,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
DBGUNDO(sk, "partial recovery");
tcp_undo_cwnd_reduction(sk, true);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
tcp_try_keep_open(sk);
return true;
}
@@ -3087,8 +3091,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
return;
shinfo = skb_shinfo(skb);
- if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
- !before(shinfo->tskey, prior_snd_una) &&
+ if (!before(shinfo->tskey, prior_snd_una) &&
before(shinfo->tskey, tcp_sk(sk)->snd_una))
__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
}
@@ -3245,8 +3248,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_rearm_rto(sk);
}
- if (icsk->icsk_ca_ops->pkts_acked)
- icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+ if (icsk->icsk_ca_ops->pkts_acked) {
+ struct ack_sample sample = { .pkts_acked = pkts_acked,
+ .rtt_us = ca_rtt_us };
+
+ icsk->icsk_ca_ops->pkts_acked(sk, &sample);
+ }
#if FASTRETRANS_DEBUG > 0
WARN_ON((int)tp->sacked_out < 0);
@@ -3352,9 +3359,10 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
{
u32 delta = ack - tp->snd_una;
- u64_stats_update_begin(&tp->syncp);
+ sock_owned_by_me((struct sock *)tp);
+ u64_stats_update_begin_raw(&tp->syncp);
tp->bytes_acked += delta;
- u64_stats_update_end(&tp->syncp);
+ u64_stats_update_end_raw(&tp->syncp);
tp->snd_una = ack;
}
@@ -3363,9 +3371,10 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
{
u32 delta = seq - tp->rcv_nxt;
- u64_stats_update_begin(&tp->syncp);
+ sock_owned_by_me((struct sock *)tp);
+ u64_stats_update_begin_raw(&tp->syncp);
tp->bytes_received += delta;
- u64_stats_update_end(&tp->syncp);
+ u64_stats_update_end_raw(&tp->syncp);
tp->rcv_nxt = seq;
}
@@ -3431,7 +3440,7 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
- NET_INC_STATS_BH(net, mib_idx);
+ NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
}
@@ -3464,7 +3473,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
challenge_count = 0;
}
if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
}
@@ -3513,8 +3522,8 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);
tcp_try_keep_open(sk);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPLOSSPROBERECOVERY);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPLOSSPROBERECOVERY);
} else if (!(flag & (FLAG_SND_UNA_ADVANCED |
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
/* Pure dupack: original and TLP probe arrived; no loss */
@@ -3618,14 +3627,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
u32 ack_ev_flags = CA_ACK_SLOWPATH;
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
@@ -4128,7 +4137,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
else
mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
tp->rx_opt.dsack = 1;
tp->duplicate_sack[0].start_seq = seq;
@@ -4152,7 +4161,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -4302,7 +4311,7 @@ static bool tcp_try_coalesce(struct sock *sk,
atomic_add(delta, &sk->sk_rmem_alloc);
sk_mem_charge(sk, delta);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
@@ -4390,7 +4399,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
tcp_ecn_check_ce(tp, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
tcp_drop(sk, skb);
return;
}
@@ -4399,7 +4408,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -4454,7 +4463,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
@@ -4493,7 +4502,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
__skb_unlink(skb1, &tp->out_of_order_queue);
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb1);
}
@@ -4608,14 +4617,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
__set_current_state(TASK_RUNNING);
- local_bh_enable();
if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
eaten = (chunk == skb->len);
tcp_rcv_space_adjust(sk);
}
- local_bh_disable();
}
if (eaten <= 0) {
@@ -4658,7 +4665,7 @@ queue_and_out:
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an immediate ack. */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
out_of_window:
@@ -4704,7 +4711,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
__skb_unlink(skb, list);
__kfree_skb(skb);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
return next;
}
@@ -4863,7 +4870,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
bool res = false;
if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
__skb_queue_purge(&tp->out_of_order_queue);
/* Reset SACK state. A conforming SACK implementation will
@@ -4892,7 +4899,7 @@ static int tcp_prune_queue(struct sock *sk)
SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
@@ -4922,7 +4929,7 @@ static int tcp_prune_queue(struct sock *sk)
* drop receive data on the floor. It will get retransmitted
* and hopefully then we'll have sufficient space.
*/
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
/* Massive buffer overcommit. */
tp->pred_flags = 0;
@@ -5131,7 +5138,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
int chunk = skb->len - hlen;
int err;
- local_bh_enable();
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
else
@@ -5143,32 +5149,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
tcp_rcv_space_adjust(sk);
}
- local_bh_disable();
return err;
}
-static __sum16 __tcp_checksum_complete_user(struct sock *sk,
- struct sk_buff *skb)
-{
- __sum16 result;
-
- if (sock_owned_by_user(sk)) {
- local_bh_enable();
- result = __tcp_checksum_complete(skb);
- local_bh_disable();
- } else {
- result = __tcp_checksum_complete(skb);
- }
- return result;
-}
-
-static inline bool tcp_checksum_complete_user(struct sock *sk,
- struct sk_buff *skb)
-{
- return !skb_csum_unnecessary(skb) &&
- __tcp_checksum_complete_user(sk, skb);
-}
-
/* Does PAWS and seqno based validation of an incoming segment, flags will
* play significant role here.
*/
@@ -5181,7 +5164,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,
&tp->last_oow_ack_time))
@@ -5233,8 +5216,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (th->syn) {
syn_challenge:
if (syn_inerr)
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk, skb);
goto discard;
}
@@ -5349,7 +5332,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_data_snd_check(sk);
return;
} else { /* Header too small */
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
@@ -5377,12 +5360,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
__skb_pull(skb, tcp_header_len);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHPHITSTOUSER);
eaten = 1;
}
}
if (!eaten) {
- if (tcp_checksum_complete_user(sk, skb))
+ if (tcp_checksum_complete(skb))
goto csum_error;
if ((int)skb->truesize > sk->sk_forward_alloc)
@@ -5399,7 +5383,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_rcv_rtt_measure_ts(sk, skb);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
@@ -5426,7 +5410,7 @@ no_ack:
}
slow_path:
- if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
+ if (len < (th->doff << 2) || tcp_checksum_complete(skb))
goto csum_error;
if (!th->ack && !th->rst && !th->syn)
@@ -5456,8 +5440,8 @@ step5:
return;
csum_error:
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
discard:
tcp_drop(sk, skb);
@@ -5549,12 +5533,14 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
break;
}
tcp_rearm_rto(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
}
tp->syn_data_acked = tp->syn_data;
if (tp->syn_data_acked)
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVE);
tcp_fastopen_add_skb(sk, synack);
@@ -5589,7 +5575,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
@@ -5958,7 +5945,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
(TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
tcp_done(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
}
@@ -6015,7 +6002,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (sk->sk_shutdown & RCV_SHUTDOWN) {
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk);
return 1;
}
@@ -6153,10 +6140,10 @@ static bool tcp_syn_flood_action(const struct sock *sk,
if (net->ipv4.sysctl_tcp_syncookies) {
msg = "Sending cookies";
want_cookie = true;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
} else
#endif
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
if (!queue->synflood_warned &&
net->ipv4.sysctl_tcp_syncookies != 2 &&
@@ -6217,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
* timeout.
*/
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
}
@@ -6264,7 +6251,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (dst && strict &&
!tcp_peer_is_proven(req, dst, true,
tmp_opt.saw_tstamp)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d2a5763e5abc..8219d0d8dc83 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -320,7 +320,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
* an established socket here.
*/
if (seq != tcp_rsk(req)->snt_isn) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
} else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
@@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
th->dest, iph->saddr, ntohs(th->source),
inet_iif(icmp_skb));
if (!sk) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == TCP_TIME_WAIT) {
@@ -396,13 +396,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
*/
if (sock_owned_by_user(sk)) {
if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
}
if (sk->sk_state == TCP_CLOSE)
goto out;
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
- NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
@@ -413,7 +413,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -692,13 +692,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
arg.tos = ip_hdr(skb)->tos;
+ local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
- TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+ __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+ __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
+ local_bh_enable();
#ifdef CONFIG_TCP_MD5SIG
out:
@@ -774,12 +776,14 @@ static void tcp_v4_send_ack(struct net *net,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
+ local_bh_disable();
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+ local_bh_enable();
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1151,12 +1155,12 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
return false;
if (hash_expected && !hash_location) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}
if (!hash_expected && hash_location) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
@@ -1342,7 +1346,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
return newsk;
exit_overflow:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
dst_release(dst);
exit:
@@ -1432,8 +1436,8 @@ discard:
return 0;
csum_err:
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
@@ -1506,16 +1510,16 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
- if (tp->ucopy.memory > sk->sk_rcvbuf) {
+ if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
+ tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
struct sk_buff *skb1;
BUG_ON(sock_owned_by_user(sk));
+ __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
+ skb_queue_len(&tp->ucopy.prequeue));
- while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+ while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb1);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPPREQUEUEDROPPED);
- }
tp->ucopy.memory = 0;
} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
@@ -1547,7 +1551,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
goto discard_it;
/* Count it even if it's bad */
- TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
+ __TCP_INC_STATS(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1629,7 +1633,7 @@ process:
}
}
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
- NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
@@ -1662,7 +1666,7 @@ process:
} else if (unlikely(sk_add_backlog(sk, skb,
sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
- NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -1679,9 +1683,9 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
- TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+ __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ __TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(NULL, skb);
}
@@ -1835,7 +1839,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_free_fastopen_req(tp);
tcp_saved_syn_free(tp);
+ local_bh_disable();
sk_sockets_allocated_dec(sk);
+ local_bh_enable();
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
sock_release_memcg(sk);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 1e70fa8fa793..c67ece1390c2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -260,13 +260,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
* newReno in increase case.
* We work it out by following the idea from TCP-LP's paper directly
*/
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
+static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
- if (rtt_us > 0)
- tcp_lp_rtt_sample(sk, rtt_us);
+ if (sample->rtt_us > 0)
+ tcp_lp_rtt_sample(sk, sample->rtt_us);
/* calc inference */
if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
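tcp_lp_pkts_acked() above, like the vegas/veno/westwood/yeah hooks later in this diff, now receives a single const struct ack_sample * instead of separate count and RTT arguments. The sketch below shows the shape of such a callback; the field list is inferred only from the sample->pkts_acked and sample->rtt_us accesses visible in this patch and is not the authoritative kernel definition.

/* Sketch of the new pkts_acked() calling convention, based only on the
 * fields referenced in this patch (sample->pkts_acked, sample->rtt_us).
 */
#include <stdint.h>
#include <stdio.h>

struct ack_sample {
        uint32_t pkts_acked;   /* packets newly acknowledged by this ACK */
        int32_t  rtt_us;       /* RTT sample in microseconds, <0 if none */
};

struct toy_ca {                /* stands in for inet_csk_ca(sk) state */
        uint32_t acked;
        uint32_t last_rtt_us;
};

static void toy_pkts_acked(struct toy_ca *ca, const struct ack_sample *sample)
{
        ca->acked += sample->pkts_acked;
        if (sample->rtt_us > 0)          /* skip invalid samples, as tcp_lp does */
                ca->last_rtt_us = sample->rtt_us;
}

int main(void)
{
        struct toy_ca ca = { 0, 0 };
        struct ack_sample s = { .pkts_acked = 3, .rtt_us = 12000 };

        toy_pkts_acked(&ca, &s);
        printf("acked=%u rtt=%uus\n", ca.acked, ca.last_rtt_us);
        return 0;
}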
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4c53e7c86586..4b95ec4ed2c8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -235,7 +235,7 @@ kill:
}
if (paws_reject)
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
+ __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
if (!th->rst) {
/* In this case we must reset the TIMEWAIT timer.
@@ -337,7 +337,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
}
tcp_update_metrics(sk);
@@ -545,7 +545,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rack.mstamp.v64 = 0;
newtp->rack.advanced = 0;
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
return newsk;
}
@@ -710,7 +710,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
&tcp_rsk(req)->last_oow_ack_time))
req->rsk_ops->send_ack(sk, skb, req);
if (paws_reject)
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
return NULL;
}
@@ -729,7 +729,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
}
@@ -752,7 +752,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
return NULL;
}
@@ -791,7 +791,7 @@ embryonic_reset:
}
if (!fastopen) {
inet_csk_reqsk_queue_drop(sk, req);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
}
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9d3b4b364652..8daefd8b1b49 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -949,7 +949,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
+ skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
@@ -1111,11 +1111,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
tcp_verify_left_out(tp);
}
+static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
+{
+ return TCP_SKB_CB(skb)->txstamp_ack ||
+ (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
+}
+
static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
- if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
+ if (unlikely(tcp_has_tx_tstamp(skb)) &&
!before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
@@ -1128,6 +1134,12 @@ static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
}
}
+static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
+{
+ TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
+ TCP_SKB_CB(skb)->eor = 0;
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -1173,6 +1185,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
TCP_SKB_CB(buff)->tcp_flags = flags;
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
+ tcp_skb_fragment_eor(skb, buff);
if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
/* Copy and checksum data tail into the new buffer. */
@@ -1733,6 +1746,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* This packet was never sent out yet, so no SACK bits. */
TCP_SKB_CB(buff)->sacked = 0;
+ tcp_skb_fragment_eor(skb, buff);
+
buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2206,14 +2221,13 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Thanks to skb fast clones, we can detect if a prior transmit of
* a packet is still in a qdisc or driver queue.
* In this case, there is very little point doing a retransmit !
- * Note: This is called from BH context only.
*/
static bool skb_still_in_host_queue(const struct sock *sk,
const struct sk_buff *skb)
{
if (unlikely(skb_fclone_busy(sk, skb))) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
return true;
}
return false;
@@ -2275,7 +2289,7 @@ void tcp_send_loss_probe(struct sock *sk)
tp->tlp_high_seq = tp->snd_nxt;
probe_sent:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
/* Reset s.t. tcp_rearm_rto will restart timer from now */
inet_csk(sk)->icsk_pending = 0;
rearm_timer:
@@ -2446,13 +2460,12 @@ u32 __tcp_select_window(struct sock *sk)
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
const struct sk_buff *next_skb)
{
- const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb);
- u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
-
- if (unlikely(tsflags)) {
+ if (unlikely(tcp_has_tx_tstamp(next_skb))) {
+ const struct skb_shared_info *next_shinfo =
+ skb_shinfo(next_skb);
struct skb_shared_info *shinfo = skb_shinfo(skb);
- shinfo->tx_flags |= tsflags;
+ shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
shinfo->tskey = next_shinfo->tskey;
TCP_SKB_CB(skb)->txstamp_ack |=
TCP_SKB_CB(next_skb)->txstamp_ack;
@@ -2494,6 +2507,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
* packet counting does not break.
*/
TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
+ TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
/* changed transmit queue under us so clear hints */
tcp_clear_retrans_hints_partial(tp);
@@ -2545,6 +2559,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (!tcp_can_collapse(sk, skb))
break;
+ if (!tcp_skb_can_collapse_to(to))
+ break;
+
space -= skb->len;
if (first) {
@@ -2656,7 +2673,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
/* Update global TCP statistics. */
TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
tp->total_retrans += segs;
}
return err;
@@ -2681,7 +2698,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
tp->retrans_stamp = tcp_skb_timestamp(skb);
} else if (err != -EBUSY) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
if (tp->undo_retrans < 0)
@@ -2805,7 +2822,7 @@ begin_fwd:
if (tcp_retransmit_skb(sk, skb, segs))
return;
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
@@ -3042,7 +3059,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
@@ -3540,8 +3557,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
tcp_rsk(req)->txhash = net_tx_rndhash();
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
if (!res) {
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
}
return res;
}
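Two small helpers above carry TCP_SKB_CB(skb)->eor across segment surgery: tcp_skb_fragment_eor() moves the end-of-record mark to the tail fragment when a segment is split, and the new check in tcp_retrans_try_collapse() stops merging once the destination segment carries the mark (assuming tcp_skb_can_collapse_to(), which this hunk does not show, simply tests that bit). A self-contained model of the invariant, with a segment reduced to a length plus an eor flag:

/* Model of keeping an end-of-record (EOR) mark on the *last* piece when a
 * segment is split, and of refusing to collapse data past an EOR boundary.
 * The struct below is an illustration, not the kernel sk_buff.
 */
#include <stdbool.h>
#include <stdio.h>

struct seg {
        unsigned int len;
        bool eor;               /* end of application record */
};

/* Modeled on tcp_skb_fragment_eor(): EOR travels with the tail fragment. */
static void split_seg(struct seg *skb, struct seg *tail, unsigned int at)
{
        tail->len = skb->len - at;
        tail->eor = skb->eor;
        skb->len = at;
        skb->eor = false;
}

/* Assumed behaviour of tcp_skb_can_collapse_to(): never append after EOR. */
static bool can_collapse_to(const struct seg *to)
{
        return !to->eor;
}

int main(void)
{
        struct seg skb = { .len = 1000, .eor = true }, tail;

        split_seg(&skb, &tail, 600);
        printf("head: len=%u eor=%d, tail: len=%u eor=%d, collapse into head ok=%d\n",
               skb.len, skb.eor, tail.len, tail.eor, can_collapse_to(&skb));
        return 0;
}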
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 5353085fd0b2..e36df4fcfeba 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -65,8 +65,8 @@ int tcp_rack_mark_lost(struct sock *sk)
if (scb->sacked & TCPCB_SACKED_RETRANS) {
scb->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPLOSTRETRANSMIT);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPLOSTRETRANSMIT);
}
} else if (!(scb->sacked & TCPCB_RETRANS)) {
/* Original data are sent sequentially so stop early
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 373b03e78aaa..debdd8b33e69 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -30,7 +30,7 @@ static void tcp_write_err(struct sock *sk)
sk->sk_error_report(sk);
tcp_done(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
/* Do not allow orphaned sockets to eat all our resources.
@@ -68,7 +68,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
return 0;
@@ -162,8 +162,8 @@ static int tcp_write_timeout(struct sock *sk)
if (tp->syn_fastopen || tp->syn_data)
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
if (tp->syn_data && icsk->icsk_retransmits == 1)
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
syn_set = true;
@@ -178,8 +178,8 @@ static int tcp_write_timeout(struct sock *sk)
tp->bytes_acked <= tp->rx_opt.mss_clamp) {
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
@@ -209,6 +209,7 @@ static int tcp_write_timeout(struct sock *sk)
return 0;
}
+/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -228,7 +229,7 @@ void tcp_delack_timer_handler(struct sock *sk)
if (!skb_queue_empty(&tp->ucopy.prequeue)) {
struct sk_buff *skb;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb);
@@ -248,7 +249,7 @@ void tcp_delack_timer_handler(struct sock *sk)
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
tcp_send_ack(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
out:
@@ -265,7 +266,7 @@ static void tcp_delack_timer(unsigned long data)
tcp_delack_timer_handler(sk);
} else {
inet_csk(sk)->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* delegate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
sock_hold(sk);
@@ -431,7 +432,7 @@ void tcp_retransmit_timer(struct sock *sk)
} else {
mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
- NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ __NET_INC_STATS(sock_net(sk), mib_idx);
}
tcp_enter_loss(sk);
@@ -493,6 +494,7 @@ out_reset_timer:
out:;
}
+/* Called with BH disabled */
void tcp_write_timer_handler(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -549,7 +551,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req)
{
struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
- NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS);
+ __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
}
EXPORT_SYMBOL(tcp_syn_ack_timeout);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13951c4087d4..4c4bac1b5eab 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -107,16 +107,16 @@ EXPORT_SYMBOL_GPL(tcp_vegas_init);
* o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT)
*/
-void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
+void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct vegas *vegas = inet_csk_ca(sk);
u32 vrtt;
- if (rtt_us < 0)
+ if (sample->rtt_us < 0)
return;
/* Never allow zero rtt or baseRTT */
- vrtt = rtt_us + 1;
+ vrtt = sample->rtt_us + 1;
/* Filter to find propagation delay: */
if (vrtt < vegas->baseRTT)
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index ef9da5306c68..248cfc0ff9ae 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -17,7 +17,7 @@ struct vegas {
void tcp_vegas_init(struct sock *sk);
void tcp_vegas_state(struct sock *sk, u8 ca_state);
-void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
+void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample);
void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info);
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 0d094b995cd9..40171e163cff 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,16 +69,17 @@ static void tcp_veno_init(struct sock *sk)
}
/* Do rtt sampling needed for Veno. */
-static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
+static void tcp_veno_pkts_acked(struct sock *sk,
+ const struct ack_sample *sample)
{
struct veno *veno = inet_csk_ca(sk);
u32 vrtt;
- if (rtt_us < 0)
+ if (sample->rtt_us < 0)
return;
/* Never allow zero rtt or baseRTT */
- vrtt = rtt_us + 1;
+ vrtt = sample->rtt_us + 1;
/* Filter to find propagation delay: */
if (vrtt < veno->basertt)
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index c10732e39837..4b03a2e2a050 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -99,12 +99,13 @@ static void westwood_filter(struct westwood *w, u32 delta)
* Called after processing group of packets.
* but all westwood needs is the last sample of srtt.
*/
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, s32 rtt)
+static void tcp_westwood_pkts_acked(struct sock *sk,
+ const struct ack_sample *sample)
{
struct westwood *w = inet_csk_ca(sk);
- if (rtt > 0)
- w->rtt = usecs_to_jiffies(rtt);
+ if (sample->rtt_us > 0)
+ w->rtt = usecs_to_jiffies(sample->rtt_us);
}
/*
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 3e6a472e6b88..028eb046ea40 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -56,15 +56,16 @@ static void tcp_yeah_init(struct sock *sk)
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
}
-static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
+static void tcp_yeah_pkts_acked(struct sock *sk,
+ const struct ack_sample *sample)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct yeah *yeah = inet_csk_ca(sk);
if (icsk->icsk_ca_state == TCP_CA_Open)
- yeah->pkts_acked = pkts_acked;
+ yeah->pkts_acked = sample->pkts_acked;
- tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
+ tcp_vegas_pkts_acked(sk, sample);
}
static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 76ea0a8be090..f67f52ba4809 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -688,7 +688,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
iph->saddr, uh->source, skb->dev->ifindex, udptable,
NULL);
if (!sk) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
}
@@ -882,13 +882,13 @@ send:
err = ip_send_skb(sock_net(sk), skb);
if (err) {
if (err == -ENOBUFS && !inet->recverr) {
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
}
} else
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_OUTDATAGRAMS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
@@ -1157,8 +1157,8 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
@@ -1242,10 +1242,10 @@ static unsigned int first_packet_length(struct sock *sk)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
- IS_UDPLITE(sk));
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+ IS_UDPLITE(sk));
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
atomic_inc(&sk->sk_drops);
__skb_unlink(skb, rcvq);
__skb_queue_tail(&list_kill, skb);
@@ -1352,16 +1352,16 @@ try_again:
trace_kfree_skb(skb, udp_recvmsg);
if (!peeked) {
atomic_inc(&sk->sk_drops);
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
}
skb_free_datagram_locked(sk, skb);
return err;
}
if (!peeked)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
sock_recv_ts_and_drops(msg, sk, skb);
@@ -1386,8 +1386,8 @@ try_again:
csum_copy_err:
slow = lock_sock_fast(sk);
if (!skb_kill_datagram(sk, skb, flags)) {
- UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
- UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
}
unlock_sock_fast(sk, slow);
@@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- is_udplite);
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+ is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
@@ -1580,9 +1580,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
ret = encap_rcv(sk, skb);
if (ret <= 0) {
- UDP_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INDATAGRAMS,
- is_udplite);
+ __UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
+ is_udplite);
return -ret;
}
}
@@ -1633,8 +1633,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- is_udplite);
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+ is_udplite);
goto drop;
}
@@ -1653,9 +1653,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return rc;
csum_error:
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
return -1;
@@ -1715,10 +1715,10 @@ start_lookup:
if (unlikely(!nskb)) {
atomic_inc(&sk->sk_drops);
- UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP_INC_STATS_BH(net, UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
+ __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ __UDP_INC_STATS(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
continue;
}
if (udp_queue_rcv_skb(sk, nskb) > 0)
@@ -1736,8 +1736,8 @@ start_lookup:
consume_skb(skb);
} else {
kfree_skb(skb);
- UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -1851,7 +1851,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -1878,9 +1878,9 @@ csum_error:
proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
ulen);
- UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
- UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 097060def7f0..6b7459c92bb2 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -350,6 +350,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
uh->len = newlen;
+ /* Set encapsulation before calling into inner gro_complete() functions
+ * to make them set up the inner offsets.
+ */
+ skb->encapsulation = 1;
+
rcu_read_lock();
sk = (*lookup)(skb, uh->source, uh->dest);
if (sk && udp_sk(sk)->gro_complete)
@@ -360,9 +365,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
if (skb->remcsum_offload)
skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;
- skb->encapsulation = 1;
- skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));
-
return err;
}
EXPORT_SYMBOL(udp_gro_complete);
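The move of skb->encapsulation = 1 above is an ordering fix: per the new comment, the inner gro_complete() handlers rely on it when setting up the inner offsets, so the flag has to be set before calling inward rather than afterwards. A toy model of that dependency follows; all names and the fixed UDP header size are illustrative only.

/* Toy model of the ordering fix: the inner completion handler only records
 * inner offsets when it sees the encapsulation flag, so the flag must be
 * set before the handler runs.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_skb {
        bool encapsulation;
        int  inner_offset;      /* -1 = not set */
};

static void inner_complete(struct toy_skb *skb, int nhoff)
{
        if (skb->encapsulation)                /* keys off the flag */
                skb->inner_offset = nhoff + 8; /* 8 = sizeof(struct udphdr) */
}

static void outer_complete(struct toy_skb *skb, int nhoff)
{
        skb->encapsulation = true;             /* set *before* calling inward */
        inner_complete(skb, nhoff);
}

int main(void)
{
        struct toy_skb skb = { .encapsulation = false, .inner_offset = -1 };

        outer_complete(&skb, 34);
        printf("inner_offset=%d\n", skb.inner_offset);
        return 0;
}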
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11e875ffd7ac..3f8411328de5 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -218,6 +218,7 @@ config IPV6_GRE
tristate "IPv6: GRE tunnel"
select IPV6_TUNNEL
select NET_IP_TUNNEL
+ depends on NET_IPGRE_DEMUX
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f5a77a9dd34e..47f837a58e0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3175,35 +3175,9 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-/* If the host route is cached on the addr struct make sure it is associated
- * with the proper table. e.g., enslavement can change and if so the cached
- * host route needs to move to the new table.
- */
-static void l3mdev_check_host_rt(struct inet6_dev *idev,
- struct inet6_ifaddr *ifp)
-{
- if (ifp->rt) {
- u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
-
- if (tb_id != ifp->rt->rt6i_table->tb6_id) {
- ip6_del_rt(ifp->rt);
- ifp->rt = NULL;
- }
- }
-}
-#else
-static void l3mdev_check_host_rt(struct inet6_dev *idev,
- struct inet6_ifaddr *ifp)
-{
-}
-#endif
-
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- l3mdev_check_host_rt(idev, ifp);
-
if (!ifp->rt) {
struct rt6_info *rt;
@@ -3303,6 +3277,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
if (event == NETDEV_UP) {
+ /* restore routes for permanent addresses */
+ addrconf_permanent_addr(dev);
+
if (!addrconf_qdisc_ok(dev)) {
/* device is not ready yet. */
pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
@@ -3336,9 +3313,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- /* restore routes for permanent addresses */
- addrconf_permanent_addr(dev);
-
switch (dev->type) {
#if IS_ENABLED(CONFIG_IPV6_SIT)
case ARPHRD_SIT:
@@ -3555,6 +3529,8 @@ restart:
INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ struct rt6_info *rt = NULL;
+
addrconf_del_dad_work(ifa);
write_unlock_bh(&idev->lock);
@@ -3567,6 +3543,9 @@ restart:
ifa->state = 0;
if (!(ifa->flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
+
+ rt = ifa->rt;
+ ifa->rt = NULL;
} else {
state = ifa->state;
ifa->state = INET6_IFADDR_STATE_DEAD;
@@ -3577,6 +3556,9 @@ restart:
spin_unlock_bh(&ifa->lock);
+ if (rt)
+ ip6_del_rt(rt);
+
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
@@ -5344,10 +5326,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (rt)
ip6_del_rt(rt);
}
- dst_hold(&ifp->rt->dst);
-
- ip6_del_rt(ifp->rt);
-
+ if (ifp->rt) {
+ dst_hold(&ifp->rt->dst);
+ ip6_del_rt(ifp->rt);
+ }
rt_genid_bump_ipv6(net);
break;
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ea9ee5cce5cf..00d0c2903173 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -727,14 +727,13 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
struct msghdr *msg, struct flowi6 *fl6,
- struct ipv6_txoptions *opt,
- int *hlimit, int *tclass, int *dontfrag,
- struct sockcm_cookie *sockc)
+ struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
struct ipv6_rt_hdr *rthdr;
struct ipv6_opt_hdr *hdr;
+ struct ipv6_txoptions *opt = ipc6->opt;
int len;
int err = 0;
@@ -953,8 +952,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
- *hlimit = *(int *)CMSG_DATA(cmsg);
- if (*hlimit < -1 || *hlimit > 0xff) {
+ ipc6->hlimit = *(int *)CMSG_DATA(cmsg);
+ if (ipc6->hlimit < -1 || ipc6->hlimit > 0xff) {
err = -EINVAL;
goto exit_f;
}
@@ -974,7 +973,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
err = 0;
- *tclass = tc;
+ ipc6->tclass = tc;
break;
}
@@ -992,7 +991,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
err = 0;
- *dontfrag = df;
+ ipc6->dontfrag = df;
break;
}
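ip6_datagram_send_ctl() now fills one ipcm6_cookie rather than separate hlimit/tclass/dontfrag pointers plus the tx options, and the later icmp6_send()/icmpv6_echo_reply() hunks hand the same cookie to ip6_append_data(). A rough stand-alone sketch of that consolidation; the field set is inferred from the assignments in this diff, not copied from the real header.

/* Sketch of bundling per-sendmsg IPv6 control data into one cookie.
 * The field list is inferred from the ipc6->hlimit/tclass/dontfrag/opt
 * uses in this patch and is not the authoritative kernel definition.
 */
#include <stdio.h>

struct ipv6_txoptions;          /* opaque here */

struct ipcm6_cookie {
        int hlimit;             /* -1 means "use socket/route default" */
        int tclass;             /* -1 means "use socket default" */
        int dontfrag;           /* -1 means "use socket default" */
        struct ipv6_txoptions *opt;
};

/* Callers used to pass &hlimit, &tclass, &dontfrag and opt separately;
 * now one pointer carries everything a cmsg parser may fill in.
 */
static void parse_hoplimit_cmsg(struct ipcm6_cookie *ipc6, int value)
{
        if (value >= -1 && value <= 0xff)
                ipc6->hlimit = value;
}

int main(void)
{
        struct ipcm6_cookie ipc6 = { .hlimit = -1, .tclass = -1,
                                     .dontfrag = -1, .opt = NULL };

        parse_hoplimit_cmsg(&ipc6, 64);
        printf("hlimit=%d tclass=%d dontfrag=%d\n",
               ipc6.hlimit, ipc6.tclass, ipc6.dontfrag);
        return 0;
}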
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ea7c4d64a00a..8de5dd7aaa05 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
return 1;
}
- IP6_INC_STATS_BH(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
return -1;
}
@@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -334,8 +334,8 @@ looped_back:
* processed by own
*/
if (!addr) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -360,8 +360,8 @@ looped_back:
goto unknown_rh;
/* Silently discard invalid RTH type 2 */
if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -379,8 +379,8 @@ looped_back:
n = hdr->hdrlen >> 1;
if (hdr->segments_left > n) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((&hdr->segments_left) -
skb_network_header(skb)));
@@ -393,8 +393,8 @@ looped_back:
if (skb_cloned(skb)) {
/* the copy is a forwarded packet */
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return -1;
}
@@ -416,14 +416,14 @@ looped_back:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&ipv6_hdr(skb)->saddr,
IPPROTO_ROUTING) < 0) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -434,8 +434,8 @@ looped_back:
}
if (ipv6_addr_is_multicast(addr)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -454,8 +454,8 @@ looped_back:
if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0);
kfree_skb(skb);
@@ -470,7 +470,7 @@ looped_back:
return -1;
unknown_rh:
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
(&hdr->type) - skb_network_header(skb));
return -1;
@@ -568,28 +568,28 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
nh[optoff+1]);
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return false;
}
if (ipv6_hdr(skb)->payload_len) {
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6b573ebe49de..4527285fcaa2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -401,10 +401,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
struct flowi6 fl6;
struct icmpv6_msg msg;
struct sockcm_cookie sockc_unused = {0};
+ struct ipcm6_cookie ipc6;
int iif = 0;
int addr_type = 0;
int len;
- int hlimit;
int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
@@ -446,6 +446,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (__ipv6_addr_needs_scope_id(addr_type))
iif = skb->dev->ifindex;
+ else
+ iif = l3mdev_master_ifindex(skb->dev);
/*
* Must not send error if the source does not uniquely
@@ -500,14 +502,14 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
-
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = np->tclass;
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -526,9 +528,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
err = ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit,
- np->tclass, NULL, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, np->dontfrag, &sockc_unused);
+ sizeof(struct icmp6hdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ MSG_DONTWAIT, &sockc_unused);
if (err) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -563,9 +565,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct flowi6 fl6;
struct icmpv6_msg msg;
struct dst_entry *dst;
+ struct ipcm6_cookie ipc6;
int err = 0;
- int hlimit;
- u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
struct sockcm_cookie sockc_unused = {0};
@@ -607,22 +608,24 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-
idev = __in6_dev_get(skb->dev);
msg.skb = skb;
msg.offset = 0;
msg.type = ICMPV6_ECHO_REPLY;
- tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
+
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
+ sizeof(struct icmp6hdr), &ipc6, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
- np->dontfrag, &sockc_unused);
+ &sockc_unused);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -674,7 +677,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
return;
out:
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
/*
@@ -710,7 +713,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+ __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -728,7 +731,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
type = hdr->icmp6_type;
- ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
+ ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
switch (type) {
case ICMPV6_ECHO_REQUEST:
@@ -812,9 +815,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
return 0;
csum_error:
- ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+ __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
- ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
kfree_skb(skb);
return 0;
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index 28542cb2b387..d08fd2d48a78 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -23,10 +23,76 @@
#include <net/protocol.h>
#include <uapi/linux/ila.h>
+struct ila_locator {
+ union {
+ __u8 v8[8];
+ __be16 v16[4];
+ __be32 v32[2];
+ __be64 v64;
+ };
+};
+
+struct ila_identifier {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 __space:4;
+ u8 csum_neutral:1;
+ u8 type:3;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 type:3;
+ u8 csum_neutral:1;
+ u8 __space:4;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ u8 __space2[7];
+ };
+ __u8 v8[8];
+ __be16 v16[4];
+ __be32 v32[2];
+ __be64 v64;
+ };
+};
+
+enum {
+ ILA_ATYPE_IID = 0,
+ ILA_ATYPE_LUID,
+ ILA_ATYPE_VIRT_V4,
+ ILA_ATYPE_VIRT_UNI_V6,
+ ILA_ATYPE_VIRT_MULTI_V6,
+ ILA_ATYPE_RSVD_1,
+ ILA_ATYPE_RSVD_2,
+ ILA_ATYPE_RSVD_3,
+};
+
+#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
+
+struct ila_addr {
+ union {
+ struct in6_addr addr;
+ struct {
+ struct ila_locator loc;
+ struct ila_identifier ident;
+ };
+ };
+};
+
+static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
+{
+ return (struct ila_addr *)addr;
+}
+
+static inline bool ila_addr_is_ila(struct ila_addr *iaddr)
+{
+ return (iaddr->ident.type != ILA_ATYPE_IID);
+}
+
struct ila_params {
- __be64 locator;
- __be64 locator_match;
+ struct ila_locator locator;
+ struct ila_locator locator_match;
__wsum csum_diff;
+ u8 csum_mode;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
@@ -38,7 +104,14 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
return csum_partial(diff, sizeof(diff), 0);
}
-void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+static inline bool ila_csum_neutral_set(struct ila_identifier ident)
+{
+ return !!(ident.csum_neutral);
+}
+
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+
+void ila_init_saved_csum(struct ila_params *p);
int ila_lwt_init(void);
void ila_lwt_fini(void);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 30613050e4ca..0e94042d1289 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -15,20 +15,52 @@
static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
{
- if (*(__be64 *)&ip6h->daddr == p->locator_match)
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ if (p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&ip6h->daddr,
+ return compute_csum_diff8((__be32 *)&iaddr->loc,
+ (__be32 *)&p->locator);
+}
+
+static void ila_csum_do_neutral(struct ila_addr *iaddr,
+ struct ila_params *p)
+{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
+ __wsum diff, fval;
+
+ /* Check if checksum adjust value has been cached */
+ if (p->locator_match.v64) {
+ diff = p->csum_diff;
+ } else {
+ diff = compute_csum_diff8((__be32 *)iaddr,
(__be32 *)&p->locator);
+ }
+
+ fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
+ ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG);
+
+ diff = csum_add(diff, fval);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+
+ /* Flip the csum-neutral bit. Either we are doing a SIR->ILA
+ * translation with ILA_CSUM_NEUTRAL_MAP as the csum_method
+ * and the C-bit is not set, or we are doing an ILA-SIR
+ * translation and the C-bit is set.
+ */
+ iaddr->ident.csum_neutral ^= 1;
}
-void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
{
__wsum diff;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
size_t nhoff = sizeof(struct ipv6hdr);
- /* First update checksum */
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
@@ -68,7 +100,46 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
}
/* Now change destination address */
- *(__be64 *)&ip6h->daddr = p->locator;
+ iaddr->loc = p->locator;
+}
+
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ /* First deal with the transport checksum */
+ if (ila_csum_neutral_set(iaddr->ident)) {
+ /* C-bit is set in the locator indicating that this
+ * is a locator being translated to a SIR address.
+ * Perform (receiver) checksum-neutral translation.
+ */
+ ila_csum_do_neutral(iaddr, p);
+ } else {
+ switch (p->csum_mode) {
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ ila_csum_do_neutral(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION:
+ break;
+ }
+ }
+
+ /* Now change destination address */
+ iaddr->loc = p->locator;
+}
+
+void ila_init_saved_csum(struct ila_params *p)
+{
+ if (!p->locator_match.v64)
+ return;
+
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match,
+ (__be32 *)&p->locator);
}
static int __init ila_init(void)
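ila_csum_do_neutral() keeps TCP/UDP checksums valid without editing the transport header: the checksum delta caused by rewriting the locator (plus a constant for the flipped C-bit) is folded into the low 16 bits of the identifier. The stand-alone program below demonstrates why folding the negated delta into one spare 16-bit word leaves any ones'-complement checksum over the address unchanged; the addresses and word layout are made up for illustration and are not real ILA state.

/* Demonstration of checksum-neutral address rewriting: if one 16-bit word
 * of the address absorbs the negative of the checksum delta caused by the
 * rewrite, a ones'-complement transport checksum over the packet is
 * unchanged.  Values are illustrative, not real ILA locators.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t csum16(const uint16_t *w, int n)
{
        uint32_t sum = 0;

        while (n--) {
                sum += *w++;
                sum = (sum & 0xffff) + (sum >> 16);   /* end-around carry */
        }
        return (uint16_t)sum;
}

int main(void)
{
        /* 8 words: 4 of "locator", 4 of "identifier" */
        uint16_t addr[8] = { 0x2001, 0x0db8, 0x0000, 0x0001,
                             0x1234, 0x5678, 0x9abc, 0x0000 };
        uint16_t new_loc[4] = { 0x2001, 0x0db8, 0xbeef, 0x0002 };
        uint16_t before = csum16(addr, 8);
        uint32_t delta = 0;
        int i;

        for (i = 0; i < 4; i++) {
                delta += (uint16_t)~addr[i];          /* remove old locator */
                delta += new_loc[i];                  /* add new locator    */
                addr[i] = new_loc[i];
        }
        delta = (delta & 0xffff) + (delta >> 16);
        delta = (delta & 0xffff) + (delta >> 16);

        /* Fold the negated delta into the last identifier word (was 0). */
        addr[7] = (uint16_t)~delta;

        printf("sum before=0x%04x after=0x%04x\n", before, csum16(addr, 8));
        return 0;
}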
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 9db3621b2126..1dfb64166d7d 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
return dst->lwtstate->orig_output(net, sk, skb);
@@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
return dst->lwtstate->orig_input(skb);
@@ -53,6 +53,7 @@ drop:
static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+ [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
static int ila_build_state(struct net_device *dev, struct nlattr *nla,
@@ -64,11 +65,28 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
size_t encap_len = sizeof(*p);
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
+ struct ila_addr *iaddr;
int ret;
if (family != AF_INET6)
return -EINVAL;
+ if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ iaddr = (struct ila_addr *)&cfg6->fc_dst;
+
+ if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation for a non-ILA address or checksum
+ * neutral flag to be set.
+ */
+ return -EINVAL;
+ }
+
ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
ila_nl_policy);
if (ret < 0)
@@ -84,16 +102,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
newts->len = encap_len;
p = ila_params_lwtunnel(newts);
- p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
+ p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
- if (cfg6->fc_dst_len > sizeof(__be64)) {
- /* Precompute checksum difference for translation since we
- * know both the old locator and the new one.
- */
- p->locator_match = *(__be64 *)&cfg6->fc_dst;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
- }
+ /* Precompute checksum difference for translation since we
+ * know both the old locator and the new one.
+ */
+ p->locator_match = iaddr->loc;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match, (__be32 *)&p->locator);
+
+ if (tb[ILA_ATTR_CSUM_MODE])
+ p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ ila_init_saved_csum(p);
newts->type = LWTUNNEL_ENCAP_ILA;
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
@@ -109,9 +130,11 @@ static int ila_fill_encap_info(struct sk_buff *skb,
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
- if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator,
+ if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
ILA_ATTR_PAD))
goto nla_put_failure;
+ if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
+ goto nla_put_failure;
return 0;
@@ -121,8 +144,9 @@ nla_put_failure:
static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
- /* No encapsulation overhead */
- return 0;
+ return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
+ 0;
}
static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -130,7 +154,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct ila_params *a_p = ila_params_lwtunnel(a);
struct ila_params *b_p = ila_params_lwtunnel(b);
- return (a_p->locator != b_p->locator);
+ return (a_p->locator.v64 != b_p->locator.v64);
}
static const struct lwtunnel_encap_ops ila_encap_ops = {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 0e9e579410da..a90e57229c6c 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -11,13 +11,11 @@
struct ila_xlat_params {
struct ila_params ip;
- __be64 identifier;
int ifindex;
- unsigned int dir;
};
struct ila_map {
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
struct rhash_head node;
struct ila_map __rcu *next;
struct rcu_head rcu;
@@ -66,31 +64,29 @@ static __always_inline void __ila_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static inline u32 ila_identifier_hash(__be64 identifier)
+static inline u32 ila_locator_hash(struct ila_locator loc)
{
- u32 *v = (u32 *)&identifier;
+ u32 *v = (u32 *)loc.v32;
return jhash_2words(v[0], v[1], hashrnd);
}
-static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier)
+static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
+ struct ila_locator loc)
{
- return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask];
+ return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask];
}
-static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc,
- int ifindex, unsigned int dir)
+static inline int ila_cmp_wildcards(struct ila_map *ila,
+ struct ila_addr *iaddr, int ifindex)
{
- return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) ||
- (ila->p.ifindex && ila->p.ifindex != ifindex) ||
- !(ila->p.dir & dir);
+ return (ila->xp.ifindex && ila->xp.ifindex != ifindex);
}
-static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p)
+static inline int ila_cmp_params(struct ila_map *ila,
+ struct ila_xlat_params *xp)
{
- return (ila->p.ip.locator_match != p->ip.locator_match) ||
- (ila->p.ifindex != p->ifindex) ||
- (ila->p.dir != p->dir);
+ return (ila->xp.ifindex != xp->ifindex);
}
static int ila_cmpfn(struct rhashtable_compare_arg *arg,
@@ -98,17 +94,14 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg,
{
const struct ila_map *ila = obj;
- return (ila->p.identifier != *(__be64 *)arg->key);
+ return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key);
}
static inline int ila_order(struct ila_map *ila)
{
int score = 0;
- if (ila->p.ip.locator_match)
- score += 1 << 0;
-
- if (ila->p.ifindex)
+ if (ila->xp.ifindex)
score += 1 << 1;
return score;
@@ -117,7 +110,7 @@ static inline int ila_order(struct ila_map *ila)
static const struct rhashtable_params rht_params = {
.nelem_hint = 1024,
.head_offset = offsetof(struct ila_map, node),
- .key_offset = offsetof(struct ila_map, p.identifier),
+ .key_offset = offsetof(struct ila_map, xp.ip.locator_match),
.key_len = sizeof(u64), /* identifier */
.max_size = 1048576,
.min_size = 256,
@@ -136,50 +129,45 @@ static struct genl_family ila_nl_family = {
};
static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
- [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
- [ILA_ATTR_DIR] = { .type = NLA_U32, },
+ [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
- struct ila_xlat_params *p)
+ struct ila_xlat_params *xp)
{
- memset(p, 0, sizeof(*p));
-
- if (info->attrs[ILA_ATTR_IDENTIFIER])
- p->identifier = (__force __be64)nla_get_u64(
- info->attrs[ILA_ATTR_IDENTIFIER]);
+ memset(xp, 0, sizeof(*xp));
if (info->attrs[ILA_ATTR_LOCATOR])
- p->ip.locator = (__force __be64)nla_get_u64(
+ xp->ip.locator.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR]);
if (info->attrs[ILA_ATTR_LOCATOR_MATCH])
- p->ip.locator_match = (__force __be64)nla_get_u64(
+ xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
- if (info->attrs[ILA_ATTR_IFINDEX])
- p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
+ if (info->attrs[ILA_ATTR_CSUM_MODE])
+ xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
- if (info->attrs[ILA_ATTR_DIR])
- p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
+ if (info->attrs[ILA_ATTR_IFINDEX])
+ xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
return 0;
}
/* Must be called with rcu readlock */
-static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
+static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
int ifindex,
- unsigned int dir,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params);
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc,
+ rht_params);
while (ila) {
- if (!ila_cmp_wildcards(ila, loc, ifindex, dir))
+ if (!ila_cmp_wildcards(ila, iaddr, ifindex))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -188,15 +176,16 @@ static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
}
/* Must be called with rcu readlock */
-static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p,
+static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ ila = rhashtable_lookup_fast(&ilan->rhash_table,
+ &xp->ip.locator_match,
rht_params);
while (ila) {
- if (!ila_cmp_params(ila, p))
+ if (!ila_cmp_params(ila, xp))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -221,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, int dir);
+static int ila_xlat_addr(struct sk_buff *skb);
static unsigned int
ila_nf_input(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- ila_xlat_addr(skb, ILA_DIR_IN);
+ ila_xlat_addr(skb);
return NF_ACCEPT;
}
@@ -241,11 +230,11 @@ static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
},
};
-static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
+static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head;
- spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = 0, order;
if (!ilan->hooks_registered) {
@@ -264,22 +253,16 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
if (!ila)
return -ENOMEM;
- ila->p = *p;
+ ila_init_saved_csum(&xp->ip);
- if (p->ip.locator_match) {
- /* Precompute checksum difference for translation since we
- * know both the old identifier and the new one.
- */
- ila->p.ip.csum_diff = compute_csum_diff8(
- (__be32 *)&p->ip.locator_match,
- (__be32 *)&p->ip.locator);
- }
+ ila->xp = *xp;
order = ila_order(ila);
spin_lock(lock);
- head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ head = rhashtable_lookup_fast(&ilan->rhash_table,
+ &xp->ip.locator_match,
rht_params);
if (!head) {
/* New entry for the rhash_table */
@@ -289,7 +272,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
struct ila_map *tila = head, *prev = NULL;
do {
- if (!ila_cmp_params(tila, p)) {
+ if (!ila_cmp_params(tila, xp)) {
err = -EEXIST;
goto out;
}
@@ -326,23 +309,23 @@ out:
return err;
}
-static int ila_del_mapping(struct net *net, struct ila_xlat_params *p)
+static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head, *prev;
- spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = -ENOENT;
spin_lock(lock);
head = rhashtable_lookup_fast(&ilan->rhash_table,
- &p->identifier, rht_params);
+ &xp->ip.locator_match, rht_params);
ila = head;
prev = NULL;
while (ila) {
- if (ila_cmp_params(ila, p)) {
+ if (ila_cmp_params(ila, xp)) {
prev = ila;
ila = rcu_dereference_protected(ila->next,
lockdep_is_held(lock));
@@ -404,31 +387,28 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
int err;
- err = parse_nl_config(info, &p);
+ err = parse_nl_config(info, &xp);
if (err)
return err;
- ila_del_mapping(net, &p);
+ ila_del_mapping(net, &xp);
return 0;
}
static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
{
- if (nla_put_u64_64bit(msg, ILA_ATTR_IDENTIFIER,
- (__force u64)ila->p.identifier,
- ILA_ATTR_PAD) ||
- nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
- (__force u64)ila->p.ip.locator,
+ if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
+ (__force u64)ila->xp.ip.locator.v64,
ILA_ATTR_PAD) ||
nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH,
- (__force u64)ila->p.ip.locator_match,
+ (__force u64)ila->xp.ip.locator_match.v64,
ILA_ATTR_PAD) ||
- nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir))
+ nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
+ nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
return -1;
return 0;
@@ -460,11 +440,11 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
struct net *net = genl_info_net(info);
struct ila_net *ilan = net_generic(net, ila_net_id);
struct sk_buff *msg;
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
struct ila_map *ila;
int ret;
- ret = parse_nl_config(info, &p);
+ ret = parse_nl_config(info, &xp);
if (ret)
return ret;
@@ -474,7 +454,7 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
rcu_read_lock();
- ila = ila_lookup_by_params(&p, ilan);
+ ila = ila_lookup_by_params(&xp, ilan);
if (ila) {
ret = ila_dump_info(ila,
info->snd_portid,
@@ -617,45 +597,32 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, int dir)
+static int ila_xlat_addr(struct sk_buff *skb)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ila_net *ilan = net_generic(net, ila_net_id);
- __be64 identifier, locator_match;
- size_t nhoff;
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
/* Assumes skb contains a valid IPv6 header that is pulled */
- identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8];
- locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0];
- nhoff = sizeof(struct ipv6hdr);
+ if (!ila_addr_is_ila(iaddr)) {
+ /* Type indicates this is not an ILA address */
+ return 0;
+ }
rcu_read_lock();
- ila = ila_lookup_wildcards(identifier, locator_match,
- skb->dev->ifindex, dir, ilan);
+ ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- update_ipv6_locator(skb, &ila->p.ip);
+ ila_update_ipv6_locator(skb, &ila->xp.ip);
rcu_read_unlock();
return 0;
}
-int ila_xlat_incoming(struct sk_buff *skb)
-{
- return ila_xlat_addr(skb, ILA_DIR_IN);
-}
-EXPORT_SYMBOL(ila_xlat_incoming);
-
-int ila_xlat_outgoing(struct sk_buff *skb)
-{
- return ila_xlat_addr(skb, ILA_DIR_OUT);
-}
-EXPORT_SYMBOL(ila_xlat_outgoing);
-
int ila_xlat_init(void)
{
int ret;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index f1678388fb0d..00cf28ad4565 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -222,7 +222,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
sk_nulls_del_node_init_rcu((struct sock *)tw);
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ea071fad67a0..1bcef2369d64 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -240,6 +240,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id)
return tb;
}
+EXPORT_SYMBOL_GPL(fib6_new_table);
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 35d3ddc328f8..b912f0dbaf72 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -373,7 +373,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
struct msghdr msg;
struct flowi6 flowi6;
struct sockcm_cookie sockc_junk;
- int junk;
+ struct ipcm6_cookie ipc6;
err = -ENOMEM;
fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
@@ -390,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
msg.msg_control = (void *)(fl->opt+1);
memset(&flowi6, 0, sizeof(flowi6));
- err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
- &junk, &junk, &junk, &sockc_junk);
+ ipc6.opt = fl->opt;
+ err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
if (err)
goto done;
err = -EINVAL;
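Editor's note: the fl_create() hunk above (and the matching changes in ipv6_sockglue.c and ip6_output.c further down) replaces several loose scratch ints passed as &junk with a single struct ipcm6_cookie that carries hop limit, traffic class, dontfrag and the parsed tx options. A hedged sketch of that calling shape; the field names follow the diff, everything else here is invented for the example.

/*
 * Sketch of the parameter consolidation: one cookie struct instead of three
 * int pointers. -1 plays the "not set" role, as in the surrounding code.
 */
#include <stdio.h>

struct ipcm6_cookie_sketch {
	int hlimit;		/* -1: not set by control messages */
	int tclass;		/* -1: not set */
	int dontfrag;		/* -1: fall back to the socket default */
	void *opt;		/* tx options parsed from cmsgs */
};

/* stand-in for ip6_datagram_send_ctl(): fills whatever the cmsgs specify */
static int send_ctl_sketch(struct ipcm6_cookie_sketch *ipc6)
{
	ipc6->hlimit = 64;	/* pretend a cmsg set the hop limit */
	return 0;
}

int main(void)
{
	struct ipcm6_cookie_sketch ipc6 = {
		.hlimit = -1, .tclass = -1, .dontfrag = -1, .opt = NULL,
	};

	if (send_ctl_sketch(&ipc6))
		return 1;
	printf("hlimit=%d tclass=%d dontfrag=%d\n",
	       ipc6.hlimit, ipc6.tclass, ipc6.dontfrag);
	return 0;
}
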
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ca5a2c5675c5..ee62ec469ab3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -54,6 +54,7 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
+#include <net/gre.h>
static bool log_ecn_error = true;
@@ -342,7 +343,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
goto failed_free;
/* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & GRE_SEQ))
+ if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
dev_hold(dev);
@@ -443,137 +444,41 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->err_time = jiffies;
}
-static int ip6gre_rcv(struct sk_buff *skb)
+static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
const struct ipv6hdr *ipv6h;
- u8 *h;
- __be16 flags;
- __sum16 csum = 0;
- __be32 key = 0;
- u32 seqno = 0;
struct ip6_tnl *tunnel;
- int offset = 4;
- __be16 gre_proto;
- int err;
-
- if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
- goto drop;
ipv6h = ipv6_hdr(skb);
- h = skb->data;
- flags = *(__be16 *)h;
-
- if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
- /* - Version must be 0.
- - We do not support routing headers.
- */
- if (flags&(GRE_VERSION|GRE_ROUTING))
- goto drop;
-
- if (flags&GRE_CSUM) {
- csum = skb_checksum_simple_validate(skb);
- offset += 4;
- }
- if (flags&GRE_KEY) {
- key = *(__be32 *)(h + offset);
- offset += 4;
- }
- if (flags&GRE_SEQ) {
- seqno = ntohl(*(__be32 *)(h + offset));
- offset += 4;
- }
- }
-
- gre_proto = *(__be16 *)(h + 2);
-
tunnel = ip6gre_tunnel_lookup(skb->dev,
- &ipv6h->saddr, &ipv6h->daddr, key,
- gre_proto);
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+ tpi->proto);
if (tunnel) {
- struct pcpu_sw_netstats *tstats;
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, false);
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto drop;
-
- if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
- tunnel->dev->stats.rx_dropped++;
- goto drop;
- }
-
- skb->protocol = gre_proto;
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IPv6
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
- skb->protocol = htons(ETH_P_IPV6);
- if ((*(h + offset) & 0xF0) != 0x40)
- offset += 4;
- }
-
- skb->mac_header = skb->network_header;
- __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
-
- if (((flags&GRE_CSUM) && csum) ||
- (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
- tunnel->dev->stats.rx_crc_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
- if (tunnel->parms.i_flags&GRE_SEQ) {
- if (!(flags&GRE_SEQ) ||
- (tunnel->i_seqno &&
- (s32)(seqno - tunnel->i_seqno) < 0)) {
- tunnel->dev->stats.rx_fifo_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
- tunnel->i_seqno = seqno + 1;
- }
-
- /* Warning: All skb pointers will be invalidated! */
- if (tunnel->dev->type == ARPHRD_ETHER) {
- if (!pskb_may_pull(skb, ETH_HLEN)) {
- tunnel->dev->stats.rx_length_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
-
- ipv6h = ipv6_hdr(skb);
- skb->protocol = eth_type_trans(skb, tunnel->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
- }
-
- __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+ return PACKET_RCVD;
+ }
- skb_reset_network_header(skb);
+ return PACKET_REJECT;
+}
- err = IP6_ECN_decapsulate(ipv6h, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
- &ipv6h->saddr,
- ipv6_get_dsfield(ipv6h));
- if (err > 1) {
- ++tunnel->dev->stats.rx_frame_errors;
- ++tunnel->dev->stats.rx_errors;
- goto drop;
- }
- }
+static int gre_rcv(struct sk_buff *skb)
+{
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+ int hdr_len;
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err);
+ if (hdr_len < 0)
+ goto drop;
- netif_rx(skb);
+ if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
+ goto drop;
+ if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
- }
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
return 0;
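Editor's note: the new gre_rcv() above hands flag handling to a shared header parser instead of open-coding it. The length of a GRE header is fully determined by its C/K/S bits: 4 fixed bytes (flags + protocol), plus 4 bytes each for the checksum, key and sequence fields, in that wire order (RFC 2784/2890). A self-contained sketch of that calculation and field extraction; the constants are written out locally for the example, not taken from kernel headers.

/* Standalone GRE header walk: length from flags, optional fields in order. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

#define SKETCH_GRE_CSUM 0x8000
#define SKETCH_GRE_KEY  0x2000
#define SKETCH_GRE_SEQ  0x1000

struct gre_fields {
	uint16_t flags;
	uint16_t proto;	/* kept in network order */
	uint32_t key;	/* kept in network order */
	uint32_t seq;
};

static int gre_parse_sketch(const uint8_t *pkt, size_t len,
			    struct gre_fields *out)
{
	size_t off = 4;		/* flags (2) + protocol (2) */
	uint16_t flags;

	if (len < 4)
		return -1;
	memcpy(&flags, pkt, 2);
	flags = ntohs(flags);
	out->flags = flags;
	memcpy(&out->proto, pkt + 2, 2);

	if (flags & SKETCH_GRE_CSUM)
		off += 4;			/* checksum + reserved */
	if (flags & SKETCH_GRE_KEY) {
		if (len < off + 4)
			return -1;
		memcpy(&out->key, pkt + off, 4);
		off += 4;
	}
	if (flags & SKETCH_GRE_SEQ) {
		if (len < off + 4)
			return -1;
		memcpy(&out->seq, pkt + off, 4);
		out->seq = ntohl(out->seq);
		off += 4;
	}
	if (len < off)
		return -1;
	return (int)off;	/* bytes to pull before the inner packet */
}

int main(void)
{
	/* K and S set, key 0x1234, seq 7, inner protocol 0x86dd (IPv6) */
	uint8_t pkt[] = { 0x30, 0x00, 0x86, 0xdd,
			  0x00, 0x00, 0x12, 0x34,
			  0x00, 0x00, 0x00, 0x07 };
	struct gre_fields f = { 0 };
	int hdr_len = gre_parse_sketch(pkt, sizeof(pkt), &f);

	printf("hdr_len=%d key=0x%x seq=%u\n", hdr_len, ntohl(f.key), f.seq);
	return 0;
}

On success the caller pulls hdr_len bytes and dispatches on the remaining inner packet, which is the shape gre_rcv() follows above with gre_parse_header() and iptunnel_pull_header().
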
@@ -584,199 +489,40 @@ struct ipv6_tel_txoption {
__u8 dst_opt[8];
};
-static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
- memset(opt, 0, sizeof(struct ipv6_tel_txoption));
-
- opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
- opt->dst_opt[3] = 1;
- opt->dst_opt[4] = encap_limit;
- opt->dst_opt[5] = IPV6_TLV_PADN;
- opt->dst_opt[6] = 1;
-
- opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
- opt->ops.opt_nflen = 8;
+ return iptunnel_handle_offloads(skb,
+ csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
-static __sum16 gre6_checksum(struct sk_buff *skb)
-{
- __wsum csum;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- csum = lco_csum(skb);
- else
- csum = skb_checksum(skb, sizeof(struct ipv6hdr),
- skb->len - sizeof(struct ipv6hdr), 0);
- return csum_fold(csum);
-}
-
-static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
- struct net_device *dev,
- __u8 dsfield,
- struct flowi6 *fl6,
- int encap_limit,
- __u32 *pmtu)
+static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
+ struct net_device *dev, __u8 dsfield,
+ struct flowi6 *fl6, int encap_limit,
+ __u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- struct net *net = tunnel->net;
- struct net_device *tdev; /* Device to other host */
- struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int min_headroom = 0; /* The extra header space needed */
- int gre_hlen;
- struct ipv6_tel_txoption opt;
- int mtu;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct net_device_stats *stats = &tunnel->dev->stats;
- int err = -1;
- u8 proto;
- __be16 protocol;
+ __be16 protocol = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : proto;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
- if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
- gre_hlen = 0;
- ipv6h = (struct ipv6hdr *)skb->data;
- fl6->daddr = ipv6h->daddr;
- } else {
- gre_hlen = tunnel->hlen;
+ if (dev->header_ops && dev->type == ARPHRD_IP6GRE)
+ fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr;
+ else
fl6->daddr = tunnel->parms.raddr;
- }
-
- if (!fl6->flowi6_mark)
- dst = dst_cache_get(&tunnel->dst_cache);
-
- if (!dst) {
- dst = ip6_route_output(net, NULL, fl6);
-
- if (dst->error)
- goto tx_err_link_failure;
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
- goto tx_err_link_failure;
- }
- ndst = dst;
- }
-
- tdev = dst->dev;
- if (tdev == dev) {
- stats->collisions++;
- net_warn_ratelimited("%s: Local routing loop detected!\n",
- tunnel->parms.name);
- goto tx_err_dst_release;
- }
-
- mtu = dst_mtu(dst) - sizeof(*ipv6h);
- if (encap_limit >= 0) {
- min_headroom += 8;
- mtu -= 8;
- }
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu && !skb_is_gso(skb)) {
- *pmtu = mtu;
- err = -EMSGSIZE;
- goto tx_err_dst_release;
- }
-
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
-
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
-
- skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
-
- min_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
-
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
-
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (min_headroom > dev->needed_headroom)
- dev->needed_headroom = min_headroom;
- if (unlikely(err))
- goto tx_err_dst_release;
- }
-
- if (!fl6->flowi6_mark && ndst)
- dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
- skb_dst_set(skb, dst);
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
- proto = NEXTHDR_GRE;
- if (encap_limit >= 0) {
- init_tel_txopt(&opt, encap_limit);
- ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
- }
-
- err = iptunnel_handle_offloads(skb,
- (tunnel->parms.o_flags & GRE_CSUM) ?
- SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
- if (err)
- goto tx_err_dst_release;
-
- skb_push(skb, gre_hlen);
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(*ipv6h));
-
- /*
- * Push down and install the IP header.
- */
- ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
- ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
- ipv6h->hop_limit = tunnel->parms.hop_limit;
- ipv6h->nexthdr = proto;
- ipv6h->saddr = fl6->saddr;
- ipv6h->daddr = fl6->daddr;
-
- ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
- protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : skb->protocol;
- ((__be16 *)(ipv6h + 1))[1] = protocol;
-
- if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
-
- if (tunnel->parms.o_flags&GRE_SEQ) {
- ++tunnel->o_seqno;
- *ptr = htonl(tunnel->o_seqno);
- ptr--;
- }
- if (tunnel->parms.o_flags&GRE_KEY) {
- *ptr = tunnel->parms.o_key;
- ptr--;
- }
- if ((tunnel->parms.o_flags & GRE_CSUM) &&
- !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
- *ptr = 0;
- *(__sum16 *)ptr = gre6_checksum(skb);
- }
- }
+ /* Push GRE header. */
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
skb_set_inner_protocol(skb, protocol);
- ip6tunnel_xmit(NULL, skb, dev);
- return 0;
-tx_err_link_failure:
- stats->tx_carrier_errors++;
- dst_link_failure(skb);
-tx_err_dst_release:
- dst_release(dst);
- return err;
+ return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
+ NEXTHDR_GRE);
}
static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
@@ -795,7 +541,6 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv4_get_dsfield(iph);
@@ -805,7 +550,12 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ if (err)
+ return -1;
+
+ err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ skb->protocol);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
@@ -845,7 +595,6 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -855,7 +604,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
+ return -1;
+
+ err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
+ &mtu, skb->protocol);
if (err != 0) {
if (err == -EMSGSIZE)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -899,7 +652,11 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
fl6.flowi6_proto = skb->protocol;
- err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+ err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ if (err)
+ return err;
+
+ err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol);
return err;
}
@@ -943,7 +700,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
- int addend = sizeof(struct ipv6hdr) + 4;
+ int t_hlen;
if (dev->type != ARPHRD_ETHER) {
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -970,16 +727,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
else
dev->flags &= ~IFF_POINTOPOINT;
- /* Precalculate GRE options length */
- if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
- if (t->parms.o_flags&GRE_CSUM)
- addend += 4;
- if (t->parms.o_flags&GRE_KEY)
- addend += 4;
- if (t->parms.o_flags&GRE_SEQ)
- addend += 4;
- }
- t->hlen = addend;
+ t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
+
+ t->hlen = t->tun_hlen;
+
+ t_hlen = t->hlen + sizeof(struct ipv6hdr);
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
@@ -993,10 +745,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
return;
if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+ dev->hard_header_len = rt->dst.dev->hard_header_len +
+ t_hlen;
if (set_mtu) {
- dev->mtu = rt->dst.dev->mtu - addend;
+ dev->mtu = rt->dst.dev->mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->type == ARPHRD_ETHER)
@@ -1042,8 +795,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
p->link = u->link;
p->i_key = u->i_key;
p->o_key = u->o_key;
- p->i_flags = u->i_flags;
- p->o_flags = u->o_flags;
+ p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
+ p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
memcpy(p->name, u->name, sizeof(u->name));
}
@@ -1060,8 +813,8 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
u->link = p->link;
u->i_key = p->i_key;
u->o_key = p->o_key;
- u->i_flags = p->i_flags;
- u->o_flags = p->o_flags;
+ u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
memcpy(u->name, p->name, sizeof(u->name));
}
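Editor's note: after this series ip6_gre keeps flags in the internal TUNNEL_* representation and converts to the on-wire GRE_* bits only at the user-facing edges, as in the ioctl conversion just above and the netlink parse/fill hunks further down. A hedged sketch of such a two-way mapping; the wire bit positions follow the RFC 2784/2890 layout, while the internal values are invented for the example and are not the kernel's TUNNEL_* constants.

/* Sketch: translate between on-wire GRE flag bits and an internal flag word. */
#include <stdint.h>
#include <stdio.h>

#define WIRE_GRE_CSUM 0x8000
#define WIRE_GRE_KEY  0x2000
#define WIRE_GRE_SEQ  0x1000

#define INT_CSUM 0x1	/* illustrative internal values */
#define INT_KEY  0x2
#define INT_SEQ  0x4

static uint32_t wire_to_internal(uint16_t wire)
{
	uint32_t f = 0;

	if (wire & WIRE_GRE_CSUM) f |= INT_CSUM;
	if (wire & WIRE_GRE_KEY)  f |= INT_KEY;
	if (wire & WIRE_GRE_SEQ)  f |= INT_SEQ;
	return f;
}

static uint16_t internal_to_wire(uint32_t f)
{
	uint16_t wire = 0;

	if (f & INT_CSUM) wire |= WIRE_GRE_CSUM;
	if (f & INT_KEY)  wire |= WIRE_GRE_KEY;
	if (f & INT_SEQ)  wire |= WIRE_GRE_SEQ;
	return wire;
}

int main(void)
{
	uint16_t wire = WIRE_GRE_KEY | WIRE_GRE_SEQ;
	uint32_t internal = wire_to_internal(wire);

	printf("wire=0x%04x internal=0x%x back=0x%04x\n",
	       wire, internal, internal_to_wire(internal));
	return 0;
}

Keeping one internal representation lets the shared GRE/tunnel helpers test a single flag word, while the ioctl and netlink paths stay compatible with the existing uAPI bit layout.
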
@@ -1075,6 +828,8 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
struct net *net = t->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ign->fb_tunnel_dev) {
@@ -1174,15 +929,6 @@ done:
return err;
}
-static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len)
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr, unsigned int len)
@@ -1226,7 +972,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_uninit = ip6gre_tunnel_uninit,
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_do_ioctl = ip6gre_tunnel_ioctl,
- .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1242,17 +988,11 @@ static void ip6gre_dev_free(struct net_device *dev)
static void ip6gre_tunnel_setup(struct net_device *dev)
{
- struct ip6_tnl *t;
-
dev->netdev_ops = &ip6gre_netdev_ops;
dev->destructor = ip6gre_dev_free;
dev->type = ARPHRD_IP6GRE;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
- dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
- t = netdev_priv(dev);
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
netif_keep_dst(dev);
@@ -1262,6 +1002,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
int ret;
+ int t_hlen;
tunnel = netdev_priv(dev);
@@ -1280,6 +1021,17 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
return ret;
}
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+
+ tunnel->hlen = tunnel->tun_hlen;
+
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
+ if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
return 0;
}
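Editor's note: the link-config and init hunks above stop hard-coding "+ 4" and derive the per-packet overhead instead: the GRE part depends on the configured output flags, the rest is the outer IPv6 header, and the device MTU is the underlying MTU minus that total (minus another 8 bytes when the encapsulation-limit destination option is inserted). A small standalone sketch of the same arithmetic, assuming plain 1500-byte Ethernet underneath.

/* Sketch of the tunnel header / MTU arithmetic used above. */
#include <stdbool.h>
#include <stdio.h>

#define IPV6_HDR_LEN	40
#define ETH_DATA_LEN	1500
#define ENCAP_LIMIT_OPT	8	/* dst-opt header carrying the tnl encap limit */

static int gre_hlen(bool csum, bool key, bool seq)
{
	return 4 + (csum ? 4 : 0) + (key ? 4 : 0) + (seq ? 4 : 0);
}

int main(void)
{
	bool csum = false, key = true, seq = false, use_encap_limit = true;
	int tun_hlen = gre_hlen(csum, key, seq);
	int t_hlen = tun_hlen + IPV6_HDR_LEN;
	int mtu = ETH_DATA_LEN - t_hlen - (use_encap_limit ? ENCAP_LIMIT_OPT : 0);

	printf("tun_hlen=%d t_hlen=%d mtu=%d\n", tun_hlen, t_hlen, mtu);
	return 0;
}
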
@@ -1318,7 +1070,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
static struct inet6_protocol ip6gre_protocol __read_mostly = {
- .handler = ip6gre_rcv,
+ .handler = gre_rcv,
.err_handler = ip6gre_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -1462,10 +1214,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ parms->i_flags = gre_flags_to_tnl_flags(
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ parms->o_flags = gre_flags_to_tnl_flags(
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1514,7 +1268,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1560,7 +1314,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
- if (!(nt->parms.o_flags & GRE_SEQ)) {
+ if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
/* TCP segmentation offload is not supported when we
* generate output sequences.
*/
@@ -1657,8 +1411,10 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct __ip6_tnl_parm *p = &t->parms;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+ gre_tnl_flags_to_gre_flags(p->o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c05c425c2389..f185cbcda114 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,13 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_rcv(skb);
+ if (!skb)
+ return NET_RX_SUCCESS;
+
if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
@@ -78,11 +85,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
idev = __in6_dev_get(skb->dev);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
+ __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -109,10 +116,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
- IP6_ADD_STATS_BH(net, idev,
- IPSTATS_MIB_NOECTPKTS +
+ __IP6_ADD_STATS(net, idev,
+ IPSTATS_MIB_NOECTPKTS +
(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
- max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* A packet received on an interface with a destination address
@@ -169,12 +176,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
/* pkt_len may be zero if Jumbo payload option is present */
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
- IP6_INC_STATS_BH(net,
- idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP6_INC_STATS(net,
+ idev, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
hdr = ipv6_hdr(skb);
@@ -182,7 +189,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->nexthdr == NEXTHDR_HOP) {
if (ipv6_parse_hopopts(skb) < 0) {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
rcu_read_unlock();
return NET_RX_DROP;
}
@@ -197,7 +204,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
net, NULL, skb, dev, NULL,
ip6_rcv_finish);
err:
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
drop:
rcu_read_unlock();
kfree_skb(skb);
@@ -259,18 +266,18 @@ resubmit:
if (ret > 0)
goto resubmit;
else if (ret == 0)
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
} else {
if (!raw) {
if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- IP6_INC_STATS_BH(net, idev,
- IPSTATS_MIB_INUNKNOWNPROTOS);
+ __IP6_INC_STATS(net, idev,
+ IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
}
kfree_skb(skb);
} else {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
}
}
@@ -278,7 +285,7 @@ resubmit:
return 0;
discard:
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
kfree_skb(skb);
return 0;
@@ -297,7 +304,7 @@ int ip6_mc_input(struct sk_buff *skb)
const struct ipv6hdr *hdr;
bool deliver;
- IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+ __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
skb->len);
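Editor's note: the ip6_rcv_finish() hunk above gives an L3 master device (VRF-style setups) a chance to take over the skb before early demux: the helper either returns the buffer for normal processing or consumes it and returns NULL. A small sketch of that "hook may steal the buffer" pattern; the types and names are illustrative only, not the l3mdev API.

/* "Hook may consume the buffer" pattern from ip6_rcv_finish(). */
#include <stdio.h>
#include <stdlib.h>

struct buf {
	int enslaved;	/* pretend: the ingress device belongs to an L3 master */
};

static struct buf *l3_master_hook(struct buf *b)
{
	if (b->enslaved) {
		free(b);	/* hook took ownership and finished the job */
		return NULL;
	}
	return b;		/* untouched: caller keeps processing */
}

static int rcv_finish(struct buf *b)
{
	b = l3_master_hook(b);
	if (!b)
		return 0;	/* already handled, report success */
	/* ... the normal early-demux / input path would run here ... */
	free(b);
	return 0;
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	if (!b)
		return 1;
	b->enslaved = 1;
	return rcv_finish(b);
}
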
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 171518e3ca21..cbf127ae7c67 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -395,8 +395,8 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -427,8 +427,8 @@ int ip6_forward(struct sk_buff *skb)
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
@@ -441,15 +441,15 @@ int ip6_forward(struct sk_buff *skb)
if (proxied > 0)
return ip6_input(skb);
else if (proxied < 0) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
dst = skb_dst(skb);
@@ -505,17 +505,17 @@ int ip6_forward(struct sk_buff *skb)
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INTOOBIGERRORS);
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_FRAGFAILS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INTOOBIGERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
if (skb_cow(skb, dst->dev->hard_header_len)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_OUTDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_OUTDISCARDS);
goto drop;
}
@@ -525,14 +525,14 @@ int ip6_forward(struct sk_buff *skb)
hdr->hop_limit--;
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
error:
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -1182,12 +1182,12 @@ static void ip6_append_data_mtu(unsigned int *mtu,
}
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
- struct inet6_cork *v6_cork,
- int hlimit, int tclass, struct ipv6_txoptions *opt,
+ struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
struct rt6_info *rt, struct flowi6 *fl6)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu;
+ struct ipv6_txoptions *opt = ipc6->opt;
/*
* setup for corking
@@ -1229,8 +1229,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
dst_hold(&rt->dst);
cork->base.dst = &rt->dst;
cork->fl.u.ip6 = *fl6;
- v6_cork->hop_limit = hlimit;
- v6_cork->tclass = tclass;
+ v6_cork->hop_limit = ipc6->hlimit;
+ v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(&rt->dst);
@@ -1258,7 +1258,7 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- unsigned int flags, int dontfrag,
+ unsigned int flags, struct ipcm6_cookie *ipc6,
const struct sockcm_cookie *sockc)
{
struct sk_buff *skb, *skb_prev = NULL;
@@ -1298,7 +1298,7 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- if (cork->length + length > mtu - headersize && dontfrag &&
+ if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_RAW)) {
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
@@ -1564,9 +1564,9 @@ error:
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen, int hlimit,
- int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag,
+ void *from, int length, int transhdrlen,
+ struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
+ struct rt6_info *rt, unsigned int flags,
const struct sockcm_cookie *sockc)
{
struct inet_sock *inet = inet_sk(sk);
@@ -1580,12 +1580,12 @@ int ip6_append_data(struct sock *sk,
/*
* setup for corking
*/
- err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
- tclass, opt, rt, fl6);
+ err = ip6_setup_cork(sk, &inet->cork, &np->cork,
+ ipc6, rt, fl6);
if (err)
return err;
- exthdrlen = (opt ? opt->opt_flen : 0);
+ exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
@@ -1595,8 +1595,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, dontfrag,
- sockc);
+ from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1752,15 +1751,14 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- int hlimit, int tclass,
- struct ipv6_txoptions *opt, struct flowi6 *fl6,
+ struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
- int dontfrag, const struct sockcm_cookie *sockc)
+ const struct sockcm_cookie *sockc)
{
struct inet_cork_full cork;
struct inet6_cork v6_cork;
struct sk_buff_head queue;
- int exthdrlen = (opt ? opt->opt_flen : 0);
+ int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
int err;
if (flags & MSG_PROBE)
@@ -1772,17 +1770,17 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.addr = 0;
cork.base.opt = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
+ err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
if (err)
return ERR_PTR(err);
- if (dontfrag < 0)
- dontfrag = inet6_sk(sk)->dontfrag;
+ if (ipc6->dontfrag < 0)
+ ipc6->dontfrag = inet6_sk(sk)->dontfrag;
err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, dontfrag, sockc);
+ flags, ipc6, sockc);
if (err) {
__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1f20345cbc97..e79330f214bd 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -238,6 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
@@ -753,97 +754,157 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
-/**
- * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
- * @skb: received socket buffer
- * @protocol: ethernet protocol ID
- * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
- *
- * Return: 0
- **/
-
-static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
- __u8 ipproto,
- int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb))
+static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ struct metadata_dst *tun_dst,
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb),
+ bool log_ecn_err)
{
- struct ip6_tnl *t;
+ struct pcpu_sw_netstats *tstats;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u8 tproto;
int err;
- rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
- if (t) {
- struct pcpu_sw_netstats *tstats;
+ if ((!(tpi->flags & TUNNEL_CSUM) &&
+ (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
+ ((tpi->flags & TUNNEL_CSUM) &&
+ !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
- tproto = ACCESS_ONCE(t->parms.proto);
- if (tproto != ipproto && tproto != 0) {
- rcu_read_unlock();
- goto discard;
+ if (tunnel->parms.i_flags & TUNNEL_SEQ) {
+ if (!(tpi->flags & TUNNEL_SEQ) ||
+ (tunnel->i_seqno &&
+ (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
}
+ tunnel->i_seqno = ntohl(tpi->seq) + 1;
+ }
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- rcu_read_unlock();
- goto discard;
- }
+ skb->protocol = tpi->proto;
- if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
- t->dev->stats.rx_dropped++;
- rcu_read_unlock();
- goto discard;
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
}
- skb->mac_header = skb->network_header;
- skb_reset_network_header(skb);
- skb->protocol = htons(protocol);
- memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
-
- __skb_tunnel_rx(skb, t->dev, t->net);
-
- err = dscp_ecn_decapsulate(t, ipv6h, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
- &ipv6h->saddr,
- ipv6_get_dsfield(ipv6h));
- if (err > 1) {
- ++t->dev->stats.rx_frame_errors;
- ++t->dev->stats.rx_errors;
- rcu_read_unlock();
- goto discard;
- }
+
+ ipv6h = ipv6_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ } else {
+ skb->dev = tunnel->dev;
+ }
+
+ skb_reset_network_header(skb);
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+ err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_err)
+ net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
}
+ }
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
- netif_rx(skb);
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
- rcu_read_unlock();
- return 0;
+ gro_cells_receive(&tunnel->gro_cells, skb);
+ return 0;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
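Editor's note: the sequence handling consolidated into __ip6_tnl_rcv() above keeps the classic wraparound-safe test: when sequence checking is enabled, a packet is treated as stale if the signed 32-bit difference between its sequence number and the expected one is negative. A tiny standalone illustration of just that comparison.

/* Wraparound-safe "is this sequence number old?" test, as used above. */
#include <stdint.h>
#include <stdio.h>

static int seq_is_stale(uint32_t seq, uint32_t expected)
{
	return (int32_t)(seq - expected) < 0;
}

int main(void)
{
	/* just past wraparound: 3 is newer than 0xfffffffe */
	printf("%d %d\n",
	       seq_is_stale(3, 0xfffffffeU),	/* 0: not stale */
	       seq_is_stale(0xfffffffeU, 3));	/* 1: stale */
	return 0;
}
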
+
+int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ struct metadata_dst *tun_dst,
+ bool log_ecn_err)
+{
+ return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ log_ecn_err);
+}
+EXPORT_SYMBOL(ip6_tnl_rcv);
+
+static const struct tnl_ptk_info tpi_v6 = {
+ /* no tunnel info required for ipxip6. */
+ .proto = htons(ETH_P_IPV6),
+};
+
+static const struct tnl_ptk_info tpi_v4 = {
+ /* no tunnel info required for ipxip6. */
+ .proto = htons(ETH_P_IP),
+};
+
+static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
+ const struct tnl_ptk_info *tpi,
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int ret = -1;
+
+ rcu_read_lock();
+ t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+
+ if (t) {
+ u8 tproto = ACCESS_ONCE(t->parms.proto);
+
+ if (tproto != ipproto && tproto != 0)
+ goto drop;
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
+ goto drop;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
+ goto drop;
+ ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+ log_ecn_error);
}
+
rcu_read_unlock();
- return 1;
-discard:
+ return ret;
+
+drop:
+ rcu_read_unlock();
kfree_skb(skb);
return 0;
}
static int ip4ip6_rcv(struct sk_buff *skb)
{
- return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
- ip4ip6_dscp_ecn_decapsulate);
+ return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
+ ip4ip6_dscp_ecn_decapsulate);
}
static int ip6ip6_rcv(struct sk_buff *skb)
{
- return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
- ip6ip6_dscp_ecn_decapsulate);
+ return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
+ ip6ip6_dscp_ecn_decapsulate);
}
struct ipv6_tel_txoption {
@@ -918,13 +979,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
/**
- * ip6_tnl_xmit2 - encapsulate packet and send
+ * ip6_tnl_xmit - encapsulate packet and send
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
* @dsfield: dscp code for outer header
- * @fl: flow of tunneled packet
+ * @fl6: flow of tunneled packet
* @encap_limit: encapsulation limit
* @pmtu: Path MTU is stored if packet is too big
+ * @proto: next header value
*
* Description:
* Build new header and do some sanity checks on the packet before sending
@@ -936,12 +998,9 @@ EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
* %-EMSGSIZE message too big. return mtu in this case.
**/
-static int ip6_tnl_xmit2(struct sk_buff *skb,
- struct net_device *dev,
- __u8 dsfield,
- struct flowi6 *fl6,
- int encap_limit,
- __u32 *pmtu)
+int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
+ struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
+ __u8 proto)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net *net = t->net;
@@ -952,7 +1011,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
struct net_device *tdev;
int mtu;
unsigned int max_headroom = sizeof(struct ipv6hdr);
- u8 proto;
int err = -1;
/* NBMA tunnel */
@@ -1014,12 +1072,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
}
+ if (t->err_count > 0) {
+ if (time_before(jiffies,
+ t->err_time + IP6TUNNEL_ERR_TIMEO)) {
+ t->err_count--;
+
+ dst_link_failure(skb);
+ } else {
+ t->err_count = 0;
+ }
+ }
+
skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
/*
@@ -1045,9 +1114,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
- skb->transport_header = skb->network_header;
-
- proto = fl6->flowi6_proto;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
@@ -1058,6 +1124,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb->encapsulation = 1;
}
+ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ + dst->header_len;
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
@@ -1076,6 +1147,7 @@ tx_err_dst_release:
dst_release(dst);
return err;
}
+EXPORT_SYMBOL(ip6_tnl_xmit);
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1099,7 +1171,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
dsfield = ipv4_get_dsfield(iph);
@@ -1109,7 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ IPPROTO_IPIP);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
@@ -1153,7 +1225,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -1163,7 +1234,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ IPPROTO_IPV6);
if (err != 0) {
if (err == -EMSGSIZE)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1174,7 +1246,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
}
static netdev_tx_t
-ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
@@ -1370,6 +1442,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
@@ -1464,8 +1538,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
* %-EINVAL if mtu too small
**/
-static int
-ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
@@ -1481,6 +1554,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL(ip6_tnl_change_mtu);
int ip6_tnl_get_iflink(const struct net_device *dev)
{
@@ -1493,7 +1567,7 @@ EXPORT_SYMBOL(ip6_tnl_get_iflink);
static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
- .ndo_start_xmit = ip6_tnl_xmit,
+ .ndo_start_xmit = ip6_tnl_start_xmit,
.ndo_do_ioctl = ip6_tnl_ioctl,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats = ip6_get_stats,
@@ -1549,13 +1623,25 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
return -ENOMEM;
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
- if (ret) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
- return ret;
- }
+ if (ret)
+ goto free_stats;
+
+ ret = gro_cells_init(&t->gro_cells, dev);
+ if (ret)
+ goto destroy_dst;
+
+ t->hlen = 0;
+ t->tun_hlen = 0;
return 0;
+
+destroy_dst:
+ dst_cache_destroy(&t->dst_cache);
+free_stats:
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+
+ return ret;
}
/**
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bf678324fd52..f2e2013f8346 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1984,10 +1984,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTFORWDATAGRAMS);
+ __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4ff4b29894eb..a9895e15ee9c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -473,7 +473,7 @@ sticky_done:
struct msghdr msg;
struct flowi6 fl6;
struct sockcm_cookie sockc_junk;
- int junk;
+ struct ipcm6_cookie ipc6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -503,9 +503,9 @@ sticky_done:
msg.msg_controllen = optlen;
msg.msg_control = (void *)(opt+1);
+ ipc6.opt = opt;
- retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
- &junk, &junk, &sockc_junk);
+ retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
if (retv)
goto done;
update:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 73e606c719ef..63e06c3dd319 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,34 +39,12 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");
-/*#define DEBUG_IP_FIREWALL*/
-/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) pr_info(format , ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) pr_info(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
-#if 0
-/* All the better to debug you with... */
-#define static
-#define inline
-#endif
-
void *ip6t_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ip6t, IP6T);
@@ -100,35 +78,18 @@ ip6_packet_match(const struct sk_buff *skb,
if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
&ip6info->src), IP6T_INV_SRCIP) ||
FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
- &ip6info->dst), IP6T_INV_DSTIP)) {
- dprintf("Source or dest mismatch.\n");
-/*
- dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
- ipinfo->smsk.s_addr, ipinfo->src.s_addr,
- ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
- dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
- ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
- ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
+ &ip6info->dst), IP6T_INV_DSTIP))
return false;
- }
ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, ip6info->iniface,
- ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
+ if (FWINV(ret != 0, IP6T_INV_VIA_IN))
return false;
- }
ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, ip6info->outiface,
- ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
+ if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
return false;
- }
/* ... might want to do something with class and flowlabel here ... */
@@ -145,11 +106,6 @@ ip6_packet_match(const struct sk_buff *skb,
}
*fragoff = _frag_off;
- dprintf("Packet protocol %hi ?= %s%hi.\n",
- protohdr,
- ip6info->invflags & IP6T_INV_PROTO ? "!":"",
- ip6info->proto);
-
if (ip6info->proto == protohdr) {
if (ip6info->invflags & IP6T_INV_PROTO)
return false;
@@ -169,16 +125,11 @@ ip6_packet_match(const struct sk_buff *skb,
static bool
ip6_checkentry(const struct ip6t_ip6 *ipv6)
{
- if (ipv6->flags & ~IP6T_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- ipv6->flags & ~IP6T_F_MASK);
+ if (ipv6->flags & ~IP6T_F_MASK)
return false;
- }
- if (ipv6->invflags & ~IP6T_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- ipv6->invflags & ~IP6T_INV_MASK);
+ if (ipv6->invflags & ~IP6T_INV_MASK)
return false;
- }
+
return true;
}
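Editor's note: the ip6_packet_match() hunks above keep the long-standing invertible-match idiom behind FWINV(): evaluate the raw comparison, then XOR it with "the rule asked for the inverted sense of this test". A minimal sketch of that idiom; the flag values and names are illustrative, not the ip6_tables definitions.

/* Sketch of the invertible-match (FWINV-style) check. */
#include <stdbool.h>
#include <stdio.h>

#define INV_SRCIP 0x01
#define INV_PROTO 0x02

static bool match_fails(bool mismatch, unsigned int invflags, unsigned int bit)
{
	/* true means the rule should be considered NOT matching */
	return mismatch ^ !!(invflags & bit);
}

int main(void)
{
	/* source address differs, but the rule says "! -s ..." => still a match */
	bool fail = match_fails(true, INV_SRCIP, INV_SRCIP);

	printf("rule %s\n", fail ? "does not match" : "matches");
	return 0;
}
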
@@ -446,13 +397,9 @@ ip6t_do_table(struct sk_buff *skb,
xt_write_recseq_end(addend);
local_bh_enable();
-#ifdef DEBUG_ALLOW_ALL
- return NF_ACCEPT;
-#else
if (acpar.hotdrop)
return NF_DROP;
else return verdict;
-#endif
}
static bool find_jump_target(const struct xt_table_info *t,
@@ -492,11 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
= (void *)ip6t_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
- pr_err("iptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS))
return 0;
- }
+
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
@@ -508,26 +453,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
big jump. */
do {
e->comefrom ^= (1<<NF_INET_NUMHOOKS);
-#ifdef DEBUG_IP_FIREWALL_USER
- if (e->comefrom
- & (1 << NF_INET_NUMHOOKS)) {
- duprintf("Back unset "
- "on hook %u "
- "rule %u\n",
- hook, pos);
- }
-#endif
oldpos = pos;
pos = e->counters.pcnt;
e->counters.pcnt = 0;
@@ -555,8 +487,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
e = (struct ip6t_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
@@ -573,8 +503,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
@@ -595,19 +524,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ip6t_ip6 *ipv6 = par->entryinfo;
- int ret;
par->match = m->u.kernel.match;
par->matchinfo = m->data;
- ret = xt_check_match(par, m->u.match_size - sizeof(*m),
- ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
- if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- par.match->name);
- return ret;
- }
- return 0;
+ return xt_check_match(par, m->u.match_size - sizeof(*m),
+ ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
}
static int
@@ -618,10 +540,9 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
ret = check_match(m, par);
@@ -646,17 +567,11 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
.hook_mask = e->comefrom,
.family = NFPROTO_IPV6,
};
- int ret;
t = ip6t_get_target(e);
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
- e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
- if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ipv6.proto,
+ e->ipv6.invflags & IP6T_INV_PROTO);
}
static int
@@ -669,10 +584,12 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
@@ -691,7 +608,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto cleanup_matches;
}
@@ -744,17 +660,12 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
(unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
if (!ip6_checkentry(&e->ipv6))
return -EINVAL;
@@ -771,12 +682,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
@@ -828,7 +736,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -845,27 +752,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
++newinfo->stacksize;
}
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
+ if (i != repl->num_entries)
return -EINVAL;
- }
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
- }
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -1093,11 +991,8 @@ static int get_info(struct net *net, void __user *user,
struct xt_table *t;
int ret;
- if (*len != sizeof(struct ip6t_getinfo)) {
- duprintf("length %u != %zu\n", *len,
- sizeof(struct ip6t_getinfo));
+ if (*len != sizeof(struct ip6t_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
@@ -1155,31 +1050,24 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
struct ip6t_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct ip6t_get_entries) + get.size) {
- duprintf("get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct ip6t_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR_OR_NULL(t)) {
struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
@@ -1215,8 +1103,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
@@ -1226,8 +1112,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
@@ -1296,8 +1180,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (ret != 0)
goto free_newinfo;
- duprintf("ip_tables: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
@@ -1422,11 +1304,9 @@ compat_find_calc_match(struct xt_entry_match *m,
match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("compat_check_calc_match: `%s' not found\n",
- m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
*size += xt_compat_match_offset(match);
return 0;
@@ -1458,20 +1338,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
unsigned int j;
int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_ip6t_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
return -EINVAL;
- }
if (!ip6_checkentry(&e->ipv6))
return -EINVAL;
@@ -1495,8 +1369,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto release_matches;
}
@@ -1575,7 +1447,6 @@ translate_compat_table(struct net *net,
size = compatr->size;
info->number = compatr->num_entries;
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
xt_compat_init_offsets(AF_INET6, compatr->num_entries);
@@ -1590,11 +1461,8 @@ translate_compat_table(struct net *net,
}
ret = -EINVAL;
- if (j != compatr->num_entries) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, compatr->num_entries);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
@@ -1685,8 +1553,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
@@ -1720,7 +1586,6 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
break;
default:
- duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1770,19 +1635,15 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
struct compat_ip6t_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_ip6t_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(AF_INET6);
@@ -1790,16 +1651,13 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
- if (!ret && get.size == info.size) {
+ if (!ret && get.size == info.size)
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(AF_INET6);
module_put(t->me);
xt_table_unlock(t);
@@ -1852,7 +1710,6 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
break;
default:
- duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1904,7 +1761,6 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
}
default:
- duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -2006,7 +1862,6 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil ICMP tinygram.\n");
par->hotdrop = true;
return false;
}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5d778dd11f66..06bed74cf5ee 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -60,7 +60,7 @@ synproxy_send_tcp(struct net *net,
fl6.fl6_dport = nth->dest;
security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
- if (dst == NULL || dst->error) {
+ if (dst->error) {
dst_release(dst);
goto free_nskb;
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index da1cff79e447..3ee3e444a66b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -58,11 +58,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int iif = 0;
struct flowi6 fl6;
int err;
- int hlimit;
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
struct sockcm_cookie junk = {0};
+ struct ipcm6_cookie ipc6;
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -139,13 +139,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.wcheck = 0;
pfh.family = AF_INET6;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = np->tclass;
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
- 0, hlimit,
- np->tclass, NULL, &fl6, rt,
- MSG_DONTWAIT, np->dontfrag, &junk);
+ 0, &ipc6, &fl6, rt,
+ MSG_DONTWAIT, &junk);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
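The same ipcm6_cookie initialization pattern recurs in the raw, UDP and L2TP senders further down; a minimal sketch of what the sentinel values mean, inferred from the surrounding changes and not part of this patch:

/* Sketch only: -1 means "not supplied via cmsg, fall back to the
 * socket/route defaults later", mirroring the checks added in the
 * senders below (ip6_sk_dst_hoplimit, np->tclass, np->dontfrag).
 */
static void example_init_ipc6(struct ipcm6_cookie *ipc6)
{
	ipc6->hlimit = -1;	/* later: ip6_sk_dst_hoplimit(np, &fl6, dst) */
	ipc6->tclass = -1;	/* later: np->tclass */
	ipc6->dontfrag = -1;	/* later: np->dontfrag */
	ipc6->opt = NULL;	/* set once tx options are known */
}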
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b07ce21983aa..896350df6423 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -746,10 +746,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct raw6_frag_vec rfv;
struct flowi6 fl6;
struct sockcm_cookie sockc;
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
u16 proto;
int err;
@@ -770,6 +768,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = sk->sk_mark;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+ ipc6.opt = NULL;
+
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -827,10 +830,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag,
- &sockc);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -846,7 +848,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!opt) {
opt = txopt_get(np);
opt_to_free = opt;
- }
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -881,14 +883,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
err = PTR_ERR(dst);
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -897,10 +899,11 @@ back_from_confirm:
if (inet->hdrincl)
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
else {
+ ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag, &sockc);
+ len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ msg->msg_flags, &sockc);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e2ea31175ef9..2160d5d009cb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -145,12 +145,12 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (!dev)
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
@@ -223,8 +223,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((u8 *)&fhdr->frag_off -
skb_network_header(skb)));
@@ -258,8 +258,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, payload_len));
return -1;
@@ -361,8 +361,8 @@ found:
discard_fq:
inet_frag_kill(&fq->q, &ip6_frags);
err:
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
@@ -500,7 +500,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
skb_network_header_len(head));
rcu_read_lock();
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
@@ -513,7 +513,7 @@ out_oom:
net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
out_fail:
rcu_read_lock();
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
return -1;
}
@@ -528,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
if (hdr->payload_len == 0)
@@ -544,8 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (!(fhdr->frag_off & htons(0xFFF9))) {
/* It is not a fragmented frame */
skb->transport_header += sizeof(struct frag_hdr);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
+ __IP6_INC_STATS(net,
+ ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
@@ -566,13 +566,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return ret;
}
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
fail_hdr:
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d916d6ab9ad2..c42fa1deb152 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1190,7 +1190,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct dst_entry *dst;
bool any_src;
- dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ dst = l3mdev_get_rt6_dst(net, fl6);
if (dst)
return dst;
@@ -1769,6 +1769,37 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
return -EINVAL;
}
+static struct rt6_info *ip6_nh_lookup_table(struct net *net,
+ struct fib6_config *cfg,
+ const struct in6_addr *gw_addr)
+{
+ struct flowi6 fl6 = {
+ .flowi6_oif = cfg->fc_ifindex,
+ .daddr = *gw_addr,
+ .saddr = cfg->fc_prefsrc,
+ };
+ struct fib6_table *table;
+ struct rt6_info *rt;
+ int flags = 0;
+
+ table = fib6_get_table(net, cfg->fc_table);
+ if (!table)
+ return NULL;
+
+ if (!ipv6_addr_any(&cfg->fc_prefsrc))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+
+ /* if table lookup failed, fall back to full lookup */
+ if (rt == net->ipv6.ip6_null_entry) {
+ ip6_rt_put(rt);
+ rt = NULL;
+ }
+
+ return rt;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1940,7 +1971,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt;
+ struct rt6_info *grt = NULL;
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
@@ -1952,7 +1983,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ if (cfg->fc_table)
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr);
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL,
+ cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
if (!grt)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index aab91fa86c5e..59c483937aec 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -155,11 +155,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
if (mss == 0) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
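A small illustration of the assumed naming convention behind the SNMP macro renames in this and the following files: the __-prefixed variants are the former *_BH versions and expect the caller to already run in softirq/BH context, while the plain variants are also safe from process context (illustration only, not part of the patch):

static inline void example_count_synack(struct net *net, bool in_softirq_ctx)
{
	if (in_softirq_ctx)
		__NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);	/* was NET_INC_STATS_BH */
	else
		NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);	/* process-context safe */
}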
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 800265c7fd3f..c4efaa97280c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
skb->dev->ifindex);
if (!sk) {
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
@@ -352,13 +352,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
- NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
@@ -368,7 +368,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -649,12 +649,12 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
if (hash_expected && !hash_location) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}
if (!hash_expected && hash_location) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
@@ -810,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
- else
+ else {
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
+
fl6.flowi6_oif = oif;
+ }
+
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
@@ -825,9 +830,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
- TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
- TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+ TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
return;
}
@@ -1165,7 +1170,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
return newsk;
out_overflow:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
dst_release(dst);
out:
@@ -1276,8 +1281,8 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1359,7 +1364,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
/*
* Count it even if it's bad.
*/
- TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
+ __TCP_INC_STATS(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1421,7 +1426,7 @@ process:
}
}
if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
- NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
@@ -1454,7 +1459,7 @@ process:
} else if (unlikely(sk_add_backlog(sk, skb,
sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
- NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -1472,9 +1477,9 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
- TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+ __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ __TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
tcp_v6_send_reset(NULL, skb);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8d8b2cd8ec5b..aca06094110f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -423,24 +423,22 @@ try_again:
if (!peeked) {
atomic_inc(&sk->sk_drops);
if (is_udp4)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS,
- is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ is_udplite);
else
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS,
- is_udplite);
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ is_udplite);
}
skb_free_datagram_locked(sk, skb);
return err;
}
if (!peeked) {
if (is_udp4)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
+ is_udplite);
else
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
+ is_udplite);
}
sock_recv_ts_and_drops(msg, sk, skb);
@@ -487,15 +485,15 @@ csum_copy_err:
slow = lock_sock_fast(sk);
if (!skb_kill_datagram(sk, skb, flags)) {
if (is_udp4) {
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
} else {
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
}
}
unlock_sock_fast(sk, slow);
@@ -523,8 +521,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), udptable, skb);
if (!sk) {
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
@@ -572,9 +570,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
@@ -630,9 +628,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
ret = encap_rcv(sk, skb);
if (ret <= 0) {
- UDP_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INDATAGRAMS,
- is_udplite);
+ __UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
+ is_udplite);
return -ret;
}
}
@@ -666,8 +664,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
+ __UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
}
@@ -686,9 +684,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return rc;
csum_error:
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
+ __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
return -1;
@@ -771,10 +769,10 @@ start_lookup:
nskb = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!nskb)) {
atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
+ __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ __UDP6_INC_STATS(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
continue;
}
@@ -793,8 +791,8 @@ start_lookup:
consume_skb(skb);
} else {
kfree_skb(skb);
- UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -887,7 +885,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
kfree_skb(skb);
@@ -901,9 +899,9 @@ short_packet:
daddr, ntohs(uh->dest));
goto discard;
csum_error:
- UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
- UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
@@ -1015,13 +1013,14 @@ send:
err = ip6_send_skb(skb);
if (err) {
if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
}
- } else
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_OUTDATAGRAMS, is_udplite);
+ } else {
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
+ }
return err;
}
@@ -1065,11 +1064,9 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int ulen = len;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
int connected = 0;
@@ -1077,6 +1074,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sockcm_cookie sockc;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+
/* destination address check */
if (sin6) {
if (addr_len < offsetof(struct sockaddr, sa_data))
@@ -1201,10 +1202,9 @@ do_udp_sendmsg:
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag,
- &sockc);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1225,6 +1225,7 @@ do_udp_sendmsg:
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
+ ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
@@ -1254,11 +1255,11 @@ do_udp_sendmsg:
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -1269,9 +1270,9 @@ back_from_confirm:
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt,
+ sizeof(struct udphdr), &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag, &sockc);
+ msg->msg_flags, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
@@ -1292,14 +1293,12 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
up->len += ulen;
- err = ip6_append_data(sk, getfrag, msg, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
- (struct rt6_info *)dst,
- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag,
- &sockc);
+ err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
@@ -1342,8 +1341,8 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index fcfbe579434a..d8b7267280c3 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -181,7 +181,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
skb = new_skb;
}
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
len = skb->len;
/* Now queue the packet in the transport layer */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index afca2eb4dfa7..6edfa9980314 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net,
memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
sizeof(udp_conf.peer_ip6));
udp_conf.use_udp6_tx_checksums =
- cfg->udp6_zero_tx_checksums;
+ !cfg->udp6_zero_tx_checksums;
udp_conf.use_udp6_rx_checksums =
- cfg->udp6_zero_rx_checksums;
+ !cfg->udp6_zero_rx_checksums;
} else
#endif
{
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 46e07267e503..c6f5df1bed12 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -495,10 +495,8 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct dst_entry *dst = NULL;
struct flowi6 fl6;
struct sockcm_cookie sockc_unused = {0};
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
int transhdrlen = 4; /* zero session-id */
int ulen = len + transhdrlen;
int err;
@@ -520,6 +518,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = sk->sk_mark;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+
if (lsa) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -564,11 +566,11 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag,
- &sockc_unused);
- if (err < 0) {
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
+ &sockc_unused);
+ if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
@@ -588,6 +590,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
+ ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
@@ -612,14 +615,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
@@ -627,9 +630,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
- ulen, transhdrlen, hlimit, tclass, opt,
+ ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag, &sockc_unused);
+ msg->msg_flags, &sockc_unused);
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index e925037fa0df..6651a78e100c 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -97,3 +97,66 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
return tb_id;
}
EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
+
+/**
+ * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns
+ * cached route for L3 master device if relevant
+ * to flow
+ * @net: network namespace for device index lookup
+ * @fl6: IPv6 flow struct for lookup
+ */
+
+struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
+ const struct flowi6 *fl6)
+{
+ struct dst_entry *dst = NULL;
+ struct net_device *dev;
+
+ if (fl6->flowi6_oif) {
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
+ if (dev && netif_is_l3_slave(dev))
+ dev = netdev_master_upper_dev_get_rcu(dev);
+
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_get_rt6_dst)
+ dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);
+
+ rcu_read_unlock();
+ }
+
+ return dst;
+}
+EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
+
+/**
+ * l3mdev_get_saddr - get source address for a flow based on an interface
+ * enslaved to an L3 master device
+ * @net: network namespace for device index lookup
+ * @ifindex: Interface index
+ * @fl4: IPv4 flow struct
+ */
+
+int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
+{
+ struct net_device *dev;
+ int rc = 0;
+
+ if (ifindex) {
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (dev && netif_is_l3_slave(dev))
+ dev = netdev_master_upper_dev_get_rcu(dev);
+
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_get_saddr)
+ rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
+
+ rcu_read_unlock();
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
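A hedged sketch of how an IPv4 output path might use the new helper; the caller name and placement are assumptions, only l3mdev_get_saddr itself comes from this patch:

/* Hypothetical caller: let the L3 master (VRF) device pick the source
 * address before the regular FIB lookup when the flow has an oif and
 * no source address yet.
 */
static int example_pick_vrf_saddr(struct net *net, struct flowi4 *fl4)
{
	if (!fl4->flowi4_oif || fl4->saddr)
		return 0;

	return l3mdev_get_saddr(net, fl4->flowi4_oif, fl4);
}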
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index b3c52e3f689a..8ae3ed97d95c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
struct llc_pktinfo info;
+ memset(&info, 0, sizeof(info));
info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
llc_pdu_decode_dsap(skb, &info.lpi_sap);
llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6a33f0b4d839..c59af3eb9fa4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0) {
- free_netdev(ndev);
+ ieee80211_if_free(ndev);
return ret;
}
@@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = register_netdevice(ndev);
if (ret) {
- free_netdev(ndev);
+ ieee80211_if_free(ndev);
return ret;
}
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 85ca189bdc3d..2cb3c626cd43 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,6 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
+static void ip_vs_conn_expire(unsigned long data);
/*
* Returns hash value for IPVS connection entry
@@ -453,10 +454,16 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
}
EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
+static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
+{
+ __ip_vs_conn_put(cp);
+ ip_vs_conn_expire((unsigned long)cp);
+}
+
/*
* Put back the conn and restart its timer with its timeout
*/
-void ip_vs_conn_put(struct ip_vs_conn *cp)
+static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
{
unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
0 : cp->timeout;
@@ -465,6 +472,16 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
__ip_vs_conn_put(cp);
}
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
+ (atomic_read(&cp->refcnt) == 1) &&
+ !timer_pending(&cp->timer))
+ /* expire connection immediately */
+ __ip_vs_conn_put_notimer(cp);
+ else
+ __ip_vs_conn_put_timer(cp);
+}
/*
* Fill a no_client_port connection with a client port number
@@ -819,7 +836,8 @@ static void ip_vs_conn_expire(unsigned long data)
if (cp->control)
ip_vs_control_del(cp);
- if (cp->flags & IP_VS_CONN_F_NFCT) {
+ if ((cp->flags & IP_VS_CONN_F_NFCT) &&
+ !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
/* Do not access conntracks during subsys cleanup
* because nf_conntrack_find_get can not be used after
* conntrack cleanup for the net.
@@ -834,7 +852,10 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
- call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ ip_vs_conn_rcu_free(&cp->rcu_head);
+ else
+ call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
atomic_dec(&ipvs->conn_count);
return;
}
@@ -850,7 +871,7 @@ static void ip_vs_conn_expire(unsigned long data)
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
- ip_vs_conn_put(cp);
+ __ip_vs_conn_put_timer(cp);
}
/* Modify timer, so that it expires as soon as possible.
@@ -1240,6 +1261,16 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
return 1;
}
+static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
+{
+ struct ip_vs_service *svc;
+
+ if (!cp->dest)
+ return false;
+ svc = rcu_dereference(cp->dest->svc);
+ return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET);
+}
+
/* Called from keventd and must protect itself from softirqs */
void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
@@ -1254,11 +1285,16 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask;
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
- if (cp->flags & IP_VS_CONN_F_TEMPLATE)
- /* connection template */
- continue;
if (cp->ipvs != ipvs)
continue;
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ if (atomic_read(&cp->n_control) ||
+ !ip_vs_conn_ops_mode(cp))
+ continue;
+ else
+ /* connection template of OPS */
+ goto try_drop;
+ }
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
case IP_VS_TCP_S_SYN_RECV:
@@ -1286,6 +1322,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
continue;
}
} else {
+try_drop:
if (!todrop_entry(cp))
continue;
}
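For reference, a restatement of the condition introduced above under which a one-packet-scheduling (OPS) connection is dropped immediately instead of being re-armed on its timer; illustration only, mirroring the new ip_vs_conn_put() logic:

/* OPS connections never see a reply, so once the last reference is
 * dropped and no timer is pending there is nothing left to wait for.
 */
static inline bool example_ops_expire_now(const struct ip_vs_conn *cp)
{
	return (cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
	       atomic_read(&cp->refcnt) == 1 &&
	       !timer_pending(&cp->timer);
}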
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b9a4082afa3a..1207f20d24e4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
+EXPORT_SYMBOL(ip_vs_new_conn_out);
static int ip_vs_net_id __read_mostly;
/* netns cnt used for uniqueness */
@@ -611,7 +612,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ret = cp->packet_xmit(skb, cp, pd->pp, iph);
/* do not touch skb anymore */
- atomic_inc(&cp->in_pkts);
+ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
+ atomic_inc(&cp->control->in_pkts);
+ else
+ atomic_inc(&cp->in_pkts);
ip_vs_conn_put(cp);
return ret;
}
@@ -1100,6 +1104,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
}
}
+/* Generic function to create new connections for outgoing RS packets
+ *
+ * Pre-requisites for successful connection creation:
+ * 1) Virtual Service is NOT fwmark based:
+ * In fwmark-VS actual vaddr and vport are unknown to IPVS
+ * 2) Real Server and Virtual Service were NOT configured without port:
+ * This is to allow match of different VS to the same RS ip-addr
+ */
+struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport)
+{
+ struct ip_vs_conn_param param;
+ struct ip_vs_conn *ct = NULL, *cp = NULL;
+ const union nf_inet_addr *vaddr, *daddr, *caddr;
+ union nf_inet_addr snet;
+ __be16 vport;
+ unsigned int flags;
+
+ EnterFunction(12);
+ vaddr = &svc->addr;
+ vport = svc->port;
+ daddr = &iph->saddr;
+ caddr = &iph->daddr;
+
+ /* check pre-requisites are satisfied */
+ if (svc->fwmark)
+ return NULL;
+ if (!vport || !dport)
+ return NULL;
+
+ /* for persistent service first create connection template */
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
+ /* apply netmask the same way ingress-side does */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ ipv6_addr_prefix(&snet.in6, &caddr->in6,
+ (__force __u32)svc->netmask);
+ else
+#endif
+ snet.ip = caddr->ip & svc->netmask;
+ /* fill params and create template if not existent */
+ if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol,
+ &snet, 0, vaddr,
+ vport, &param) < 0)
+ return NULL;
+ ct = ip_vs_ct_in_get(&param);
+ if (!ct) {
+ ct = ip_vs_conn_new(&param, dest->af, daddr, dport,
+ IP_VS_CONN_F_TEMPLATE, dest, 0);
+ if (!ct) {
+ kfree(param.pe_data);
+ return NULL;
+ }
+ ct->timeout = svc->timeout;
+ } else {
+ kfree(param.pe_data);
+ }
+ }
+
+ /* connection flags */
+ flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) &&
+ iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
+ /* create connection */
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &param);
+ cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
+ if (!cp) {
+ if (ct)
+ ip_vs_conn_put(ct);
+ return NULL;
+ }
+ if (ct) {
+ ip_vs_control_add(cp, ct);
+ ip_vs_conn_put(ct);
+ }
+ ip_vs_conn_stats(cp, svc);
+
+ /* return connection (will be used to handle outgoing packet) */
+ IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u "
+ "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+ ip_vs_fwd_tag(cp),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ cp->flags, atomic_read(&cp->refcnt));
+ LeaveFunction(12);
+ return cp;
+}
+
+/* Handle outgoing packets which are considered requests initiated by
+ * real servers, so that subsequent responses from external client can be
+ * routed to the right real server.
+ * Used also for outgoing responses in OPS mode.
+ *
+ * Connection management is handled by persistent-engine specific callback.
+ */
+static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
+ struct netns_ipvs *ipvs,
+ int af, struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *cp = NULL;
+ __be16 _ports[2], *pptr;
+
+ if (hooknum == NF_INET_LOCAL_IN)
+ return NULL;
+
+ pptr = frag_safe_skb_hp(skb, iph->len,
+ sizeof(_ports), _ports, iph);
+ if (!pptr)
+ return NULL;
+
+ rcu_read_lock();
+ dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
+ &iph->saddr, pptr[0]);
+ if (dest) {
+ struct ip_vs_service *svc;
+ struct ip_vs_pe *pe;
+
+ svc = rcu_dereference(dest->svc);
+ if (svc) {
+ pe = rcu_dereference(svc->pe);
+ if (pe && pe->conn_out)
+ cp = pe->conn_out(svc, dest, skb, iph,
+ pptr[0], pptr[1]);
+ }
+ }
+ rcu_read_unlock();
+
+ return cp;
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1245,6 +1386,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum);
+
+ /* Check for real-server-started requests */
+ if (atomic_read(&ipvs->conn_out_counter)) {
+ /* Currently only for UDP:
+ * connection oriented protocols typically use
+ * ephemeral ports for outgoing connections, so
+ * related incoming responses would not match any VS
+ */
+ if (pp->protocol == IPPROTO_UDP) {
+ cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
+ if (likely(cp))
+ return handle_response(af, skb, pd, cp, &iph,
+ hooknum);
+ }
+ }
+
if (sysctl_nat_icmp_send(ipvs) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
@@ -1837,6 +1994,9 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(ipvs, cp, pkts);
+ else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
+ /* increment is done inside ip_vs_sync_conn too */
+ atomic_inc(&cp->control->in_pkts);
ip_vs_conn_put(cp);
return ret;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index f35ebc02fa5c..c3c809b2e712 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
return false;
}
+/* Find real service record by <proto,addr,port>.
+ * In case of multiple records with the same <proto,addr,port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
+ __u16 protocol,
+ const union nf_inet_addr *daddr,
+ __be16 dport)
+{
+ unsigned int hash;
+ struct ip_vs_dest *dest;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, dport);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark)) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
/* Lookup destination by {addr,port} in the given service
* Called under RCU lock.
*/
@@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
atomic_inc(&ipvs->ftpsvc_counter);
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
+ if (svc->pe && svc->pe->conn_out)
+ atomic_inc(&ipvs->conn_out_counter);
ip_vs_start_estimator(ipvs, &svc->stats);
@@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
struct ip_vs_scheduler *sched = NULL, *old_sched;
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
+ bool new_pe_conn_out, old_pe_conn_out;
/*
* Lookup the scheduler, by 'u->sched_name'
@@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
svc->netmask = u->netmask;
old_pe = rcu_dereference_protected(svc->pe, 1);
- if (pe != old_pe)
+ if (pe != old_pe) {
rcu_assign_pointer(svc->pe, pe);
+ /* check for optional methods in new pe */
+ new_pe_conn_out = (pe && pe->conn_out) ? true : false;
+ old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
+ if (new_pe_conn_out && !old_pe_conn_out)
+ atomic_inc(&svc->ipvs->conn_out_counter);
+ if (old_pe_conn_out && !new_pe_conn_out)
+ atomic_dec(&svc->ipvs->conn_out_counter);
+ }
out:
ip_vs_scheduler_put(old_sched);
@@ -1389,6 +1430,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/* Unbind persistence engine, keep svc->pe */
old_pe = rcu_dereference_protected(svc->pe, 1);
+ if (old_pe && old_pe->conn_out)
+ atomic_dec(&ipvs->conn_out_counter);
ip_vs_pe_put(old_pe);
/*
@@ -3969,6 +4012,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
(unsigned long) ipvs);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
+ atomic_set(&ipvs->conn_out_counter, 0);
/* procfs stats */
ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 30434fb133df..f04fd8df210b 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -93,6 +93,10 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
return;
+ /* Never alter conntrack for OPS conns (no reply is expected) */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
/* Alter reply only in original direction */
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
return;
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0a6eb5c0d9e9..d07ef9e31c12 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
return cp->pe_data_len;
}
+static struct ip_vs_conn *
+ip_vs_sip_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport)
+{
+ if (likely(iph->protocol == IPPROTO_UDP))
+ return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
+ /* currently no need to handle other than UDP */
+ return NULL;
+}
+
static struct ip_vs_pe ip_vs_sip_pe =
{
.name = "sip",
@@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe =
.ct_match = ip_vs_sip_ct_match,
.hashkey_raw = ip_vs_sip_hashkey_raw,
.show_pe_data = ip_vs_sip_show_pe_data,
+ .conn_out = ip_vs_sip_conn_out,
};
static int __init ip_vs_sip_init(void)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2fd607408998..566c64e3ec50 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,6 +54,7 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
+#include <net/netns/hash.h>
#define NF_CONNTRACK_VERSION "0.5.0"
@@ -68,7 +69,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+
+static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly bool nf_conntrack_locks_all;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -107,7 +113,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
spin_lock_nested(&nf_conntrack_locks[h1],
SINGLE_DEPTH_NESTING);
}
- if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
nf_conntrack_double_unlock(h1, h2);
return true;
}
@@ -141,43 +147,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
-unsigned int nf_conntrack_hash_rnd __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
+static unsigned int nf_conntrack_hash_rnd __read_mostly;
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ const struct net *net)
{
unsigned int n;
+ u32 seed;
+
+ get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
/* The direction must be ignored, so we hash everything up to the
* destination ports (which is a multiple of 4) and treat the last
* three bytes manually.
*/
+ seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
+ return jhash2((u32 *)tuple, n, seed ^
(((__force __u16)tuple->dst.u.all << 16) |
tuple->dst.protonum));
}
-static u32 __hash_bucket(u32 hash, unsigned int size)
-{
- return reciprocal_scale(hash, size);
-}
-
-static u32 hash_bucket(u32 hash, const struct net *net)
+static u32 scale_hash(u32 hash)
{
- return __hash_bucket(hash, net->ct.htable_size);
+ return reciprocal_scale(hash, nf_conntrack_htable_size);
}
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- unsigned int size)
+static u32 __hash_conntrack(const struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int size)
{
- return __hash_bucket(hash_conntrack_raw(tuple), size);
+ return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
}
-static inline u_int32_t hash_conntrack(const struct net *net,
- const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack(const struct net *net,
+ const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, net->ct.htable_size);
+ return scale_hash(hash_conntrack_raw(tuple, net));
}
bool
@@ -358,7 +364,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
}
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- if (l4proto && l4proto->destroy)
+ if (l4proto->destroy)
l4proto->destroy(ct);
rcu_read_unlock();
@@ -393,7 +399,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
local_bh_disable();
do {
- sequence = read_seqcount_begin(&net->ct.generation);
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
reply_hash = hash_conntrack(net,
@@ -445,7 +451,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+ const struct nf_conntrack_zone *zone,
+ const struct net *net)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -454,7 +461,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
*/
return nf_ct_tuple_equal(tuple, &h->tuple) &&
nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
- nf_ct_is_confirmed(ct);
+ nf_ct_is_confirmed(ct) &&
+ net_eq(net, nf_ct_net(ct));
}
/*
@@ -467,21 +475,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket = hash_bucket(hash, net);
+ unsigned int bucket, sequence;
- /* Disable BHs the entire time since we normally need to disable them
- * at least once for the stats anyway.
- */
- local_bh_disable();
begin:
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
- if (nf_ct_key_equal(h, tuple, zone)) {
- NF_CT_STAT_INC(net, found);
- local_bh_enable();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ bucket = scale_hash(hash);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
+ NF_CT_STAT_INC_ATOMIC(net, found);
return h;
}
- NF_CT_STAT_INC(net, searched);
+ NF_CT_STAT_INC_ATOMIC(net, searched);
}
/*
* if the nulls value we got at the end of this lookup is
@@ -489,10 +499,9 @@ begin:
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(n) != bucket) {
- NF_CT_STAT_INC(net, search_restart);
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
goto begin;
}
- local_bh_enable();
return NULL;
}
@@ -514,7 +523,7 @@ begin:
!atomic_inc_not_zero(&ct->ct_general.use)))
h = NULL;
else {
- if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
+ if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
nf_ct_put(ct);
goto begin;
}
@@ -530,7 +539,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple));
+ hash_conntrack_raw(tuple, net));
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -538,12 +547,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
unsigned int hash,
unsigned int reply_hash)
{
- struct net *net = nf_ct_net(ct);
-
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.hash[hash]);
+ &nf_conntrack_hash[hash]);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
- &net->ct.hash[reply_hash]);
+ &nf_conntrack_hash[reply_hash]);
}
int
@@ -560,7 +567,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
local_bh_disable();
do {
- sequence = read_seqcount_begin(&net->ct.generation);
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
reply_hash = hash_conntrack(net,
@@ -568,17 +575,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ zone, net))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
goto out;
add_timer(&ct->timeout);
@@ -599,6 +603,62 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
+static inline void nf_ct_acct_update(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int len)
+{
+ struct nf_conn_acct *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+
+ atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
+ }
+}
+
+static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ const struct nf_conn *loser_ct)
+{
+ struct nf_conn_acct *acct;
+
+ acct = nf_conn_acct_find(loser_ct);
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+ unsigned int bytes;
+
+ /* u32 should be fine since we must have seen one packet. */
+ bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+ nf_ct_acct_update(ct, ctinfo, bytes);
+ }
+}
+
+/* Resolve race on insertion if this protocol allows this. */
+static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ struct nf_conntrack_l4proto *l4proto;
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->allow_clash &&
+ !nf_ct_is_dying(ct) &&
+ atomic_inc_not_zero(&ct->ct_general.use)) {
+ nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
+ nf_conntrack_put(skb->nfct);
+ /* Assign conntrack already in hashes to this skbuff. Don't
+ * modify skb->nfctinfo to ensure consistent stateful filtering.
+ */
+ skb->nfct = &ct->ct_general;
+ return NF_ACCEPT;
+ }
+ NF_CT_STAT_INC(net, drop);
+ return NF_DROP;
+}
+
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
@@ -613,6 +673,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
enum ip_conntrack_info ctinfo;
struct net *net;
unsigned int sequence;
+ int ret = NF_DROP;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
@@ -628,10 +689,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
local_bh_disable();
do {
- sequence = read_seqcount_begin(&net->ct.generation);
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
/* reuse the hash saved before */
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
- hash = hash_bucket(hash, net);
+ hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -655,23 +716,22 @@ __nf_conntrack_confirm(struct sk_buff *skb)
*/
nf_ct_del_from_dying_or_unconfirmed_list(ct);
- if (unlikely(nf_ct_is_dying(ct)))
- goto out;
+ if (unlikely(nf_ct_is_dying(ct))) {
+ nf_ct_add_to_dying_list(ct);
+ goto dying;
+ }
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ zone, net))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
goto out;
/* Timer relative to confirmation time, not original
@@ -710,10 +770,12 @@ __nf_conntrack_confirm(struct sk_buff *skb)
out:
nf_ct_add_to_dying_list(ct);
+ ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+dying:
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
- return NF_DROP;
+ return ret;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
@@ -726,29 +788,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
struct net *net = nf_ct_net(ignored_conntrack);
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ unsigned int hash, sequence;
struct hlist_nulls_node *n;
struct nf_conn *ct;
- unsigned int hash;
zone = nf_ct_zone(ignored_conntrack);
- hash = hash_conntrack(net, tuple);
- /* Disable BHs the entire time since we need to disable them at
- * least once for the stats anyway.
- */
- rcu_read_lock_bh();
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+ rcu_read_lock();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = hash_conntrack(net, tuple);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (ct != ignored_conntrack &&
- nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
- NF_CT_STAT_INC(net, found);
- rcu_read_unlock_bh();
+ nf_ct_key_equal(h, tuple, zone, net)) {
+ NF_CT_STAT_INC_ATOMIC(net, found);
+ rcu_read_unlock();
return 1;
}
- NF_CT_STAT_INC(net, searched);
+ NF_CT_STAT_INC_ATOMIC(net, searched);
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return 0;
}
@@ -762,71 +826,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
{
/* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
- struct nf_conn *ct = NULL, *tmp;
+ struct nf_conn *tmp;
struct hlist_nulls_node *n;
- unsigned int i = 0, cnt = 0;
- int dropped = 0;
- unsigned int hash, sequence;
+ unsigned int i, hash, sequence;
+ struct nf_conn *ct = NULL;
spinlock_t *lockp;
+ bool ret = false;
+
+ i = 0;
local_bh_disable();
restart:
- sequence = read_seqcount_begin(&net->ct.generation);
- hash = hash_bucket(_hash, net);
- for (; i < net->ct.htable_size; i++) {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ for (; i < NF_CT_EVICTION_RANGE; i++) {
+ hash = scale_hash(_hash++);
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
- if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
spin_unlock(lockp);
goto restart;
}
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
- hnnode) {
+ hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+ hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
- !nf_ct_is_dying(tmp) &&
- atomic_inc_not_zero(&tmp->ct_general.use)) {
+
+ if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+ !net_eq(nf_ct_net(tmp), net) ||
+ nf_ct_is_dying(tmp))
+ continue;
+
+ if (atomic_inc_not_zero(&tmp->ct_general.use)) {
ct = tmp;
break;
}
- cnt++;
}
- hash = (hash + 1) % net->ct.htable_size;
spin_unlock(lockp);
-
- if (ct || cnt >= NF_CT_EVICTION_RANGE)
+ if (ct)
break;
-
}
+
local_bh_enable();
if (!ct)
- return dropped;
+ return false;
- if (del_timer(&ct->timeout)) {
+ /* kill only if in same netns -- might have moved due to
+ * SLAB_DESTROY_BY_RCU rules
+ */
+ if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
if (nf_ct_delete(ct, 0, 0)) {
- dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop);
+ ret = true;
}
}
- nf_ct_put(ct);
- return dropped;
-}
-
-void init_nf_conntrack_hash_rnd(void)
-{
- unsigned int rand;
- /*
- * Why not initialize nf_conntrack_rnd in a "init()" function ?
- * Because there isn't enough entropy when system initializing,
- * and we initialize it as late as possible.
- */
- do {
- get_random_bytes(&rand, sizeof(rand));
- } while (!rand);
- cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+ nf_ct_put(ct);
+ return ret;
}
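The rewritten early_drop() above now probes at most NF_CT_EVICTION_RANGE buckets starting from the packet's hash, skipping assured, dying and foreign-netns entries, and evicts the first remaining candidate. A rough userspace sketch of that bounded scan follows; entry and pick_victim are hypothetical names, and a single entry per bucket stands in for the real hlist chains.

#include <stdbool.h>
#include <stdio.h>

#define BUCKETS        8
#define EVICTION_RANGE 4

struct entry {
    bool assured;
    bool dying;
};

static struct entry *table[BUCKETS];

/* Probe at most EVICTION_RANGE buckets from "start" for an evictable entry. */
static struct entry *pick_victim(unsigned int start)
{
    for (unsigned int i = 0; i < EVICTION_RANGE; i++) {
        struct entry *e = table[(start + i) % BUCKETS];

        if (e && !e->assured && !e->dying)
            return e;               /* candidate for early drop */
    }
    return NULL;                    /* nothing evictable in range */
}

int main(void)
{
    struct entry assured = { .assured = true };
    struct entry young = { 0 };

    table[1] = &assured;
    table[2] = &young;
    printf("picked the unassured entry: %s\n",
           pick_victim(0) == &young ? "yes" : "no");
    return 0;
}

The kernel version additionally takes the per-bucket spinlock for each probe and restarts if the nf_conntrack_generation seqcount changed under it.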
static struct nf_conn *
@@ -838,12 +894,6 @@ __nf_conntrack_alloc(struct net *net,
{
struct nf_conn *ct;
- if (unlikely(!nf_conntrack_hash_rnd)) {
- init_nf_conntrack_hash_rnd();
- /* recompute the hash as nf_conntrack_hash_rnd is initialized */
- hash = hash_conntrack_raw(orig);
- }
-
/* We don't want any race condition at early drop stage */
atomic_inc(&net->ct.count);
@@ -860,7 +910,7 @@ __nf_conntrack_alloc(struct net *net,
* Do not use kmem_cache_zalloc(), as this cache uses
* SLAB_DESTROY_BY_RCU.
*/
- ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
+ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
if (ct == NULL)
goto out;
@@ -887,7 +937,7 @@ __nf_conntrack_alloc(struct net *net,
atomic_set(&ct->ct_general.use, 0);
return ct;
out_free:
- kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
out:
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
@@ -914,7 +964,7 @@ void nf_conntrack_free(struct nf_conn *ct)
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
- kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
}
@@ -1061,7 +1111,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple);
+ hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1270,17 +1320,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
}
acct:
- if (do_acct) {
- struct nf_conn_acct *acct;
-
- acct = nf_conn_acct_find(ct);
- if (acct) {
- struct nf_conn_counter *counter = acct->counter;
-
- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
- atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
- }
- }
+ if (do_acct)
+ nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
@@ -1289,18 +1330,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
const struct sk_buff *skb,
int do_acct)
{
- if (do_acct) {
- struct nf_conn_acct *acct;
-
- acct = nf_conn_acct_find(ct);
- if (acct) {
- struct nf_conn_counter *counter = acct->counter;
-
- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
- atomic64_add(skb->len - skb_network_offset(skb),
- &counter[CTINFO2DIR(ctinfo)].bytes);
- }
- }
+ if (do_acct)
+ nf_ct_acct_update(ct, ctinfo, skb->len);
if (del_timer(&ct->timeout)) {
ct->timeout.function((unsigned long)ct);
@@ -1396,16 +1427,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
int cpu;
spinlock_t *lockp;
- for (; *bucket < net->ct.htable_size; (*bucket)++) {
+ for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < net->ct.htable_size) {
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+ if (*bucket < nf_conntrack_htable_size) {
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
+ if (net_eq(nf_ct_net(ct), net) &&
+ iter(ct, data))
goto found;
}
}
@@ -1443,6 +1475,9 @@ void nf_ct_iterate_cleanup(struct net *net,
might_sleep();
+ if (atomic_read(&net->ct.count) == 0)
+ return;
+
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
@@ -1494,6 +1529,8 @@ void nf_conntrack_cleanup_end(void)
while (untrack_refs() > 0)
schedule();
+ nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
nf_ct_extend_unregister(&nf_ct_zone_extend);
#endif
@@ -1544,15 +1581,12 @@ i_see_dead_people:
}
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_proto_pernet_fini(net);
nf_conntrack_helper_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_tstamp_pernet_fini(net);
nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
- kfree(net->ct.slabname);
free_percpu(net->ct.stat);
free_percpu(net->ct.pcpu_lists);
}
@@ -1607,7 +1641,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
local_bh_disable();
nf_conntrack_all_lock();
- write_seqcount_begin(&init_net.ct.generation);
+ write_seqcount_begin(&nf_conntrack_generation);
/* Lookups in the old hash might happen in parallel, which means we
* might get false negatives during connection lookup. New connections
@@ -1615,26 +1649,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
* though since that required taking the locks.
*/
- for (i = 0; i < init_net.ct.htable_size; i++) {
- while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
- h = hlist_nulls_entry(init_net.ct.hash[i].first,
- struct nf_conntrack_tuple_hash, hnnode);
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ h = hlist_nulls_entry(nf_conntrack_hash[i].first,
+ struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
- bucket = __hash_conntrack(&h->tuple, hashsize);
+ bucket = __hash_conntrack(nf_ct_net(ct),
+ &h->tuple, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
- old_size = init_net.ct.htable_size;
- old_hash = init_net.ct.hash;
+ old_size = nf_conntrack_htable_size;
+ old_hash = nf_conntrack_hash;
- init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
- init_net.ct.hash = hash;
+ nf_conntrack_hash = hash;
+ nf_conntrack_htable_size = hashsize;
- write_seqcount_end(&init_net.ct.generation);
+ write_seqcount_end(&nf_conntrack_generation);
nf_conntrack_all_unlock();
local_bh_enable();
+ synchronize_net();
nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
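The resize path above publishes the new table and size under nf_conntrack_generation, a global seqcount; lookups that race with the resize notice the changed generation and retry, as __nf_conntrack_confirm() and nf_conntrack_tuple_taken() do earlier in this patch. A single-writer userspace sketch of that pattern, with C11 atomics standing in for the kernel seqcount API and all names invented:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int generation;   /* analogue of nf_conntrack_generation */
static _Atomic(unsigned int *) table;
static _Atomic unsigned int table_size;

/* Single writer: bump the counter to odd, swap table and size, bump to even. */
static void resize(unsigned int *new_tbl, unsigned int new_size)
{
    atomic_fetch_add(&generation, 1);
    atomic_store(&table, new_tbl);
    atomic_store(&table_size, new_size);
    atomic_fetch_add(&generation, 1);
}

/* Readers snapshot table and size, retrying if the generation moved. */
static unsigned int *snapshot(unsigned int *size)
{
    unsigned int seq;
    unsigned int *tbl;

    do {
        while ((seq = atomic_load(&generation)) & 1)
            ;                           /* writer in progress */
        tbl = atomic_load(&table);
        *size = atomic_load(&table_size);
    } while (atomic_load(&generation) != seq);
    return tbl;
}

int main(void)
{
    static unsigned int small[16], big[64];
    unsigned int size;
    unsigned int *tbl;

    resize(small, 16);
    tbl = snapshot(&size);
    printf("%u buckets at %p\n", size, (void *)tbl);
    resize(big, 64);
    tbl = snapshot(&size);
    printf("%u buckets at %p\n", size, (void *)tbl);
    return 0;
}

The synchronize_net() added before nf_ct_free_hashtable() then makes sure no reader still holds a snapshot of the old table when it is freed.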
@@ -1655,7 +1691,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
int nf_conntrack_init_start(void)
{
int max_factor = 8;
- int i, ret, cpu;
+ int ret = -ENOMEM;
+ int i, cpu;
+
+ seqcount_init(&nf_conntrack_generation);
for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_conntrack_locks[i]);
@@ -1682,8 +1721,19 @@ int nf_conntrack_init_start(void)
* entries. */
max_factor = 4;
}
+
+ nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+ if (!nf_conntrack_hash)
+ return -ENOMEM;
+
nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+ nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+ sizeof(struct nf_conn), 0,
+ SLAB_DESTROY_BY_RCU, NULL);
+ if (!nf_conntrack_cachep)
+ goto err_cachep;
+
printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
nf_conntrack_max);
@@ -1760,6 +1810,9 @@ err_tstamp:
err_acct:
nf_conntrack_expect_fini();
err_expect:
+ kmem_cache_destroy(nf_conntrack_cachep);
+err_cachep:
+ nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
return ret;
}
@@ -1783,7 +1836,6 @@ int nf_conntrack_init_net(struct net *net)
int cpu;
atomic_set(&net->ct.count, 0);
- seqcount_init(&net->ct.generation);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
if (!net->ct.pcpu_lists)
@@ -1801,24 +1853,6 @@ int nf_conntrack_init_net(struct net *net)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
- if (!net->ct.slabname)
- goto err_slabname;
-
- net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
- sizeof(struct nf_conn), 0,
- SLAB_DESTROY_BY_RCU, NULL);
- if (!net->ct.nf_conntrack_cachep) {
- printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- goto err_cache;
- }
-
- net->ct.htable_size = nf_conntrack_htable_size;
- net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
- if (!net->ct.hash) {
- printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
- goto err_hash;
- }
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
goto err_expect;
@@ -1850,12 +1884,6 @@ err_tstamp:
err_acct:
nf_conntrack_expect_pernet_fini(net);
err_expect:
- nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-err_hash:
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-err_cache:
- kfree(net->ct.slabname);
-err_slabname:
free_percpu(net->ct.stat);
err_pcpu_lists:
free_percpu(net->ct.pcpu_lists);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 278927ab0948..9e3693128313 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -24,6 +24,7 @@
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>
+#include <net/netns/hash.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -35,9 +36,13 @@
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
+struct hlist_head *nf_ct_expect_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
+
unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static unsigned int nf_ct_expect_hashrnd __read_mostly;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -72,21 +77,32 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
nf_ct_expect_put(exp);
}
-static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
+static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- unsigned int hash;
+ unsigned int hash, seed;
- if (unlikely(!nf_conntrack_hash_rnd)) {
- init_nf_conntrack_hash_rnd();
- }
+ get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
+
+ seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
(((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
- (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
+ (__force __u16)tuple->dst.u.all) ^ seed);
return reciprocal_scale(hash, nf_ct_expect_hsize);
}
+static bool
+nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_expect *i,
+ const struct nf_conntrack_zone *zone,
+ const struct net *net)
+{
+ return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+ net_eq(net, nf_ct_net(i->master)) &&
+ nf_ct_zone_equal_any(i->master, zone);
+}
+
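With the hunks above, nf_ct_expect_dst_hash() derives its seed lazily via get_random_once() and mixes in net_hash_mix(), so identical tuples in different network namespaces land in different buckets, while nf_ct_exp_equal() re-checks net_eq() on lookup. A small userspace sketch of the seeding idea; pthread_once and a toy mixer replace get_random_once()/jhash2(), and all names are made up.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static uint32_t hashrnd;
static pthread_once_t hashrnd_once = PTHREAD_ONCE_INIT;

static void init_hashrnd(void)
{
    srand((unsigned int)time(NULL));
    hashrnd = (uint32_t)rand() | 1;     /* lazily chosen, never zero */
}

/* Hash a tuple key, mixing in a per-namespace cookie so identical tuples
 * in different namespaces land in different buckets. */
static uint32_t tuple_hash(uint32_t tuple_key, uint32_t netns_cookie,
                           uint32_t buckets)
{
    uint32_t seed, h;

    pthread_once(&hashrnd_once, init_hashrnd);
    seed = hashrnd ^ netns_cookie;

    h = tuple_key ^ seed;               /* toy mixer instead of jhash2() */
    h ^= h >> 16;
    h *= 0x7feb352dU;
    h ^= h >> 15;
    return h % buckets;
}

int main(void)
{
    printf("ns1 bucket: %u\n", tuple_hash(0xc0a80001U, 0x1111, 1024));
    printf("ns2 bucket: %u\n", tuple_hash(0xc0a80001U, 0x2222, 1024));
    return 0;
}

The same pattern is applied to the NAT bysource hash later in this patch, where hash_by_src() uses nf_nat_hash_rnd ^ net_hash_mix(n).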
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -98,10 +114,9 @@ __nf_ct_expect_find(struct net *net,
if (!net->ct.expect_count)
return NULL;
- h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
- if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone_equal_any(i->master, zone))
+ h = nf_ct_expect_dst_hash(net, tuple);
+ hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
+ if (nf_ct_exp_equal(tuple, i, zone, net))
return i;
}
return NULL;
@@ -139,11 +154,10 @@ nf_ct_find_expectation(struct net *net,
if (!net->ct.expect_count)
return NULL;
- h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
+ h = nf_ct_expect_dst_hash(net, tuple);
+ hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
- nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone_equal_any(i->master, zone)) {
+ nf_ct_exp_equal(tuple, i, zone, net)) {
exp = i;
break;
}
@@ -223,6 +237,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
}
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+ net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
@@ -232,6 +247,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
return a->master == b->master && a->class == b->class &&
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+ net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
@@ -342,7 +358,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
struct nf_conn_help *master_help = nfct_help(exp->master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
- unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
+ unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
/* two references : one for hash insert, one for the timer */
atomic_add(2, &exp->use);
@@ -350,7 +366,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
- hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
@@ -401,8 +417,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
ret = -ESHUTDOWN;
goto out;
}
- h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
+ h = nf_ct_expect_dst_hash(net, &expect->tuple);
+ hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
if (del_timer(&i->timeout)) {
nf_ct_unlink_expect(i);
@@ -468,12 +484,11 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
if (n)
return n;
}
@@ -483,14 +498,13 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
}
return head;
}
@@ -623,28 +637,13 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
int nf_conntrack_expect_pernet_init(struct net *net)
{
- int err = -ENOMEM;
-
net->ct.expect_count = 0;
- net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
- if (net->ct.expect_hash == NULL)
- goto err1;
-
- err = exp_proc_init(net);
- if (err < 0)
- goto err2;
-
- return 0;
-err2:
- nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
-err1:
- return err;
+ return exp_proc_init(net);
}
void nf_conntrack_expect_pernet_fini(struct net *net)
{
exp_proc_remove(net);
- nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
int nf_conntrack_expect_init(void)
@@ -660,6 +659,13 @@ int nf_conntrack_expect_init(void)
0, 0, NULL);
if (!nf_ct_expect_cachep)
return -ENOMEM;
+
+ nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
+ if (!nf_ct_expect_hash) {
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -667,4 +673,5 @@ void nf_conntrack_expect_fini(void)
{
rcu_barrier(); /* Wait for call_rcu() before destroy */
kmem_cache_destroy(nf_ct_expect_cachep);
+ nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3b40ec575cd5..f703adb7e5f7 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -38,10 +38,10 @@ unsigned int nf_ct_helper_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_helper_hsize);
static unsigned int nf_ct_helper_count __read_mostly;
-static bool nf_ct_auto_assign_helper __read_mostly = true;
+static bool nf_ct_auto_assign_helper __read_mostly = false;
module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
MODULE_PARM_DESC(nf_conntrack_helper,
- "Enable automatic conntrack helper assignment (default 1)");
+ "Enable automatic conntrack helper assignment (default 0)");
#ifdef CONFIG_SYSCTL
static struct ctl_table helper_sysctl_table[] = {
@@ -400,7 +400,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
- &net->ct.expect_hash[i], hnode) {
+ &nf_ct_expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((rcu_dereference_protected(
help->helper,
@@ -424,10 +424,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
spin_unlock_bh(&pcpu->lock);
}
local_bh_disable();
- for (i = 0; i < net->ct.htable_size; i++) {
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < net->ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ if (i < nf_conntrack_htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
unhelp(h, me);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 294a8e28cec4..a18d1ceabad5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -824,19 +824,22 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
last = (struct nf_conn *)cb->args[1];
local_bh_disable();
- for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
+ for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
- if (cb->args[0] >= net->ct.htable_size) {
+ if (cb->args[0] >= nf_conntrack_htable_size) {
spin_unlock(lockp);
goto out;
}
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
- hnnode) {
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
+ hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (!net_eq(net, nf_ct_net(ct)))
+ continue;
+
/* Dump entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0,
* then dump everything. */
@@ -2629,10 +2632,14 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
last = (struct nf_conntrack_expect *)cb->args[1];
for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
restart:
- hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]],
+ hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]],
hnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
+
+ if (!net_eq(nf_ct_net(exp->master), net))
+ continue;
+
if (cb->args[1]) {
if (exp != last)
continue;
@@ -2883,8 +2890,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
- &net->ct.expect_hash[i],
+ &nf_ct_expect_hash[i],
hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
m_help = nfct_help(exp->master);
if (!strcmp(m_help->helper->name, name) &&
del_timer(&exp->timeout)) {
@@ -2901,8 +2912,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
- &net->ct.expect_hash[i],
+ &nf_ct_expect_hash[i],
hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp,
NETLINK_CB(skb).portid,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 478f92f834b6..4fd040575ffe 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
.name = "udp",
+ .allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.print_tuple = udp_print_tuple,
@@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
.name = "udp",
+ .allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.print_tuple = udp_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1ac8ee13a873..9d692f5adb94 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
.name = "udplite",
+ .allow_clash = true,
.pkt_to_tuple = udplite_pkt_to_tuple,
.invert_tuple = udplite_invert_tuple,
.print_tuple = udplite_print_tuple,
@@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
.name = "udplite",
+ .allow_clash = true,
.pkt_to_tuple = udplite_pkt_to_tuple,
.invert_tuple = udplite_invert_tuple,
.print_tuple = udplite_print_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0f1a45bcacb2..f87e84ebcec3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -54,14 +54,13 @@ struct ct_iter_state {
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < net->ct.htable_size;
+ st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -71,18 +70,17 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct hlist_nulls_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= net->ct.htable_size)
+ if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
}
head = rcu_dereference(
hlist_nulls_first_rcu(
- &net->ct.hash[st->bucket]));
+ &nf_conntrack_hash[st->bucket]));
}
return head;
}
@@ -458,7 +456,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
},
{
.procname = "nf_conntrack_buckets",
- .data = &init_net.ct.htable_size,
+ .data = &nf_conntrack_htable_size,
.maxlen = sizeof(unsigned int),
.mode = 0444,
.proc_handler = proc_dointvec,
@@ -512,7 +510,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
goto out_kmemdup;
table[1].data = &net->ct.count;
- table[2].data = &net->ct.htable_size;
table[3].data = &net->ct.sysctl_checksum;
table[4].data = &net->ct.sysctl_log_invalid;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 06a9f45771ab..6877a396f8fc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -38,6 +38,9 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -118,15 +121,17 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
-hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_conntrack_hash_rnd);
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- return reciprocal_scale(hash, net->ct.nat_htable_size);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -196,9 +201,10 @@ find_appropriate_src(struct net *net,
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
- hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
+ hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
@@ -431,7 +437,7 @@ nf_nat_setup_info(struct nf_conn *ct,
nat = nfct_nat(ct);
nat->ct = ct;
hlist_add_head_rcu(&nat->bysource,
- &net->ct.nat_bysource[srchash]);
+ &nf_nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock);
}
@@ -819,27 +825,14 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
-static int __net_init nf_nat_net_init(struct net *net)
-{
- /* Leave them the same for the moment. */
- net->ct.nat_htable_size = net->ct.htable_size;
- net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
- if (!net->ct.nat_bysource)
- return -ENOMEM;
- return 0;
-}
-
static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
- synchronize_rcu();
- nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
- .init = nf_nat_net_init,
.exit = nf_nat_net_exit,
};
@@ -852,8 +845,16 @@ static int __init nf_nat_init(void)
{
int ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
+
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -877,6 +878,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -895,6 +897,7 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7a85a9dd37ad..4d292b933b5c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2317,7 +2317,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_TABLE] = { .type = NLA_STRING },
[NFTA_SET_NAME] = { .type = NLA_STRING,
- .len = IFNAMSIZ - 1 },
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_FLAGS] = { .type = NLA_U32 },
[NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
@@ -2401,7 +2401,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, IFNAMSIZ, '%');
+ p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2696,7 +2696,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[IFNAMSIZ];
+ char name[NFT_SET_MAXNAMELEN];
unsigned int size;
bool create;
u64 timeout;
@@ -3375,6 +3375,22 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem)
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+static int nft_setelem_parse_flags(const struct nft_set *set,
+ const struct nlattr *attr, u32 *flags)
+{
+ if (attr == NULL)
+ return 0;
+
+ *flags = ntohl(nla_get_be32(attr));
+ if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+
+ return 0;
+}
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@@ -3388,8 +3404,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u32 flags = 0;
u64 timeout;
- u32 flags;
u8 ulen;
int err;
@@ -3403,17 +3419,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_prepare(&tmpl);
- flags = 0;
- if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
- flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
- if (flags & ~NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (!(set->flags & NFT_SET_INTERVAL) &&
- flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
- }
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
@@ -3582,9 +3592,13 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ struct nft_set_ext_tmpl tmpl;
struct nft_data_desc desc;
struct nft_set_elem elem;
+ struct nft_set_ext *ext;
struct nft_trans *trans;
+ u32 flags = 0;
+ void *priv;
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -3596,6 +3610,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
goto err1;
+ nft_set_ext_prepare(&tmpl);
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+
err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
@@ -3605,24 +3627,40 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
goto err2;
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len);
+
+ err = -ENOMEM;
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
+ GFP_KERNEL);
+ if (elem.priv == NULL)
+ goto err2;
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
- goto err2;
+ goto err3;
}
- elem.priv = set->ops->deactivate(set, &elem);
- if (elem.priv == NULL) {
+ priv = set->ops->deactivate(set, &elem);
+ if (priv == NULL) {
err = -ENOENT;
- goto err3;
+ goto err4;
}
+ kfree(elem.priv);
+ elem.priv = priv;
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
-err3:
+err4:
kfree(trans);
+err3:
+ kfree(elem.priv);
err2:
nft_data_uninit(&elem.key.val, desc.type);
err1:
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 2671b9deb103..3c84f14326f5 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -306,10 +306,10 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
int i;
local_bh_disable();
- for (i = 0; i < net->ct.htable_size; i++) {
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < net->ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ if (i < nf_conntrack_htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
untimeout(h, timeout);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 25998facefd0..137e308d5b24 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -198,6 +198,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
}
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_replace(ct,
+ &regs->data[priv->sreg],
+ &regs->data[priv->sreg],
+ NF_CT_LABELS_MAX_SIZE / sizeof(u32));
+ break;
+#endif
default:
break;
}
@@ -365,6 +373,16 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
len = FIELD_SIZEOF(struct nf_conn, mark);
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
+ if (err)
+ return err;
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -384,6 +402,18 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
static void nft_ct_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
+ struct nft_ct *priv = nft_expr_priv(expr);
+
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_put(ctx->net);
+ break;
+#endif
+ default:
+ break;
+ }
+
nft_ct_l3proto_module_put(ctx->afi->family);
}
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 1c30f41cff5b..f762094af7c1 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -29,6 +29,17 @@ struct nft_rbtree_elem {
struct nft_set_ext ext;
};
+static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
+}
+
+static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+ const struct nft_rbtree_elem *interval)
+{
+ return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+}
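The two helpers added here support range sets in which every range is stored as two elements, a start and an NFT_SET_ELEM_INTERVAL_END marker, and adjacent ranges share a key; lookup must prefer the start marker when keys collide, which is what the "don't update interval" hunk below does. A simplified sketch of that membership rule over a sorted array instead of an rbtree; boundary and in_set are illustrative names only.

#include <stdbool.h>
#include <stdio.h>

struct boundary {
    unsigned int key;
    bool interval_end;      /* NFT_SET_ELEM_INTERVAL_END analogue */
};

/* boundaries must be sorted by key; adjacent ranges share end/start keys */
static bool in_set(const struct boundary *b, int n, unsigned int key)
{
    const struct boundary *best = NULL;

    for (int i = 0; i < n; i++) {
        if (b[i].key > key)
            break;
        /* for equal keys, prefer the start marker (range begin) */
        if (best && best->key == b[i].key && b[i].interval_end)
            continue;
        best = &b[i];
    }
    return best && !best->interval_end;
}

int main(void)
{
    /* ranges [10,20) and [20,30): key 20 ends one and starts the next */
    struct boundary set[] = {
        { 10, false }, { 20, true }, { 20, false }, { 30, true },
    };

    printf("15 in set: %d\n", in_set(set, 4, 15));  /* 1 */
    printf("20 in set: %d\n", in_set(set, 4, 20));  /* 1 */
    printf("30 in set: %d\n", in_set(set, 4, 30));  /* 0 */
    return 0;
}

The insert-path change further down follows the same convention: a start and an end element with an identical key may now coexist instead of returning -EEXIST.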
static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
@@ -37,6 +48,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
const struct nft_rbtree_elem *rbe, *interval = NULL;
const struct rb_node *parent;
u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+ const void *this;
int d;
spin_lock_bh(&nft_rbtree_lock);
@@ -44,9 +56,16 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
if (d < 0) {
parent = parent->rb_left;
+ /* In case of adjacent ranges, we always see the high
+ * part of the range first, before the low one.
+ * So don't update interval if the keys are equal.
+ */
+ if (interval && nft_rbtree_equal(set, this, interval))
+ continue;
interval = rbe;
} else if (d > 0)
parent = parent->rb_right;
@@ -56,9 +75,7 @@ found:
parent = parent->rb_left;
continue;
}
- if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(&rbe->ext) &
- NFT_SET_ELEM_INTERVAL_END)
+ if (nft_rbtree_interval_end(rbe))
goto out;
spin_unlock_bh(&nft_rbtree_lock);
@@ -98,9 +115,16 @@ static int __nft_rbtree_insert(const struct nft_set *set,
else if (d > 0)
p = &parent->rb_right;
else {
- if (nft_set_elem_active(&rbe->ext, genmask))
- return -EEXIST;
- p = &parent->rb_left;
+ if (nft_set_elem_active(&rbe->ext, genmask)) {
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(new))
+ p = &parent->rb_left;
+ else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(new))
+ p = &parent->rb_right;
+ else
+ return -EEXIST;
+ }
}
}
rb_link_node(&new->node, parent, p);
@@ -145,7 +169,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
- struct nft_rbtree_elem *rbe;
+ struct nft_rbtree_elem *rbe, *this = elem->priv;
u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
@@ -163,6 +187,15 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
parent = parent->rb_left;
continue;
}
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(this)) {
+ parent = parent->rb_left;
+ continue;
+ } else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(this)) {
+ parent = parent->rb_right;
+ continue;
+ }
nft_set_elem_change_active(set, &rbe->ext);
return rbe;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index fbb7a2b57b44..61fff422424f 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -64,18 +64,26 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
return NULL;
}
-int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id)
+int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
+ struct dest_spec_params *params)
{
struct nci_conn_info *conn_info;
list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
- if (conn_info->id == id)
- return conn_info->conn_id;
+ if (conn_info->dest_type == dest_type) {
+ if (!params)
+ return conn_info->conn_id;
+ if (conn_info) {
+ if (params->id == conn_info->dest_params->id &&
+ params->protocol == conn_info->dest_params->protocol)
+ return conn_info->conn_id;
+ }
+ }
}
return -EINVAL;
}
-EXPORT_SYMBOL(nci_get_conn_info_by_id);
+EXPORT_SYMBOL(nci_get_conn_info_by_dest_type_params);
/* ---- NCI requests ---- */
@@ -392,6 +400,83 @@ int nci_core_init(struct nci_dev *ndev)
}
EXPORT_SYMBOL(nci_core_init);
+struct nci_loopback_data {
+ u8 conn_id;
+ struct sk_buff *data;
+};
+
+static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+{
+ struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+
+ nci_send_data(ndev, data->conn_id, data->data);
+}
+
+static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
+{
+ struct nci_dev *ndev = (struct nci_dev *)context;
+ struct nci_conn_info *conn_info;
+
+ conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
+ if (!conn_info) {
+ nci_req_complete(ndev, NCI_STATUS_REJECTED);
+ return;
+ }
+
+ conn_info->rx_skb = skb;
+
+ nci_req_complete(ndev, NCI_STATUS_OK);
+}
+
+int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+ struct sk_buff **resp)
+{
+ int r;
+ struct nci_loopback_data loopback_data;
+ struct nci_conn_info *conn_info;
+ struct sk_buff *skb;
+ int conn_id = nci_get_conn_info_by_dest_type_params(ndev,
+ NCI_DESTINATION_NFCC_LOOPBACK, NULL);
+
+ if (conn_id < 0) {
+ r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCC_LOOPBACK,
+ 0, 0, NULL);
+ if (r != NCI_STATUS_OK)
+ return r;
+
+ conn_id = nci_get_conn_info_by_dest_type_params(ndev,
+ NCI_DESTINATION_NFCC_LOOPBACK,
+ NULL);
+ }
+
+ conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
+ if (!conn_info)
+ return -EPROTO;
+
+ /* store cb and context to be used on receiving data */
+ conn_info->data_exchange_cb = nci_nfcc_loopback_cb;
+ conn_info->data_exchange_cb_context = ndev;
+
+ skb = nci_skb_alloc(ndev, NCI_DATA_HDR_SIZE + data_len, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, NCI_DATA_HDR_SIZE);
+ memcpy(skb_put(skb, data_len), data, data_len);
+
+ loopback_data.conn_id = conn_id;
+ loopback_data.data = skb;
+
+ ndev->cur_conn_id = conn_id;
+ r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+ msecs_to_jiffies(NCI_DATA_TIMEOUT));
+ if (r == NCI_STATUS_OK && resp)
+ *resp = conn_info->rx_skb;
+
+ return r;
+}
+EXPORT_SYMBOL(nci_nfcc_loopback);
+
static int nci_open_device(struct nci_dev *ndev)
{
int rc = 0;
@@ -610,9 +695,6 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
struct nci_core_conn_create_cmd *cmd;
struct core_conn_create_data data;
- if (!number_destination_params)
- return -EINVAL;
-
data.length = params_len + sizeof(struct nci_core_conn_create_cmd);
cmd = kzalloc(data.length, GFP_KERNEL);
if (!cmd)
@@ -620,17 +702,23 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
cmd->destination_type = destination_type;
cmd->number_destination_params = number_destination_params;
- memcpy(cmd->params, params, params_len);
data.cmd = cmd;
- if (params->length > 0)
- ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX];
- else
- ndev->cur_id = 0;
+ if (params) {
+ memcpy(cmd->params, params, params_len);
+ if (params->length > 0)
+ memcpy(&ndev->cur_params,
+ &params->value[DEST_SPEC_PARAMS_ID_INDEX],
+ sizeof(struct dest_spec_params));
+ else
+ ndev->cur_params.id = 0;
+ } else {
+ ndev->cur_params.id = 0;
+ }
+ ndev->cur_dest_type = destination_type;
- r = __nci_request(ndev, nci_core_conn_create_req,
- (unsigned long)&data,
+ r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
kfree(cmd);
return r;
@@ -646,6 +734,7 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
{
+ ndev->cur_conn_id = conn_id;
return __nci_request(ndev, nci_core_conn_close_req, conn_id,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 2ada2b39e355..1e8c1a12aaec 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -734,7 +734,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
* “HCI Access”, even if the HCI Network contains multiple NFCEEs.
*/
ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id;
- ndev->cur_id = nfcee_ntf->nfcee_id;
+ ndev->cur_params.id = nfcee_ntf->nfcee_id;
nci_req_complete(ndev, status);
}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 9b6eb913d801..e3bbf1937d0e 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -226,7 +226,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
struct sk_buff *skb)
{
__u8 status = skb->data[0];
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info = NULL;
struct nci_core_conn_create_rsp *rsp;
pr_debug("status 0x%x\n", status);
@@ -241,7 +241,17 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
goto exit;
}
- conn_info->id = ndev->cur_id;
+ conn_info->dest_params = devm_kzalloc(&ndev->nfc_dev->dev,
+ sizeof(struct dest_spec_params),
+ GFP_KERNEL);
+ if (!conn_info->dest_params) {
+ status = NCI_STATUS_REJECTED;
+ goto free_conn_info;
+ }
+
+ conn_info->dest_type = ndev->cur_dest_type;
+ conn_info->dest_params->id = ndev->cur_params.id;
+ conn_info->dest_params->protocol = ndev->cur_params.protocol;
conn_info->conn_id = rsp->conn_id;
/* Note: data_exchange_cb and data_exchange_cb_context need to
@@ -251,7 +261,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
INIT_LIST_HEAD(&conn_info->list);
list_add(&conn_info->list, &ndev->conn_info_list);
- if (ndev->cur_id == ndev->hci_dev->nfcee_id)
+ if (ndev->cur_params.id == ndev->hci_dev->nfcee_id)
ndev->hci_dev->conn_info = conn_info;
conn_info->conn_id = rsp->conn_id;
@@ -259,7 +269,11 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
atomic_set(&conn_info->credits_cnt, rsp->credits_cnt);
}
+free_conn_info:
+ if (status == NCI_STATUS_REJECTED)
+ devm_kfree(&ndev->nfc_dev->dev, conn_info);
exit:
+
nci_req_complete(ndev, status);
}
@@ -271,7 +285,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
pr_debug("status 0x%x\n", status);
if (status == NCI_STATUS_OK) {
- conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id);
+ conn_info = nci_get_conn_info_by_conn_id(ndev,
+ ndev->cur_conn_id);
if (conn_info) {
list_del(&conn_info->list);
devm_kfree(&ndev->nfc_dev->dev, conn_info);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 9741a76c7405..9f0bc49fa969 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -439,20 +439,12 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 protonum;
l3proto = __nf_ct_l3proto_find(l3num);
- if (!l3proto) {
- pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
- return NULL;
- }
if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
&protonum) <= 0) {
pr_debug("ovs_ct_find_existing: Can't get protonum\n");
return NULL;
}
l4proto = __nf_ct_l4proto_find(l3num, protonum);
- if (!l4proto) {
- pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
- return NULL;
- }
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
protonum, net, &tuple, l3proto, l4proto)) {
pr_debug("ovs_ct_find_existing: Can't get tuple\n");
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 22d9a5316304..856bd8dba676 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -738,9 +738,9 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
len += nla_total_size(acts->orig_len);
return len
- + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
- + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
+ + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}
/* Called with ovs_mutex or RCU read lock. */
@@ -759,7 +759,9 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
return -EMSGSIZE;
if (stats.n_packets &&
- nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
+ nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
+ sizeof(struct ovs_flow_stats), &stats,
+ OVS_FLOW_ATTR_PAD))
return -EMSGSIZE;
if ((u8)ntohs(tcp_flags) &&
@@ -1435,8 +1437,8 @@ static size_t ovs_dp_cmd_msg_size(void)
size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
msgsize += nla_total_size(IFNAMSIZ);
- msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
- msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
+ msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
+ msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
return msgsize;
@@ -1463,13 +1465,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
goto nla_put_failure;
get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
- if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
- &dp_stats))
+ if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
+ &dp_stats, OVS_DP_ATTR_PAD))
goto nla_put_failure;
- if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
- sizeof(struct ovs_dp_megaflow_stats),
- &dp_megaflow_stats))
+ if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
+ sizeof(struct ovs_dp_megaflow_stats),
+ &dp_megaflow_stats, OVS_DP_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
@@ -1838,8 +1840,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
goto nla_put_failure;
ovs_vport_get_stats(vport, &vport_stats);
- if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
- &vport_stats))
+ if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
+ sizeof(struct ovs_vport_stats), &vport_stats,
+ OVS_VPORT_ATTR_PAD))
goto nla_put_failure;
if (ovs_vport_get_upcall_portids(vport, skb))
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
new file mode 100644
index 000000000000..673fd1f86ebe
--- /dev/null
+++ b/net/qrtr/Kconfig
@@ -0,0 +1,24 @@
+# Qualcomm IPC Router configuration
+#
+
+config QRTR
+ tristate "Qualcomm IPC Router support"
+ depends on ARCH_QCOM || COMPILE_TEST
+ ---help---
+ Say Y if you intend to use Qualcomm IPC router protocol. The
+ protocol is used to communicate with services provided by other
+ hardware blocks in the system.
+
+ In order to do service lookups, a userspace daemon is required to
+ maintain a service listing.
+
+if QRTR
+
+config QRTR_SMD
+ tristate "SMD IPC Router channels"
+ depends on QCOM_SMD || COMPILE_TEST
+ ---help---
+ Say Y here to support SMD based ipcrouter channels. SMD is the
+ most common transport for IPC Router.
+
+endif # QRTR
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
new file mode 100644
index 000000000000..6c00dc623b7e
--- /dev/null
+++ b/net/qrtr/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_QRTR) := qrtr.o
+obj-$(CONFIG_QRTR_SMD) += smd.o
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
new file mode 100644
index 000000000000..c985ecbe9bd6
--- /dev/null
+++ b/net/qrtr/qrtr.c
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2015, Sony Mobile Communications Inc.
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/qrtr.h>
+#include <linux/termios.h> /* For TIOCINQ/OUTQ */
+
+#include <net/sock.h>
+
+#include "qrtr.h"
+
+#define QRTR_PROTO_VER 1
+
+/* auto-bind range */
+#define QRTR_MIN_EPH_SOCKET 0x4000
+#define QRTR_MAX_EPH_SOCKET 0x7fff
+
+enum qrtr_pkt_type {
+ QRTR_TYPE_DATA = 1,
+ QRTR_TYPE_HELLO = 2,
+ QRTR_TYPE_BYE = 3,
+ QRTR_TYPE_NEW_SERVER = 4,
+ QRTR_TYPE_DEL_SERVER = 5,
+ QRTR_TYPE_DEL_CLIENT = 6,
+ QRTR_TYPE_RESUME_TX = 7,
+ QRTR_TYPE_EXIT = 8,
+ QRTR_TYPE_PING = 9,
+};
+
+/**
+ * struct qrtr_hdr - (I|R)PCrouter packet header
+ * @version: protocol version
+ * @type: packet type; one of QRTR_TYPE_*
+ * @src_node_id: source node
+ * @src_port_id: source port
+ * @confirm_rx: boolean; whether a resume-tx packet should be send in reply
+ * @size: length of packet, excluding this header
+ * @dst_node_id: destination node
+ * @dst_port_id: destination port
+ */
+struct qrtr_hdr {
+ __le32 version;
+ __le32 type;
+ __le32 src_node_id;
+ __le32 src_port_id;
+ __le32 confirm_rx;
+ __le32 size;
+ __le32 dst_node_id;
+ __le32 dst_port_id;
+} __packed;
+
+#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
+#define QRTR_NODE_BCAST ((unsigned int)-1)
+#define QRTR_PORT_CTRL ((unsigned int)-2)
+
+struct qrtr_sock {
+ /* WARNING: sk must be the first member */
+ struct sock sk;
+ struct sockaddr_qrtr us;
+ struct sockaddr_qrtr peer;
+};
+
+static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
+{
+ BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0);
+ return container_of(sk, struct qrtr_sock, sk);
+}
+
+static unsigned int qrtr_local_nid = -1;
+
+/* for node ids */
+static RADIX_TREE(qrtr_nodes, GFP_KERNEL);
+/* broadcast list */
+static LIST_HEAD(qrtr_all_nodes);
+/* lock for qrtr_nodes, qrtr_all_nodes and node reference */
+static DEFINE_MUTEX(qrtr_node_lock);
+
+/* local port allocation management */
+static DEFINE_IDR(qrtr_ports);
+static DEFINE_MUTEX(qrtr_port_lock);
+
+/**
+ * struct qrtr_node - endpoint node
+ * @ep_lock: lock for endpoint management and callbacks
+ * @ep: endpoint
+ * @ref: reference count for node
+ * @nid: node id
+ * @rx_queue: receive queue
+ * @work: scheduled work struct for recv work
+ * @item: list item for broadcast list
+ */
+struct qrtr_node {
+ struct mutex ep_lock;
+ struct qrtr_endpoint *ep;
+ struct kref ref;
+ unsigned int nid;
+
+ struct sk_buff_head rx_queue;
+ struct work_struct work;
+ struct list_head item;
+};
+
+/* Release node resources and free the node.
+ *
+ * Do not call directly, use qrtr_node_release. To be used with
+ * kref_put_mutex. As such, the node mutex is expected to be locked on call.
+ */
+static void __qrtr_node_release(struct kref *kref)
+{
+ struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
+
+ if (node->nid != QRTR_EP_NID_AUTO)
+ radix_tree_delete(&qrtr_nodes, node->nid);
+
+ list_del(&node->item);
+ mutex_unlock(&qrtr_node_lock);
+
+ skb_queue_purge(&node->rx_queue);
+ kfree(node);
+}
+
+/* Increment reference to node. */
+static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node)
+{
+ if (node)
+ kref_get(&node->ref);
+ return node;
+}
+
+/* Decrement reference to node and release as necessary. */
+static void qrtr_node_release(struct qrtr_node *node)
+{
+ if (!node)
+ return;
+ kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock);
+}
+
+/* Pass an outgoing packet socket buffer to the endpoint driver. */
+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+{
+ int rc = -ENODEV;
+
+ mutex_lock(&node->ep_lock);
+ if (node->ep)
+ rc = node->ep->xmit(node->ep, skb);
+ else
+ kfree_skb(skb);
+ mutex_unlock(&node->ep_lock);
+
+ return rc;
+}
+
+/* Lookup node by id.
+ *
+ * callers must release with qrtr_node_release()
+ */
+static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
+{
+ struct qrtr_node *node;
+
+ mutex_lock(&qrtr_node_lock);
+ node = radix_tree_lookup(&qrtr_nodes, nid);
+ node = qrtr_node_acquire(node);
+ mutex_unlock(&qrtr_node_lock);
+
+ return node;
+}
+
+/* Assign node id to node.
+ *
+ * This is mostly useful for automatic node id assignment, based on
+ * the source id in the incoming packet.
+ */
+static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
+{
+ if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO)
+ return;
+
+ mutex_lock(&qrtr_node_lock);
+ radix_tree_insert(&qrtr_nodes, nid, node);
+ node->nid = nid;
+ mutex_unlock(&qrtr_node_lock);
+}
+
+/**
+ * qrtr_endpoint_post() - post incoming data
+ * @ep: endpoint handle
+ * @data: data pointer
+ * @len: size of data in bytes
+ *
+ * Return: 0 on success; negative error code on failure
+ */
+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
+{
+ struct qrtr_node *node = ep->node;
+ const struct qrtr_hdr *phdr = data;
+ struct sk_buff *skb;
+ unsigned int psize;
+ unsigned int size;
+ unsigned int type;
+ unsigned int ver;
+ unsigned int dst;
+
+ if (len < QRTR_HDR_SIZE || len & 3)
+ return -EINVAL;
+
+ ver = le32_to_cpu(phdr->version);
+ size = le32_to_cpu(phdr->size);
+ type = le32_to_cpu(phdr->type);
+ dst = le32_to_cpu(phdr->dst_port_id);
+
+ psize = (size + 3) & ~3;
+
+ if (ver != QRTR_PROTO_VER)
+ return -EINVAL;
+
+ if (len != psize + QRTR_HDR_SIZE)
+ return -EINVAL;
+
+ if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
+ return -EINVAL;
+
+ skb = netdev_alloc_skb(NULL, len);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reset_transport_header(skb);
+ memcpy(skb_put(skb, len), data, len);
+
+ skb_queue_tail(&node->rx_queue, skb);
+ schedule_work(&node->work);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
+
+/* Allocate and construct a resume-tx packet. */
+static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
+ u32 dst_node, u32 port)
+{
+ const int pkt_len = 20;
+ struct qrtr_hdr *hdr;
+ struct sk_buff *skb;
+ u32 *buf;
+
+ skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+ skb_reset_transport_header(skb);
+
+ hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE);
+ hdr->version = cpu_to_le32(QRTR_PROTO_VER);
+ hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+ hdr->src_node_id = cpu_to_le32(src_node);
+ hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+ hdr->confirm_rx = cpu_to_le32(0);
+ hdr->size = cpu_to_le32(pkt_len);
+ hdr->dst_node_id = cpu_to_le32(dst_node);
+ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+
+ buf = (u32 *)skb_put(skb, pkt_len);
+ memset(buf, 0, pkt_len);
+ buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+ buf[1] = cpu_to_le32(src_node);
+ buf[2] = cpu_to_le32(port);
+
+ return skb;
+}
+
+static struct qrtr_sock *qrtr_port_lookup(int port);
+static void qrtr_port_put(struct qrtr_sock *ipc);
+
+/* Handle and route a received packet.
+ *
+ * This will auto-reply with a resume-tx packet as necessary.
+ */
+static void qrtr_node_rx_work(struct work_struct *work)
+{
+ struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
+ const struct qrtr_hdr *phdr;
+ u32 dst_node, dst_port;
+ struct qrtr_sock *ipc;
+ u32 src_node;
+ int confirm;
+
+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
+ src_node = le32_to_cpu(phdr->src_node_id);
+ dst_node = le32_to_cpu(phdr->dst_node_id);
+ dst_port = le32_to_cpu(phdr->dst_port_id);
+ confirm = !!phdr->confirm_rx;
+
+ qrtr_node_assign(node, src_node);
+
+ ipc = qrtr_port_lookup(dst_port);
+ if (!ipc) {
+ kfree_skb(skb);
+ } else {
+ if (sock_queue_rcv_skb(&ipc->sk, skb))
+ kfree_skb(skb);
+
+ qrtr_port_put(ipc);
+ }
+
+ if (confirm) {
+ skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
+ if (!skb)
+ break;
+ if (qrtr_node_enqueue(node, skb))
+ break;
+ }
+ }
+}
+
+/**
+ * qrtr_endpoint_register() - register a new endpoint
+ * @ep: endpoint to register
+ * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment
+ * Return: 0 on success; negative error code on failure
+ *
+ * The specified endpoint must have the xmit function pointer set on call.
+ */
+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
+{
+ struct qrtr_node *node;
+
+ if (!ep || !ep->xmit)
+ return -EINVAL;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ INIT_WORK(&node->work, qrtr_node_rx_work);
+ kref_init(&node->ref);
+ mutex_init(&node->ep_lock);
+ skb_queue_head_init(&node->rx_queue);
+ node->nid = QRTR_EP_NID_AUTO;
+ node->ep = ep;
+
+ qrtr_node_assign(node, nid);
+
+ mutex_lock(&qrtr_node_lock);
+ list_add(&node->item, &qrtr_all_nodes);
+ mutex_unlock(&qrtr_node_lock);
+ ep->node = node;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
+
+/**
+ * qrtr_endpoint_unregister - unregister endpoint
+ * @ep: endpoint to unregister
+ */
+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
+{
+ struct qrtr_node *node = ep->node;
+
+ mutex_lock(&node->ep_lock);
+ node->ep = NULL;
+ mutex_unlock(&node->ep_lock);
+
+ qrtr_node_release(node);
+ ep->node = NULL;
+}
+EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister);
+
+/* Lookup socket by port.
+ *
+ * Callers must release with qrtr_port_put()
+ */
+static struct qrtr_sock *qrtr_port_lookup(int port)
+{
+ struct qrtr_sock *ipc;
+
+ if (port == QRTR_PORT_CTRL)
+ port = 0;
+
+ mutex_lock(&qrtr_port_lock);
+ ipc = idr_find(&qrtr_ports, port);
+ if (ipc)
+ sock_hold(&ipc->sk);
+ mutex_unlock(&qrtr_port_lock);
+
+ return ipc;
+}
+
+/* Release acquired socket. */
+static void qrtr_port_put(struct qrtr_sock *ipc)
+{
+ sock_put(&ipc->sk);
+}
+
+/* Remove port assignment. */
+static void qrtr_port_remove(struct qrtr_sock *ipc)
+{
+ int port = ipc->us.sq_port;
+
+ if (port == QRTR_PORT_CTRL)
+ port = 0;
+
+ __sock_put(&ipc->sk);
+
+ mutex_lock(&qrtr_port_lock);
+ idr_remove(&qrtr_ports, port);
+ mutex_unlock(&qrtr_port_lock);
+}
+
+/* Assign port number to socket.
+ *
+ * Specify port in the integer pointed to by port, and it will be adjusted
+ * on return as necessary.
+ *
+ * Port may be:
+ * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET]
+ * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN
+ * >QRTR_MIN_EPH_SOCKET: Specified; available to all
+ */
+static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
+{
+ int rc;
+
+ mutex_lock(&qrtr_port_lock);
+ if (!*port) {
+ rc = idr_alloc(&qrtr_ports, ipc,
+ QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1,
+ GFP_ATOMIC);
+ if (rc >= 0)
+ *port = rc;
+ } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
+ rc = -EACCES;
+ } else if (*port == QRTR_PORT_CTRL) {
+ rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC);
+ } else {
+ rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC);
+ if (rc >= 0)
+ *port = rc;
+ }
+ mutex_unlock(&qrtr_port_lock);
+
+ if (rc == -ENOSPC)
+ return -EADDRINUSE;
+ else if (rc < 0)
+ return rc;
+
+ sock_hold(&ipc->sk);
+
+ return 0;
+}
+
+/* Bind socket to address.
+ *
+ * Socket should be locked upon call.
+ */
+static int __qrtr_bind(struct socket *sock,
+ const struct sockaddr_qrtr *addr, int zapped)
+{
+ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ int port;
+ int rc;
+
+ /* rebinding ok */
+ if (!zapped && addr->sq_port == ipc->us.sq_port)
+ return 0;
+
+ port = addr->sq_port;
+ rc = qrtr_port_assign(ipc, &port);
+ if (rc)
+ return rc;
+
+ /* unbind previous, if any */
+ if (!zapped)
+ qrtr_port_remove(ipc);
+ ipc->us.sq_port = port;
+
+ sock_reset_flag(sk, SOCK_ZAPPED);
+
+ return 0;
+}
+
+/* Auto bind to an ephemeral port. */
+static int qrtr_autobind(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct sockaddr_qrtr addr;
+
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ return 0;
+
+ addr.sq_family = AF_QIPCRTR;
+ addr.sq_node = qrtr_local_nid;
+ addr.sq_port = 0;
+
+ return __qrtr_bind(sock, &addr, 1);
+}
+
+/* Bind socket to specified sockaddr. */
+static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
+ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ int rc;
+
+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
+ return -EINVAL;
+
+ if (addr->sq_node != ipc->us.sq_node)
+ return -EINVAL;
+
+ lock_sock(sk);
+ rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED));
+ release_sock(sk);
+
+ return rc;
+}
+
+/* Queue packet to local peer socket. */
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+{
+ const struct qrtr_hdr *phdr;
+ struct qrtr_sock *ipc;
+
+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
+
+ ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
+ if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+
+ if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+ qrtr_port_put(ipc);
+ kfree_skb(skb);
+ return -ENOSPC;
+ }
+
+ qrtr_port_put(ipc);
+
+ return 0;
+}
+
+/* Queue packet for broadcast. */
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+{
+ struct sk_buff *skbn;
+
+ mutex_lock(&qrtr_node_lock);
+ list_for_each_entry(node, &qrtr_all_nodes, item) {
+ skbn = skb_clone(skb, GFP_KERNEL);
+ if (!skbn)
+ break;
+ skb_set_owner_w(skbn, skb->sk);
+ qrtr_node_enqueue(node, skbn);
+ }
+ mutex_unlock(&qrtr_node_lock);
+
+ qrtr_local_enqueue(node, skb);
+
+ return 0;
+}
+
+static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
+ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ struct qrtr_node *node;
+ struct qrtr_hdr *hdr;
+ struct sk_buff *skb;
+ size_t plen;
+ int rc;
+
+ if (msg->msg_flags & ~(MSG_DONTWAIT))
+ return -EINVAL;
+
+ if (len > 65535)
+ return -EMSGSIZE;
+
+ lock_sock(sk);
+
+ if (addr) {
+ if (msg->msg_namelen < sizeof(*addr)) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
+ if (addr->sq_family != AF_QIPCRTR) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
+ rc = qrtr_autobind(sock);
+ if (rc) {
+ release_sock(sk);
+ return rc;
+ }
+ } else if (sk->sk_state == TCP_ESTABLISHED) {
+ addr = &ipc->peer;
+ } else {
+ release_sock(sk);
+ return -ENOTCONN;
+ }
+
+ node = NULL;
+ if (addr->sq_node == QRTR_NODE_BCAST) {
+ enqueue_fn = qrtr_bcast_enqueue;
+ } else if (addr->sq_node == ipc->us.sq_node) {
+ enqueue_fn = qrtr_local_enqueue;
+ } else {
+ enqueue_fn = qrtr_node_enqueue;
+ node = qrtr_node_lookup(addr->sq_node);
+ if (!node) {
+ release_sock(sk);
+ return -ECONNRESET;
+ }
+ }
+
+ plen = (len + 3) & ~3;
+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
+ msg->msg_flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ goto out_node;
+
+ skb_reset_transport_header(skb);
+ skb_put(skb, len + QRTR_HDR_SIZE);
+
+ hdr = (struct qrtr_hdr *)skb_transport_header(skb);
+ hdr->version = cpu_to_le32(QRTR_PROTO_VER);
+ hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
+ hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
+ hdr->confirm_rx = cpu_to_le32(0);
+ hdr->size = cpu_to_le32(len);
+ hdr->dst_node_id = cpu_to_le32(addr->sq_node);
+ hdr->dst_port_id = cpu_to_le32(addr->sq_port);
+
+ rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
+ &msg->msg_iter, len);
+ if (rc) {
+ kfree_skb(skb);
+ goto out_node;
+ }
+
+ if (plen != len) {
+ skb_pad(skb, plen - len);
+ skb_put(skb, plen - len);
+ }
+
+ if (ipc->us.sq_port == QRTR_PORT_CTRL) {
+ if (len < 4) {
+ rc = -EINVAL;
+ kfree_skb(skb);
+ goto out_node;
+ }
+
+ /* control messages already require the type as 'command' */
+ skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
+ } else {
+ hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
+ }
+
+ rc = enqueue_fn(node, skb);
+ if (rc >= 0)
+ rc = len;
+
+out_node:
+ qrtr_node_release(node);
+ release_sock(sk);
+
+ return rc;
+}
+
+static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
+{
+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
+ const struct qrtr_hdr *phdr;
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ int copied, rc;
+
+ lock_sock(sk);
+
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ release_sock(sk);
+ return -EADDRNOTAVAIL;
+ }
+
+ skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+ flags & MSG_DONTWAIT, &rc);
+ if (!skb) {
+ release_sock(sk);
+ return rc;
+ }
+
+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
+ copied = le32_to_cpu(phdr->size);
+ if (copied > size) {
+ copied = size;
+ msg->msg_flags |= MSG_TRUNC;
+ }
+
+ rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied);
+ if (rc < 0)
+ goto out;
+ rc = copied;
+
+ if (addr) {
+ addr->sq_family = AF_QIPCRTR;
+ addr->sq_node = le32_to_cpu(phdr->src_node_id);
+ addr->sq_port = le32_to_cpu(phdr->src_port_id);
+ msg->msg_namelen = sizeof(*addr);
+ }
+
+out:
+ skb_free_datagram(sk, skb);
+ release_sock(sk);
+
+ return rc;
+}
+
+static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
+ int len, int flags)
+{
+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
+ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ int rc;
+
+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
+ return -EINVAL;
+
+ lock_sock(sk);
+
+ sk->sk_state = TCP_CLOSE;
+ sock->state = SS_UNCONNECTED;
+
+ rc = qrtr_autobind(sock);
+ if (rc) {
+ release_sock(sk);
+ return rc;
+ }
+
+ ipc->peer = *addr;
+ sock->state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
+
+ release_sock(sk);
+
+ return 0;
+}
+
+static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
+ int *len, int peer)
+{
+ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+ struct sockaddr_qrtr qaddr;
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
+ if (peer) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ release_sock(sk);
+ return -ENOTCONN;
+ }
+
+ qaddr = ipc->peer;
+ } else {
+ qaddr = ipc->us;
+ }
+ release_sock(sk);
+
+ *len = sizeof(qaddr);
+ qaddr.sq_family = AF_QIPCRTR;
+
+ memcpy(saddr, &qaddr, sizeof(qaddr));
+
+ return 0;
+}
+
+static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ struct sockaddr_qrtr *sq;
+ struct sk_buff *skb;
+ struct ifreq ifr;
+ long len = 0;
+ int rc = 0;
+
+ lock_sock(sk);
+
+ switch (cmd) {
+ case TIOCOUTQ:
+ len = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+ if (len < 0)
+ len = 0;
+ rc = put_user(len, (int __user *)argp);
+ break;
+ case TIOCINQ:
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb)
+ len = skb->len - QRTR_HDR_SIZE;
+ rc = put_user(len, (int __user *)argp);
+ break;
+ case SIOCGIFADDR:
+ if (copy_from_user(&ifr, argp, sizeof(ifr))) {
+ rc = -EFAULT;
+ break;
+ }
+
+ sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
+ *sq = ipc->us;
+ if (copy_to_user(argp, &ifr, sizeof(ifr))) {
+ rc = -EFAULT;
+ break;
+ }
+ break;
+ case SIOCGSTAMP:
+ rc = sock_get_timestamp(sk, argp);
+ break;
+ case SIOCADDRT:
+ case SIOCDELRT:
+ case SIOCSIFADDR:
+ case SIOCGIFDSTADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFNETMASK:
+ rc = -EINVAL;
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+
+ release_sock(sk);
+
+ return rc;
+}
+
+static int qrtr_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct qrtr_sock *ipc;
+
+ if (!sk)
+ return 0;
+
+ lock_sock(sk);
+
+ ipc = qrtr_sk(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_state_change(sk);
+
+ sock_set_flag(sk, SOCK_DEAD);
+ sock->sk = NULL;
+
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ qrtr_port_remove(ipc);
+
+ skb_queue_purge(&sk->sk_receive_queue);
+
+ release_sock(sk);
+ sock_put(sk);
+
+ return 0;
+}
+
+static const struct proto_ops qrtr_proto_ops = {
+ .owner = THIS_MODULE,
+ .family = AF_QIPCRTR,
+ .bind = qrtr_bind,
+ .connect = qrtr_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .listen = sock_no_listen,
+ .sendmsg = qrtr_sendmsg,
+ .recvmsg = qrtr_recvmsg,
+ .getname = qrtr_getname,
+ .ioctl = qrtr_ioctl,
+ .poll = datagram_poll,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .release = qrtr_release,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static struct proto qrtr_proto = {
+ .name = "QIPCRTR",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct qrtr_sock),
+};
+
+static int qrtr_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
+{
+ struct qrtr_sock *ipc;
+ struct sock *sk;
+
+ if (sock->type != SOCK_DGRAM)
+ return -EPROTOTYPE;
+
+ sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_set_flag(sk, SOCK_ZAPPED);
+
+ sock_init_data(sock, sk);
+ sock->ops = &qrtr_proto_ops;
+
+ ipc = qrtr_sk(sk);
+ ipc->us.sq_family = AF_QIPCRTR;
+ ipc->us.sq_node = qrtr_local_nid;
+ ipc->us.sq_port = 0;
+
+ return 0;
+}
+
+static const struct nla_policy qrtr_policy[IFA_MAX + 1] = {
+ [IFA_LOCAL] = { .type = NLA_U32 },
+};
+
+static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct nlattr *tb[IFA_MAX + 1];
+ struct ifaddrmsg *ifm;
+ int rc;
+
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ASSERT_RTNL();
+
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy);
+ if (rc < 0)
+ return rc;
+
+ ifm = nlmsg_data(nlh);
+ if (!tb[IFA_LOCAL])
+ return -EINVAL;
+
+ qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]);
+ return 0;
+}
+
+static const struct net_proto_family qrtr_family = {
+ .owner = THIS_MODULE,
+ .family = AF_QIPCRTR,
+ .create = qrtr_create,
+};
+
+static int __init qrtr_proto_init(void)
+{
+ int rc;
+
+ rc = proto_register(&qrtr_proto, 1);
+ if (rc)
+ return rc;
+
+ rc = sock_register(&qrtr_family);
+ if (rc) {
+ proto_unregister(&qrtr_proto);
+ return rc;
+ }
+
+ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL);
+
+ return 0;
+}
+module_init(qrtr_proto_init);
+
+static void __exit qrtr_proto_fini(void)
+{
+ rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR);
+ sock_unregister(qrtr_family.family);
+ proto_unregister(&qrtr_proto);
+}
+module_exit(qrtr_proto_fini);
+
+MODULE_DESCRIPTION("Qualcomm IPC-router driver");
+MODULE_LICENSE("GPL v2");
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
new file mode 100644
index 000000000000..2b848718f8fe
--- /dev/null
+++ b/net/qrtr/qrtr.h
@@ -0,0 +1,31 @@
+#ifndef __QRTR_H_
+#define __QRTR_H_
+
+#include <linux/types.h>
+
+struct sk_buff;
+
+/* endpoint node id auto assignment */
+#define QRTR_EP_NID_AUTO (-1)
+
+/**
+ * struct qrtr_endpoint - endpoint handle
+ * @xmit: Callback for outgoing packets
+ *
+ * The socket buffer passed to the xmit function becomes owned by the endpoint
+ * driver. As such, when the driver is done with the buffer, it should
+ * call kfree_skb() on failure, or consume_skb() on success.
+ */
+struct qrtr_endpoint {
+ int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb);
+ /* private: not for endpoint use */
+ struct qrtr_node *node;
+};
+
+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid);
+
+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep);
+
+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len);
+
+#endif
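The endpoint API above deliberately keeps struct qrtr_hdr private to qrtr.c: transports hand the core complete frames, and qrtr_endpoint_post() validates them (version, 4-byte alignment, and len equal to header plus padded payload). The kernel-side sketch below is illustrative and not part of this series; it hand-builds a minimal DATA frame the way a transport's receive path might, with all node and port ids made up.

/* Illustrative only: build a minimal QRTR DATA frame and feed it to the
 * router core, as a transport's receive path would. The eight __le32 words
 * mirror struct qrtr_hdr in qrtr.c; ids are made up.
 */
#include <asm/byteorder.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

#include "qrtr.h"

static int demo_post_frame(struct qrtr_endpoint *ep)
{
	const char payload[] = "ping";			/* 5 bytes incl. NUL */
	size_t psize = ALIGN(sizeof(payload), 4);	/* payload padded to 4 */
	size_t len = 8 * sizeof(__le32) + psize;	/* 32-byte header + pad */
	__le32 *hdr;
	void *buf;
	int rc;

	buf = kzalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	hdr = buf;
	hdr[0] = cpu_to_le32(1);		/* version (QRTR_PROTO_VER) */
	hdr[1] = cpu_to_le32(1);		/* type (QRTR_TYPE_DATA) */
	hdr[2] = cpu_to_le32(3);		/* src node (made up) */
	hdr[3] = cpu_to_le32(0x4001);		/* src port (made up) */
	hdr[4] = cpu_to_le32(0);		/* confirm_rx: no resume-tx */
	hdr[5] = cpu_to_le32(sizeof(payload));	/* size: unpadded payload */
	hdr[6] = cpu_to_le32(1);		/* dst node (made up) */
	hdr[7] = cpu_to_le32(0x4000);		/* dst port (made up) */
	memcpy(hdr + 8, payload, sizeof(payload));

	/* qrtr_endpoint_post() checks that len is a multiple of 4 and that
	 * it equals the header plus the payload size rounded up to 4 bytes.
	 */
	rc = qrtr_endpoint_post(ep, buf, len);
	kfree(buf);
	return rc;
}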
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
new file mode 100644
index 000000000000..84ebce73aa23
--- /dev/null
+++ b/net/qrtr/smd.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2015, Sony Mobile Communications Inc.
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/soc/qcom/smd.h>
+
+#include "qrtr.h"
+
+struct qrtr_smd_dev {
+ struct qrtr_endpoint ep;
+ struct qcom_smd_channel *channel;
+};
+
+/* from smd to qrtr */
+static int qcom_smd_qrtr_callback(struct qcom_smd_device *sdev,
+ const void *data, size_t len)
+{
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
+ int rc;
+
+ if (!qdev)
+ return -EAGAIN;
+
+ rc = qrtr_endpoint_post(&qdev->ep, data, len);
+ if (rc == -EINVAL) {
+ dev_err(&sdev->dev, "invalid ipcrouter packet\n");
+ /* return 0 to let smd drop the packet */
+ rc = 0;
+ }
+
+ return rc;
+}
+
+/* from qrtr to smd */
+static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
+{
+ struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep);
+ int rc;
+
+ rc = skb_linearize(skb);
+ if (rc)
+ goto out;
+
+ rc = qcom_smd_send(qdev->channel, skb->data, skb->len);
+
+out:
+ if (rc)
+ kfree_skb(skb);
+ else
+ consume_skb(skb);
+ return rc;
+}
+
+static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev)
+{
+ struct qrtr_smd_dev *qdev;
+ int rc;
+
+ qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL);
+ if (!qdev)
+ return -ENOMEM;
+
+ qdev->channel = sdev->channel;
+ qdev->ep.xmit = qcom_smd_qrtr_send;
+
+ rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
+ if (rc)
+ return rc;
+
+ dev_set_drvdata(&sdev->dev, qdev);
+
+ dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n");
+
+ return 0;
+}
+
+static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev)
+{
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
+
+ qrtr_endpoint_unregister(&qdev->ep);
+
+ dev_set_drvdata(&sdev->dev, NULL);
+}
+
+static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = {
+ { "IPCRTR" },
+ {}
+};
+
+static struct qcom_smd_driver qcom_smd_qrtr_driver = {
+ .probe = qcom_smd_qrtr_probe,
+ .remove = qcom_smd_qrtr_remove,
+ .callback = qcom_smd_qrtr_callback,
+ .smd_match_table = qcom_smd_qrtr_smd_match,
+ .driver = {
+ .name = "qcom_smd_qrtr",
+ .owner = THIS_MODULE,
+ },
+};
+
+module_qcom_smd_driver(qcom_smd_qrtr_driver);
+
+MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
+MODULE_LICENSE("GPL v2");
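The SMD driver above is the only real transport in this series. Purely to illustrate the ownership rules spelled out in qrtr.h (kfree_skb() on failure, consume_skb() on success), a hypothetical loopback endpoint might look like the sketch below; it feeds transmitted frames straight back into the router core and is not meant as a useful transport.

/* Hypothetical loopback endpoint, illustration only: frames handed to
 * ->xmit() are posted straight back to the local router.
 */
#include <linux/module.h>
#include <linux/skbuff.h>

#include "qrtr.h"

static struct qrtr_endpoint lo_ep;

static int qrtr_lo_xmit(struct qrtr_endpoint *ep, struct sk_buff *skb)
{
	int rc;

	rc = skb_linearize(skb);
	if (rc)
		goto out;

	/* The skb already carries the full qrtr header; qrtr_endpoint_post()
	 * copies the data, so the buffer can be released right away.
	 */
	rc = qrtr_endpoint_post(ep, skb->data, skb->len);
out:
	if (rc)
		kfree_skb(skb);		/* failure: endpoint frees the skb */
	else
		consume_skb(skb);	/* success: endpoint consumes it */
	return rc;
}

static int __init qrtr_lo_init(void)
{
	lo_ep.xmit = qrtr_lo_xmit;
	return qrtr_endpoint_register(&lo_ep, QRTR_EP_NID_AUTO);
}
module_init(qrtr_lo_init);

static void __exit qrtr_lo_exit(void)
{
	qrtr_endpoint_unregister(&lo_ep);
}
module_exit(qrtr_lo_exit);

MODULE_DESCRIPTION("Illustrative QRTR loopback endpoint");
MODULE_LICENSE("GPL v2");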
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 61ed2a8764ba..86187dad1440 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
/*
* This is the only path that sets tc->t_sock. Send and receive trust that
- * it is set. The RDS_CONN_CONNECTED bit protects those paths from being
+ * it is set. The RDS_CONN_UP bit protects those paths from being
* called while it isn't set.
*/
void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
@@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
if (!tc)
return -ENOMEM;
+ mutex_init(&tc->t_conn_lock);
tc->t_sock = NULL;
tc->t_tinc = NULL;
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 64f873c0c6b6..41c228300525 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,10 @@ struct rds_tcp_connection {
struct list_head t_tcp_node;
struct rds_connection *conn;
+ /* t_conn_lock synchronizes the connection establishment between
+ * rds_tcp_accept_one and rds_tcp_conn_connect
+ */
+ struct mutex t_conn_lock;
struct socket *t_sock;
void *t_orig_write_space;
void *t_orig_data_ready;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5cb16875c460..49a3fcfed360 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
struct socket *sock = NULL;
struct sockaddr_in src, dest;
int ret;
+ struct rds_tcp_connection *tc = conn->c_transport_data;
+
+ mutex_lock(&tc->t_conn_lock);
+ if (rds_conn_up(conn)) {
+ mutex_unlock(&tc->t_conn_lock);
+ return 0;
+ }
ret = sock_create_kern(rds_conn_net(conn), PF_INET,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret < 0)
@@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
}
out:
+ mutex_unlock(&tc->t_conn_lock);
if (sock)
sock_release(sock);
return ret;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 0936a4a32b47..be263cdf268b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock)
struct rds_connection *conn;
int ret;
struct inet_sock *inet;
- struct rds_tcp_connection *rs_tcp;
+ struct rds_tcp_connection *rs_tcp = NULL;
+ int conn_state;
+ struct sock *nsk;
ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
sock->sk->sk_type, sock->sk->sk_protocol,
@@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock)
* rds_tcp_state_change() will do that cleanup
*/
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
- if (rs_tcp->t_sock &&
- ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
- struct sock *nsk = new_sock->sk;
-
- nsk->sk_user_data = NULL;
- nsk->sk_prot->disconnect(nsk, 0);
- tcp_done(nsk);
- new_sock = NULL;
- ret = 0;
- goto out;
- } else if (rs_tcp->t_sock) {
- rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
- conn->c_outgoing = 0;
- }
-
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+ mutex_lock(&rs_tcp->t_conn_lock);
+ conn_state = rds_conn_state(conn);
+ if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
+ goto rst_nsk;
+ if (rs_tcp->t_sock) {
+ /* Need to resolve a duelling SYN between peers.
+ * We have an outstanding SYN to this peer, which may
+ * potentially have transitioned to the RDS_CONN_UP state,
+ * so we must quiesce any send threads before resetting
+ * c_transport_data.
+ */
+ wait_event(conn->c_waitq,
+ !test_bit(RDS_IN_XMIT, &conn->c_flags));
+ if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
+ goto rst_nsk;
+ } else if (rs_tcp->t_sock) {
+ rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
+ conn->c_outgoing = 0;
+ }
+ }
rds_tcp_set_callbacks(new_sock, conn);
- rds_connect_complete(conn);
+ rds_connect_complete(conn); /* marks RDS_CONN_UP */
+ new_sock = NULL;
+ ret = 0;
+ goto out;
+rst_nsk:
+ /* reset the newly returned accept sock and bail */
+ nsk = new_sock->sk;
+ rds_tcp_stats_inc(s_tcp_listen_closed_stale);
+ nsk->sk_user_data = NULL;
+ nsk->sk_prot->disconnect(nsk, 0);
+ tcp_done(nsk);
new_sock = NULL;
ret = 0;
-
out:
+ if (rs_tcp)
+ mutex_unlock(&rs_tcp->t_conn_lock);
if (new_sock)
sock_release(new_sock);
return ret;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 27a992154804..d75d8b56a9e3 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -207,22 +207,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
if (left && tc->t_tinc_data_rem) {
- clone = skb_clone(skb, arg->gfp);
+ to_copy = min(tc->t_tinc_data_rem, left);
+
+ clone = pskb_extract(skb, offset, to_copy, arg->gfp);
if (!clone) {
desc->error = -ENOMEM;
goto out;
}
- to_copy = min(tc->t_tinc_data_rem, left);
- if (!pskb_pull(clone, offset) ||
- pskb_trim(clone, to_copy)) {
- pr_warn("rds_tcp_data_recv: pull/trim failed "
- "left %zu data_rem %zu skb_len %d\n",
- left, tc->t_tinc_data_rem, skb->len);
- kfree_skb(clone);
- desc->error = -ENOMEM;
- goto out;
- }
skb_queue_tail(&tinc->ti_skb_list, clone);
rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 01e038146b7c..6ff97412a0bb 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -698,12 +698,12 @@ void rxrpc_data_ready(struct sock *sk)
if (skb_checksum_complete(skb)) {
rxrpc_free_skb(skb);
rxrpc_put_local(local);
- UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0);
+ __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
_leave(" [CSUM failed]");
return;
}
- UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0);
+ __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
/* The socket buffer we have is owned by UDP, with UDP's data all over
* it, but we really want our own data there.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 96066665e376..336774a535c3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -657,12 +657,15 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (compat_mode) {
if (a->type == TCA_OLD_COMPAT)
err = gnet_stats_start_copy_compat(skb, 0,
- TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d);
+ TCA_STATS,
+ TCA_XSTATS,
+ &p->tcfc_lock, &d,
+ TCA_PAD);
else
return 0;
} else
err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
- &p->tcfc_lock, &d);
+ &p->tcfc_lock, &d, TCA_ACT_PAD);
if (err < 0)
goto errout;
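The nla_put() to nla_put_64bit()/nla_reserve_64bit() conversions in the following act_* and cls_* files all use the same pattern: the dump path passes an extra *_PAD attribute type, and the netlink core emits that empty attribute whenever it is needed to land the payload on an 8-byte boundary, so structs containing u64 counters read safely on alignment-sensitive architectures. A condensed sketch of the pattern, with hypothetical TCA_FOO_* names:

/* Sketch only; TCA_FOO_STATS/TCA_FOO_PAD and struct foo_stats are
 * hypothetical stand-ins for the per-action attributes used below.
 */
#include <linux/types.h>
#include <net/netlink.h>

enum {
	TCA_FOO_UNSPEC,
	TCA_FOO_STATS,
	TCA_FOO_PAD,
	__TCA_FOO_MAX,
};

struct foo_stats {
	u64 packets;
	u64 bytes;
};

static int foo_dump_stats(struct sk_buff *skb, const struct foo_stats *st)
{
	/* nla_put_64bit() first inserts an empty TCA_FOO_PAD attribute when
	 * the current message offset would misalign the payload, so the u64
	 * fields of *st end up 8-byte aligned in the dump.
	 */
	if (nla_put_64bit(skb, TCA_FOO_STATS, sizeof(*st), st, TCA_FOO_PAD))
		return -EMSGSIZE;

	return 0;
}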
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 8c9f1f0459ab..c7123e01c2ca 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -53,9 +53,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
@@ -156,7 +158,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
- if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
+ if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm,
+ TCA_ACT_BPF_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index c0ed93ce2391..2ba700c765e0 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -163,7 +163,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
- if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
+ TCA_CONNMARK_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d22426cdebc0..28e934ed038a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -549,7 +549,7 @@ static int tcf_csum_dump(struct sk_buff *skb,
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
- if (nla_put(skb, TCA_CSUM_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 887fc1f209ff..1a6e09fbb2a5 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -177,7 +177,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
- if (nla_put(skb, TCA_GACT_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c589a9ba506a..556f44c9c454 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -550,7 +550,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(ife->tcf_tm.expires);
- if (nla_put(skb, TCA_IFE_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
goto nla_put_failure;
if (!is_zero_ether_addr(ife->eth_dst)) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 350e134cffb3..1464f6a09446 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -275,7 +275,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
- if (nla_put(skb, TCA_IPT_TM, sizeof (tm), &tm))
+ if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
goto nla_put_failure;
kfree(t);
return skb->len;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e8a760cf7775..dea57c1ec90c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -214,7 +214,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
- if (nla_put(skb, TCA_MIRRED_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 0f65cdfbfb1d..c0a879f940de 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -267,7 +267,7 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
- if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 429c3ab65142..c6e18f230af6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -203,7 +203,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
- if (nla_put(skb, TCA_PEDIT_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
goto nla_put_failure;
kfree(opt);
return skb->len;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 75b2be13fbcc..2057fd56d74c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -155,7 +155,7 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
- if (nla_put(skb, TCA_DEF_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cfcdbdc00c9b..51b24998904f 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -167,7 +167,7 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
- if (nla_put(skb, TCA_SKBEDIT_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index bab8ae0cefc0..c1682ab9bc7e 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -175,7 +175,7 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(v->tcf_tm.expires);
- if (nla_put(skb, TCA_VLAN_TM, sizeof(t), &t))
+ if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 425fe6a0eda3..7b342c779da7 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -96,9 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
+ bpf_compute_data_end(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
}
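bpf_compute_data_end() has to be rerun whenever skb->data moves (hence the calls around the ingress __skb_push()/__skb_pull() in act_bpf and cls_bpf), because programs using direct packet access bounds-check against the cached data_end. Below is a hypothetical tc BPF action in restricted C that relies on that invariant; the section name and htons helper follow common sample conventions and are assumptions, not mandated by this patch:

/* Hypothetical tc BPF action in restricted C, illustrating why data_end must
 * be refreshed after __skb_push(): direct packet access bounds checks compare
 * against the cached skb->data_end.
 */
#include <asm/byteorder.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>

#ifndef __section
#define __section(x) __attribute__((section(x), used))
#endif

__section("action") int pass_only_ipv4(struct __sk_buff *skb)
{
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	struct ethhdr *eth = data;

	/* Verifier-mandated bounds check; only meaningful because the kernel
	 * keeps data_end in sync with the mac-header-pushed layout at ingress.
	 */
	if (data + sizeof(*eth) > data_end)
		return TC_ACT_OK;

	if (eth->h_proto != __constant_htons(ETH_P_IP))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

char __license[] __section("license") = "GPL";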
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 563cdad76448..e64877a3c084 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1140,9 +1140,10 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
gpf->kcnts[i] += pf->kcnts[i];
}
- if (nla_put(skb, TCA_U32_PCNT,
- sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
- gpf)) {
+ if (nla_put_64bit(skb, TCA_U32_PCNT,
+ sizeof(struct tc_u32_pcnt) +
+ n->sel.nkeys * sizeof(u64),
+ gpf, TCA_U32_PAD)) {
kfree(gpf);
goto nla_put_failure;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 3b180ff72f79..64f71a2155f3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1365,7 +1365,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d) < 0)
+ qdisc_root_sleeping_lock(q), &d,
+ TCA_PAD) < 0)
goto nla_put_failure;
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1679,7 +1680,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d) < 0)
+ qdisc_root_sleeping_lock(q), &d,
+ TCA_PAD) < 0)
goto nla_put_failure;
if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a5e420b3d4ab..bb8bd9314629 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -59,8 +59,12 @@ struct fq_codel_sched_data {
u32 flows_cnt; /* number of flows */
u32 perturbation; /* hash perturbation */
u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
+ u32 drop_batch_size;
+ u32 memory_limit;
struct codel_params cparams;
struct codel_stats cstats;
+ u32 memory_usage;
+ u32 drop_overmemory;
u32 drop_overlimit;
u32 new_flow_count;
@@ -135,17 +139,21 @@ static inline void flow_queue_add(struct fq_codel_flow *flow,
skb->next = NULL;
}
-static unsigned int fq_codel_drop(struct Qdisc *sch)
+static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
unsigned int maxbacklog = 0, idx = 0, i, len;
struct fq_codel_flow *flow;
+ unsigned int threshold;
+ unsigned int mem = 0;
- /* Queue is full! Find the fat flow and drop packet from it.
+ /* Queue is full! Find the fat flow and drop packet(s) from it.
* This might sound expensive, but with 1024 flows, we scan
* 4KB of memory, and we dont need to handle a complex tree
* in fast path (packet queue/enqueue) with many cache misses.
+ * In stress mode, we'll try to drop 64 packets from the flow,
+ * amortizing this linear lookup to one cache line per drop.
*/
for (i = 0; i < q->flows_cnt; i++) {
if (q->backlogs[i] > maxbacklog) {
@@ -153,15 +161,26 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
idx = i;
}
}
+
+ /* Our goal is to drop half of this fat flow backlog */
+ threshold = maxbacklog >> 1;
+
flow = &q->flows[idx];
- skb = dequeue_head(flow);
- len = qdisc_pkt_len(skb);
+ len = 0;
+ i = 0;
+ do {
+ skb = dequeue_head(flow);
+ len += qdisc_pkt_len(skb);
+ mem += skb->truesize;
+ kfree_skb(skb);
+ } while (++i < max_packets && len < threshold);
+
+ flow->dropped += i;
q->backlogs[idx] -= len;
- sch->q.qlen--;
- qdisc_qstats_drop(sch);
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- flow->dropped++;
+ q->memory_usage -= mem;
+ sch->qstats.drops += i;
+ sch->qstats.backlog -= len;
+ sch->q.qlen -= i;
return idx;
}
@@ -170,16 +189,17 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
unsigned int prev_backlog;
prev_backlog = sch->qstats.backlog;
- fq_codel_drop(sch);
+ fq_codel_drop(sch, 1U);
return prev_backlog - sch->qstats.backlog;
}
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- unsigned int idx, prev_backlog;
+ unsigned int idx, prev_backlog, prev_qlen;
struct fq_codel_flow *flow;
int uninitialized_var(ret);
+ bool memory_limited;
idx = fq_codel_classify(skb, sch, &ret);
if (idx == 0) {
@@ -202,20 +222,29 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow->deficit = q->quantum;
flow->dropped = 0;
}
- if (++sch->q.qlen <= sch->limit)
+ q->memory_usage += skb->truesize;
+ memory_limited = q->memory_usage > q->memory_limit;
+ if (++sch->q.qlen <= sch->limit && !memory_limited)
return NET_XMIT_SUCCESS;
prev_backlog = sch->qstats.backlog;
- q->drop_overlimit++;
- /* Return Congestion Notification only if we dropped a packet
- * from this flow.
+ prev_qlen = sch->q.qlen;
+
+ /* fq_codel_drop() is quite expensive, as it performs a linear search
+ * in q->backlogs[] to find a fat flow.
+ * So instead of dropping a single packet, drop half of its backlog
+ * with a limit of 64 packets so as not to add too big a cpu spike here.
*/
- if (fq_codel_drop(sch) == idx)
- return NET_XMIT_CN;
+ ret = fq_codel_drop(sch, q->drop_batch_size);
+
+ q->drop_overlimit += prev_qlen - sch->q.qlen;
+ if (memory_limited)
+ q->drop_overmemory += prev_qlen - sch->q.qlen;
+ /* As we dropped packet(s), better let upper stack know this */
+ qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen,
+ prev_backlog - sch->qstats.backlog);
- /* As we dropped a packet, better let upper stack know this */
- qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
- return NET_XMIT_SUCCESS;
+ return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS;
}
/* This is the specific function called from codel_dequeue()
@@ -289,6 +318,7 @@ begin:
list_del_init(&flow->flowchain);
goto begin;
}
+ q->memory_usage -= skb->truesize;
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
@@ -335,6 +365,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
[TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 },
[TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 },
[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
};
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -386,7 +418,14 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_FQ_CODEL_QUANTUM])
q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
- while (sch->q.qlen > sch->limit) {
+ if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+
+ if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
+ q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+
+ while (sch->q.qlen > sch->limit ||
+ q->memory_usage > q->memory_limit) {
struct sk_buff *skb = fq_codel_dequeue(sch);
q->cstats.drop_len += qdisc_pkt_len(skb);
@@ -431,6 +470,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
sch->limit = 10*1024;
q->flows_cnt = 1024;
+ q->memory_limit = 32 << 20; /* 32 MBytes */
+ q->drop_batch_size = 64;
q->quantum = psched_mtu(qdisc_dev(sch));
q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
@@ -489,6 +530,10 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->cparams.ecn) ||
nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
q->quantum) ||
+ nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
+ q->drop_batch_size) ||
+ nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
+ q->memory_limit) ||
nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
q->flows_cnt))
goto nla_put_failure;
@@ -517,6 +562,8 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
st.qdisc_stats.new_flow_count = q->new_flow_count;
st.qdisc_stats.ce_mark = q->cstats.ce_mark;
+ st.qdisc_stats.memory_usage = q->memory_usage;
+ st.qdisc_stats.drop_overmemory = q->drop_overmemory;
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 80742edea96f..269dd71b3828 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -108,35 +108,6 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
return skb;
}
-static inline int handle_dev_cpu_collision(struct sk_buff *skb,
- struct netdev_queue *dev_queue,
- struct Qdisc *q)
-{
- int ret;
-
- if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
- /*
- * Same CPU holding the lock. It may be a transient
- * configuration error, when hard_start_xmit() recurses. We
- * detect it by checking xmit owner and drop the packet when
- * deadloop is detected. Return OK to try the next skb.
- */
- kfree_skb_list(skb);
- net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
- dev_queue->dev->name);
- ret = qdisc_qlen(q);
- } else {
- /*
- * Another cpu is holding lock, requeue & delay xmits for
- * some time.
- */
- __this_cpu_inc(softnet_data.cpu_collision);
- ret = dev_requeue_skb(skb, q);
- }
-
- return ret;
-}
-
/*
* Transmit possibly several skbs, and handle the return status as
* required. Holding the __QDISC___STATE_RUNNING bit guarantees that
@@ -174,9 +145,6 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
if (dev_xmit_complete(ret)) {
/* Driver sent out skb successfully or skb was consumed */
ret = qdisc_qlen(q);
- } else if (ret == NETDEV_TX_LOCKED) {
- /* Driver try lock failed */
- ret = handle_dev_cpu_collision(skb, txq, q);
} else {
/* Driver returned NETDEV_TX_BUSY - requeue skb */
if (unlikely(ret != NETDEV_TX_BUSY))
@@ -259,13 +227,12 @@ unsigned long dev_trans_start(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
- res = dev->trans_start;
- for (i = 0; i < dev->num_tx_queues; i++) {
+ res = netdev_get_tx_queue(dev, 0)->trans_start;
+ for (i = 1; i < dev->num_tx_queues; i++) {
val = netdev_get_tx_queue(dev, i)->trans_start;
if (val && time_after(val, res))
res = val;
}
- dev->trans_start = res;
return res;
}
@@ -288,10 +255,7 @@ static void dev_watchdog(unsigned long arg)
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
- /*
- * old device drivers set dev->trans_start
- */
- trans_start = txq->trans_start ? : dev->trans_start;
+ trans_start = txq->trans_start;
if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
@@ -807,7 +771,7 @@ void dev_activate(struct net_device *dev)
transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
if (need_watchdog) {
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
dev_watchdog_up(dev);
}
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 491d6fd6430c..205bed00dd34 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
sch->q.qlen++;
}
+/* netem can't properly corrupt a megapacket (like we get from GSO), so when
+ * we statistically choose to corrupt one, we instead segment it, returning
+ * the first packet to be corrupted, and re-enqueue the remaining frames
+ */
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sk_buff *segs;
+ netdev_features_t features = netif_skb_features(skb);
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+ if (IS_ERR_OR_NULL(segs)) {
+ qdisc_reshape_fail(skb, sch);
+ return NULL;
+ }
+ consume_skb(skb);
+ return segs;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
struct sk_buff *skb2;
+ struct sk_buff *segs = NULL;
+ unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
+ int nb = 0;
int count = 1;
+ int rc = NET_XMIT_SUCCESS;
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* do it now in software before we mangle it.
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (skb_is_gso(skb)) {
+ segs = netem_segment(skb, sch);
+ if (!segs)
+ return NET_XMIT_DROP;
+ } else {
+ segs = skb;
+ }
+
+ skb = segs;
+ segs = segs->next;
+
if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb)))
- return qdisc_drop(skb, sch);
+ skb_checksum_help(skb))) {
+ rc = qdisc_drop(skb, sch);
+ goto finish_segs;
+ }
skb->data[prandom_u32() % skb_headlen(skb)] ^=
1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.requeues++;
}
+finish_segs:
+ if (segs) {
+ while (segs) {
+ skb2 = segs->next;
+ segs->next = NULL;
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ last_len = segs->len;
+ rc = qdisc_enqueue(segs, sch);
+ if (rc != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(rc))
+ qdisc_qstats_drop(sch);
+ } else {
+ nb++;
+ len += last_len;
+ }
+ segs = skb2;
+ }
+ sch->q.qlen += nb;
+ if (nb > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+ }
return NET_XMIT_SUCCESS;
}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 958ef5f33f4b..1eb94bf18ef4 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -239,7 +239,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
offset = 0;
if ((whole > 1) || (whole && over))
- SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
+ SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
/* Create chunks for all the full sized DATA chunks. */
for (i = 0, len = first_len; i < whole; i++) {
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 00b8445364e3..a701527a9480 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
if (val != cmp) {
/* CRC failure, dump it. */
- SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS);
+ __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS);
return -1;
}
return 0;
@@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
- SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS);
+ __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
if (skb_linearize(skb))
goto discard_it;
@@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb)
*/
if (!asoc) {
if (sctp_rcv_ootb(skb)) {
- SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES);
+ __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
goto discard_release;
}
}
@@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb)
skb = NULL; /* sctp_chunk_free already freed the skb */
goto discard_release;
}
- SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG);
+ __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG);
} else {
- SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
+ __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
}
@@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb)
return 0;
discard_it:
- SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS);
+ __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
kfree_skb(skb);
return 0;
@@ -532,7 +532,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
*app = asoc;
*tpp = transport;
@@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
skb->network_header = saveip;
skb->transport_header = savesctp;
if (!sk) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
/* Warning: The sock lock is held. Remember to call
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index b335ffcef0b9..9d87bba0ff1d 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
* Eventually, we should clean up inqueue to not rely
* on the BH related data structures.
*/
+ local_bh_disable();
list_add_tail(&chunk->list, &q->in_chunk_list);
if (chunk->asoc)
chunk->asoc->stats.ipackets++;
q->immediate.func(&q->immediate);
+ local_bh_enable();
}
/* Peek at the next chunk on the inqeue. */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ce46f1c7f133..0657d18a85bf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -162,7 +162,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
skb->network_header = saveip;
skb->transport_header = savesctp;
if (!sk) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
goto out;
}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index bb2d8d9608e9..8e3e769dc9ea 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -145,7 +145,11 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
else
amt = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_WMEM_ALLOC] = amt;
- mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+ if (asoc && asoc->ep->rcvbuf_policy)
+ amt = atomic_read(&asoc->rmem_alloc);
+ else
+ amt = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RMEM_ALLOC] = amt;
mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
@@ -161,8 +165,9 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
if (ext & (1 << (INET_DIAG_INFO - 1))) {
struct nlattr *attr;
- attr = nla_reserve(skb, INET_DIAG_INFO,
- sizeof(struct sctp_info));
+ attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
+ sizeof(struct sctp_info),
+ INET_DIAG_PAD);
if (!attr)
goto errout;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e8f0112f9b28..aa3712259368 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1741,10 +1741,9 @@ out:
} else if (local_cork)
error = sctp_outq_uncork(&asoc->outqueue, gfp);
- if (sp->pending_data_ready) {
- sk->sk_data_ready(sk);
- sp->pending_data_ready = 0;
- }
+ if (sp->data_ready_signalled)
+ sp->data_ready_signalled = 0;
+
return error;
nomem:
error = -ENOMEM;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index ec12a8920e5f..ec166d2bd2d9 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -194,6 +194,7 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
{
struct sock *sk = ulpq->asoc->base.sk;
+ struct sctp_sock *sp = sctp_sk(sk);
struct sk_buff_head *queue, *skb_list;
struct sk_buff *skb = sctp_event2skb(event);
int clear_pd = 0;
@@ -211,7 +212,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
sk_incoming_cpu_update(sk);
}
/* Check if the user wishes to receive this event. */
- if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
+ if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
goto out_free;
/* If we are in partial delivery mode, post to the lobby until
@@ -219,7 +220,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
* the association the cause of the partial delivery.
*/
- if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
+ if (atomic_read(&sp->pd_mode) == 0) {
queue = &sk->sk_receive_queue;
} else {
if (ulpq->pd_mode) {
@@ -231,7 +232,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
if ((event->msg_flags & MSG_NOTIFICATION) ||
(SCTP_DATA_NOT_FRAG ==
(event->msg_flags & SCTP_DATA_FRAG_MASK)))
- queue = &sctp_sk(sk)->pd_lobby;
+ queue = &sp->pd_lobby;
else {
clear_pd = event->msg_flags & MSG_EOR;
queue = &sk->sk_receive_queue;
@@ -242,10 +243,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
* can queue this to the receive queue instead
* of the lobby.
*/
- if (sctp_sk(sk)->frag_interleave)
+ if (sp->frag_interleave)
queue = &sk->sk_receive_queue;
else
- queue = &sctp_sk(sk)->pd_lobby;
+ queue = &sp->pd_lobby;
}
}
@@ -264,8 +265,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
if (clear_pd)
sctp_ulpq_clear_pd(ulpq);
- if (queue == &sk->sk_receive_queue)
- sctp_sk(sk)->pending_data_ready = 1;
+ if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
+ sp->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
return 1;
out_free:
@@ -1126,11 +1129,13 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
{
struct sctp_ulpevent *ev = NULL;
struct sock *sk;
+ struct sctp_sock *sp;
if (!ulpq->pd_mode)
return;
sk = ulpq->asoc->base.sk;
+ sp = sctp_sk(sk);
if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
&sctp_sk(sk)->subscribe))
ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
@@ -1140,6 +1145,8 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
/* If there is data waiting, send it up the socket now. */
- if (sctp_ulpq_clear_pd(ulpq) || ev)
- sctp_sk(sk)->pending_data_ready = 1;
+ if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) {
+ sp->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
}
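Taken together, the ulpqueue.c and sm_sideeffect.c hunks move sk_data_ready() to the points where events are actually queued and gate it on sp->data_ready_signalled, so a burst of enqueues wakes the reader only once; the flag is re-armed when the side-effect run completes. A minimal userspace sketch of that wake-once-per-burst pattern (illustrative names only, not the SCTP data structures):

#include <stdbool.h>
#include <stdio.h>

struct rcv_sock {
	int queued;                 /* events sitting in the receive queue */
	bool data_ready_signalled;  /* reader already woken for this burst */
};

static void data_ready(struct rcv_sock *s)
{
	printf("wake reader (queued=%d)\n", s->queued);
}

static void enqueue_event(struct rcv_sock *s)
{
	s->queued++;
	if (!s->data_ready_signalled) {   /* only the first event wakes */
		s->data_ready_signalled = true;
		data_ready(s);
	}
}

static void burst_done(struct rcv_sock *s)
{
	/* mirrors the clear in sm_sideeffect.c: re-arm for the next burst */
	s->data_ready_signalled = false;
}

int main(void)
{
	struct rcv_sock s = { 0, false };

	enqueue_event(&s);  /* wakes */
	enqueue_event(&s);  /* silent */
	enqueue_event(&s);  /* silent */
	burst_done(&s);
	enqueue_event(&s);  /* wakes again */
	return 0;
}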
diff --git a/net/socket.c b/net/socket.c
index 5dbb0bbe12a7..7789d79609dd 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
flags |= SKBTX_SCHED_TSTAMP;
- if (tsflags & SOF_TIMESTAMPING_TX_ACK)
- flags |= SKBTX_ACK_TSTAMP;
-
*tx_flags = flags;
}
EXPORT_SYMBOL(__sock_tx_timestamp);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d0756ac5c0f2..a6c68dc086af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1018,11 +1018,11 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
- UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS);
+ __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
goto out_unlock;
}
- UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
+ __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
xprt_adjust_cwnd(xprt, task, copied);
xprt_complete_rqst(task, copied);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 2b9b98f1c2ff..b7e01d88bdc5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -305,6 +305,8 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev,
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
err, attr->id);
+ if (attr->complete)
+ attr->complete(dev, err, attr->complete_priv);
}
static int switchdev_port_attr_set_defer(struct net_device *dev,
@@ -434,6 +436,8 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
err, obj->id);
+ if (obj->complete)
+ obj->complete(dev, err, obj->complete_priv);
}
static int switchdev_port_obj_add_defer(struct net_device *dev,
@@ -502,6 +506,8 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
err, obj->id);
+ if (obj->complete)
+ obj->complete(dev, err, obj->complete_priv);
}
static int switchdev_port_obj_del_defer(struct net_device *dev,
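All three switchdev hunks add the same thing: after a deferred attribute/object operation finishes, an optional completion callback is invoked with the result, whether or not the operation succeeded. A hedged sketch of that callback shape (struct and function names here are illustrative, not the switchdev API):

#include <stdio.h>

struct deferred_op {
	int id;
	void (*complete)(const char *dev, int err, void *priv); /* optional */
	void *complete_priv;
};

/* Pretend driver hook: succeeds for one id, otherwise "not supported". */
static int do_op(const struct deferred_op *op)
{
	return op->id == 42 ? 0 : -95;
}

static void run_deferred(const char *dev, const struct deferred_op *op)
{
	int err = do_op(op);

	if (err && err != -95)
		fprintf(stderr, "%s: failed (err=%d) to set attribute (id=%d)\n",
			dev, err, op->id);
	if (op->complete)                  /* notify the requester either way */
		op->complete(dev, err, op->complete_priv);
}

static void done(const char *dev, int err, void *priv)
{
	printf("%s: op finished, err=%d (priv=%p)\n", dev, err, priv);
}

int main(void)
{
	struct deferred_op op = { 42, done, NULL };

	run_deferred("sw0p1", &op);
	return 0;
}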
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e2bdb07a49a2..fe1b062c4f18 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -112,11 +112,9 @@ static int __init tipc_init(void)
pr_info("Activated (version " TIPC_MOD_VER ")\n");
- sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
- TIPC_LOW_IMPORTANCE;
- sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
- TIPC_CRITICAL_IMPORTANCE;
- sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT;
+ sysctl_tipc_rmem[0] = RCVBUF_MIN;
+ sysctl_tipc_rmem[1] = RCVBUF_DEF;
+ sysctl_tipc_rmem[2] = RCVBUF_MAX;
err = tipc_netlink_start();
if (err)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 58bf51541813..024da8af91f0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -743,16 +743,26 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
-static inline u32 msg_bcast_tag(struct tipc_msg *m)
+static inline u32 msg_conn_ack(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
}
-static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
+static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 9, 16, 0xffff, n);
}
+static inline u32 msg_adv_win(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 0, 0xffff);
+}
+
+static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 9, 0, 0xffff, n);
+}
+
static inline u32 msg_max_pkt(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff) * 4;
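The new accessors pack two 16-bit fields into header word 9: msg_conn_ack() reads bits 16-31 and msg_adv_win() bits 0-15, so the acknowledged count and the advertised receive window travel in the same protocol word. A userspace mirror of the shift-and-mask arithmetic (assuming the usual msg_bits()/msg_set_bits() semantics and ignoring the byte-order handling the real helpers do):

#include <stdint.h>
#include <stdio.h>

static uint32_t get_bits(uint32_t word, int pos, uint32_t mask)
{
	return (word >> pos) & mask;
}

static void set_bits(uint32_t *word, int pos, uint32_t mask, uint32_t val)
{
	*word &= ~(mask << pos);          /* clear the field */
	*word |= (val & mask) << pos;     /* write the new value */
}

int main(void)
{
	uint32_t word9 = 0;

	set_bits(&word9, 16, 0xffff, 3);   /* conn_ack: acked block count */
	set_bits(&word9, 0, 0xffff, 512);  /* adv_win: advertised window */
	printf("word9=0x%08x conn_ack=%u adv_win=%u\n",
	       word9, get_bits(word9, 16, 0xffff), get_bits(word9, 0, 0xffff));
	return 0;
}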
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 68d9f7b8485c..d903f560e2fd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
/*
* net/tipc/node.c: TIPC node management routines
*
- * Copyright (c) 2000-2006, 2012-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2012-2016, Ericsson AB
* Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -191,6 +191,20 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
tipc_node_put(n);
return mtu;
}
+
+u16 tipc_node_get_capabilities(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+ u16 caps;
+
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return TIPC_NODE_CAPABILITIES;
+ caps = n->capabilities;
+ tipc_node_put(n);
+ return caps;
+}
+
/*
* A trivial power-of-two bitmask technique is used for speed, since this
* operation is done for every incoming TIPC packet. The number of hash table
@@ -304,8 +318,11 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
spin_lock_bh(&tn->node_list_lock);
n = tipc_node_find(net, addr);
- if (n)
+ if (n) {
+ /* Same node may come back with new capabilities */
+ n->capabilities = capabilities;
goto exit;
+ }
n = kzalloc(sizeof(*n), GFP_ATOMIC);
if (!n) {
pr_warn("Node creation failed, no memory\n");
@@ -554,6 +571,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
*slot1 = bearer_id;
tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
n->action_flags |= TIPC_NOTIFY_NODE_UP;
+ tipc_link_set_active(nl, true);
tipc_bcast_add_peer(n->net, nl, xmitq);
return;
}
@@ -1451,6 +1469,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
int bearer_id = b->identity;
struct tipc_link_entry *le;
u16 bc_ack = msg_bcast_ack(hdr);
+ u32 self = tipc_own_addr(net);
int rc = 0;
__skb_queue_head_init(&xmitq);
@@ -1467,6 +1486,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
return tipc_node_bc_rcv(net, skb, bearer_id);
}
+ /* Discard unicast link messages destined for another node */
+ if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self)))
+ goto discard;
+
/* Locate neighboring node that sent packet */
n = tipc_node_find(net, msg_prevnode(hdr));
if (unlikely(!n))
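The node.c changes make peer capability bits first-class: they are refreshed when a known node reappears, and tipc_node_get_capabilities() falls back to this node's own TIPC_NODE_CAPABILITIES when the peer is unknown. A small sketch of that lookup-with-default idea (simplified, illustrative names, not the kernel data structures):

#include <stdint.h>
#include <stdio.h>

#define CAP_BCAST_SYNCH   (1 << 1)
#define CAP_BLOCK_FLOWCTL (1 << 2)
#define OWN_CAPABILITIES  (CAP_BCAST_SYNCH | CAP_BLOCK_FLOWCTL)

struct peer {
	uint32_t addr;
	uint16_t capabilities;
};

/* Return what we know about a peer, or our own set if we have no entry. */
static uint16_t peer_capabilities(const struct peer *p, uint32_t addr)
{
	if (!p || p->addr != addr)
		return OWN_CAPABILITIES;
	return p->capabilities;
}

int main(void)
{
	struct peer p = { 0x1001001, CAP_BCAST_SYNCH };

	printf("known peer:   0x%x\n", peer_capabilities(&p, 0x1001001));
	printf("unknown peer: 0x%x\n", peer_capabilities(NULL, 0x1001002));

	/* Same node came back after a restart with new capabilities: refresh. */
	p.capabilities = CAP_BCAST_SYNCH | CAP_BLOCK_FLOWCTL;
	printf("refreshed:    0x%x\n", peer_capabilities(&p, 0x1001001));
	return 0;
}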
diff --git a/net/tipc/node.h b/net/tipc/node.h
index f39d9d06e8bb..8264b3d97dc4 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,10 +45,11 @@
/* Optional capabilities supported by this code version
*/
enum {
- TIPC_BCAST_SYNCH = (1 << 1)
+ TIPC_BCAST_SYNCH = (1 << 1),
+ TIPC_BLOCK_FLOWCTL = (1 << 2)
};
-#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL)
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
@@ -70,6 +71,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3eeb50a27b89..12628890c219 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -96,8 +96,11 @@ struct tipc_sock {
uint conn_timeout;
atomic_t dupl_rcvcnt;
bool link_cong;
- uint sent_unacked;
- uint rcv_unacked;
+ u16 snt_unacked;
+ u16 snd_win;
+ u16 peer_caps;
+ u16 rcv_unacked;
+ u16 rcv_win;
struct sockaddr_tipc remote;
struct rhash_head node;
struct rcu_head rcu;
@@ -227,9 +230,29 @@ static struct tipc_sock *tipc_sk(const struct sock *sk)
return container_of(sk, struct tipc_sock, sk);
}
-static int tsk_conn_cong(struct tipc_sock *tsk)
+static bool tsk_conn_cong(struct tipc_sock *tsk)
{
- return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN;
+ return tsk->snt_unacked >= tsk->snd_win;
+}
+
+/* tsk_adv_blocks(): translate a buffer size in bytes to the number of
+ * advertisable blocks, taking into account the ratio truesize(len)/len
+ * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
+ */
+static u16 tsk_adv_blocks(int len)
+{
+ return len / FLOWCTL_BLK_SZ / 4;
+}
+
+/* tsk_inc(): increment counter for sent or received data
+ * - If block-based flow control is not supported by the peer we
+ * fall back to the message-based scheme, counting one unit per message
+ */
+static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
+{
+ if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
+ return ((msglen / FLOWCTL_BLK_SZ) + 1);
+ return 1;
}
/**
@@ -377,9 +400,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
- tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
+ /* Start out with safe limits until we receive an advertised window */
+ tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
+ tsk->rcv_win = tsk->snd_win;
+
if (sock->state == SS_READY) {
tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
@@ -775,7 +801,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
struct sock *sk = &tsk->sk;
struct tipc_msg *hdr = buf_msg(skb);
int mtyp = msg_type(hdr);
- int conn_cong;
+ bool conn_cong;
/* Ignore if connection cannot be validated: */
if (!tsk_peer_msg(tsk, hdr))
@@ -789,7 +815,9 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
return;
} else if (mtyp == CONN_ACK) {
conn_cong = tsk_conn_cong(tsk);
- tsk->sent_unacked -= msg_msgcnt(hdr);
+ tsk->snt_unacked -= msg_conn_ack(hdr);
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+ tsk->snd_win = msg_adv_win(hdr);
if (conn_cong)
sk->sk_write_space(sk);
} else if (mtyp != CONN_PROBE_REPLY) {
@@ -1020,12 +1048,14 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
u32 dnode;
uint mtu, send, sent = 0;
struct iov_iter save;
+ int hlen = MIN_H_SIZE;
/* Handle implied connection establishment */
if (unlikely(dest)) {
rc = __tipc_sendmsg(sock, m, dsz);
+ hlen = msg_hdr_sz(mhdr);
if (dsz && (dsz == rc))
- tsk->sent_unacked = 1;
+ tsk->snt_unacked = tsk_inc(tsk, dsz + hlen);
return rc;
}
if (dsz > (uint)INT_MAX)
@@ -1054,7 +1084,7 @@ next:
if (likely(!tsk_conn_cong(tsk))) {
rc = tipc_node_xmit(net, &pktchain, dnode, portid);
if (likely(!rc)) {
- tsk->sent_unacked++;
+ tsk->snt_unacked += tsk_inc(tsk, send + hlen);
sent += send;
if (sent == dsz)
return dsz;
@@ -1118,6 +1148,13 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+ tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+ return;
+
+ /* Fall back to message based flow control */
+ tsk->rcv_win = FLOWCTL_MSG_WIN;
+ tsk->snd_win = FLOWCTL_MSG_WIN;
}
/**
@@ -1214,7 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
return 0;
}
-static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
struct net *net = sock_net(&tsk->sk);
struct sk_buff *skb = NULL;
@@ -1230,7 +1267,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
if (!skb)
return;
msg = buf_msg(skb);
- msg_set_msgcnt(msg, ack);
+ msg_set_conn_ack(msg, tsk->rcv_unacked);
+ tsk->rcv_unacked = 0;
+
+ /* Adjust to and advertise the correct window limit */
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
+ tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
+ msg_set_adv_win(msg, tsk->rcv_win);
+ }
tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
@@ -1288,7 +1332,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
long timeo;
unsigned int sz;
u32 err;
- int res;
+ int res, hlen;
/* Catch invalid receive requests */
if (unlikely(!buf_len))
@@ -1313,6 +1357,7 @@ restart:
buf = skb_peek(&sk->sk_receive_queue);
msg = buf_msg(buf);
sz = msg_data_sz(msg);
+ hlen = msg_hdr_sz(msg);
err = msg_errcode(msg);
/* Discard an empty non-errored message & try again */
@@ -1335,7 +1380,7 @@ restart:
sz = buf_len;
m->msg_flags |= MSG_TRUNC;
}
- res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz);
+ res = skb_copy_datagram_msg(buf, hlen, m, sz);
if (res)
goto exit;
res = sz;
@@ -1347,15 +1392,15 @@ restart:
res = -ECONNRESET;
}
- /* Consume received message (optional) */
- if (likely(!(flags & MSG_PEEK))) {
- if ((sock->state != SS_READY) &&
- (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
- tipc_sk_send_ack(tsk, tsk->rcv_unacked);
- tsk->rcv_unacked = 0;
- }
- tsk_advance_rx_queue(sk);
+ if (unlikely(flags & MSG_PEEK))
+ goto exit;
+
+ if (likely(sock->state != SS_READY)) {
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
+ if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
+ tipc_sk_send_ack(tsk);
}
+ tsk_advance_rx_queue(sk);
exit:
release_sock(sk);
return res;
@@ -1384,7 +1429,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
int sz_to_copy, target, needed;
int sz_copied = 0;
u32 err;
- int res = 0;
+ int res = 0, hlen;
/* Catch invalid receive attempts */
if (unlikely(!buf_len))
@@ -1410,6 +1455,7 @@ restart:
buf = skb_peek(&sk->sk_receive_queue);
msg = buf_msg(buf);
sz = msg_data_sz(msg);
+ hlen = msg_hdr_sz(msg);
err = msg_errcode(msg);
/* Discard an empty non-errored message & try again */
@@ -1434,8 +1480,7 @@ restart:
needed = (buf_len - sz_copied);
sz_to_copy = (sz <= needed) ? sz : needed;
- res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset,
- m, sz_to_copy);
+ res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
if (res)
goto exit;
@@ -1457,20 +1502,18 @@ restart:
res = -ECONNRESET;
}
- /* Consume received message (optional) */
- if (likely(!(flags & MSG_PEEK))) {
- if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
- tipc_sk_send_ack(tsk, tsk->rcv_unacked);
- tsk->rcv_unacked = 0;
- }
- tsk_advance_rx_queue(sk);
- }
+ if (unlikely(flags & MSG_PEEK))
+ goto exit;
+
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
+ if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
+ tipc_sk_send_ack(tsk);
+ tsk_advance_rx_queue(sk);
/* Loop around if more data is required */
if ((sz_copied < buf_len) && /* didn't get all requested data */
(!skb_queue_empty(&sk->sk_receive_queue) ||
(sz_copied < target)) && /* and more is ready or required */
- (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
(!err)) /* and haven't reached a FIN */
goto restart;
@@ -1602,30 +1645,33 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
/**
* rcvbuf_limit - get proper overload limit of socket receive queue
* @sk: socket
- * @buf: message
+ * @skb: message
*
- * For all connection oriented messages, irrespective of importance,
- * the default overload value (i.e. 67MB) is set as limit.
+ * For connection oriented messages, irrespective of importance,
+ * the default queue limit is 2 MB.
*
- * For all connectionless messages, by default new queue limits are
- * as belows:
+ * For connectionless messages, queue limits are based on message
+ * importance as follows:
*
- * TIPC_LOW_IMPORTANCE (4 MB)
- * TIPC_MEDIUM_IMPORTANCE (8 MB)
- * TIPC_HIGH_IMPORTANCE (16 MB)
- * TIPC_CRITICAL_IMPORTANCE (32 MB)
+ * TIPC_LOW_IMPORTANCE (2 MB)
+ * TIPC_MEDIUM_IMPORTANCE (4 MB)
+ * TIPC_HIGH_IMPORTANCE (8 MB)
+ * TIPC_CRITICAL_IMPORTANCE (16 MB)
*
* Returns overload limit according to corresponding message importance
*/
-static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
+static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
- struct tipc_msg *msg = buf_msg(buf);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+
+ if (unlikely(!msg_connected(hdr)))
+ return sk->sk_rcvbuf << msg_importance(hdr);
- if (msg_connected(msg))
- return sysctl_tipc_rmem[2];
+ if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
+ return sk->sk_rcvbuf;
- return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
- msg_importance(msg);
+ return FLOWCTL_MSG_LIM;
}
/**
@@ -1748,7 +1794,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Try backlog, compensating for double-counted bytes */
dcnt = &tipc_sk(sk)->dupl_rcvcnt;
- if (sk->sk_backlog.len)
+ if (!sk->sk_backlog.len)
atomic_set(dcnt, 0);
lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
if (likely(!sk_add_backlog(sk, skb, lim)))
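The flow-control rework above counts in 1 KiB "advertisable blocks" rather than messages: tsk_adv_blocks() advertises len / FLOWCTL_BLK_SZ / 4 blocks (the divide-by-4 absorbs the truesize overhead mentioned in the comment), and tsk_inc() charges msglen / FLOWCTL_BLK_SZ + 1 blocks per message, or exactly one unit for peers without TIPC_BLOCK_FLOWCTL. A back-of-the-envelope check using the constants from the socket.h hunk that follows (userspace sketch, illustrative only):

#include <stdio.h>

#define FLOWCTL_BLK_SZ 1024
#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512)  /* from the socket.h hunk below */

/* Mirrors of the new helpers, for a quick arithmetic check only. */
static unsigned int adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

static unsigned int inc(int msglen, int peer_has_block_flowctl)
{
	if (peer_has_block_flowctl)
		return msglen / FLOWCTL_BLK_SZ + 1;
	return 1;  /* legacy peers are charged one unit per message */
}

int main(void)
{
	/* The initial safe window: RCVBUF_MIN (512 KiB) -> 128 blocks. */
	printf("initial snd_win/rcv_win = %u blocks\n", adv_blocks(RCVBUF_MIN));

	/* A message totalling 5024 bytes including its header costs 5 blocks... */
	printf("block-mode cost of 5024 bytes = %u\n", inc(5024, 1));
	/* ...but only 1 unit against a peer without TIPC_BLOCK_FLOWCTL. */
	printf("legacy-mode cost              = %u\n", inc(5024, 0));
	return 0;
}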
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 4241f22069dc..06fb5944cf76 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -1,6 +1,6 @@
/* net/tipc/socket.h: Include file for TIPC socket code
*
- * Copyright (c) 2014-2015, Ericsson AB
+ * Copyright (c) 2014-2016, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,10 +38,17 @@
#include <net/sock.h>
#include <net/genetlink.h>
-#define TIPC_CONNACK_INTV 256
-#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2)
-#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
- SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
+/* Compatibility values for deprecated message based flow control */
+#define FLOWCTL_MSG_WIN 512
+#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE))
+
+#define FLOWCTL_BLK_SZ 1024
+
+/* Socket receive buffer sizes */
+#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512)
+#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
+#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
+
int tipc_socket_init(void);
void tipc_socket_stop(void);
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 79de588c7bd6..0dd02244e21d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -326,8 +326,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
return tipc_subscrp_cancel(s, subscriber);
}
- if (s)
- tipc_subscrp_subscribe(net, s, subscriber, swap);
+ tipc_subscrp_subscribe(net, s, subscriber, swap);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3dce53ebea92..b5f1221f48d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1808,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
else if (sk->sk_shutdown & RCV_SHUTDOWN)
err = 0;
- if (copied > 0) {
- /* We only do these additional bookkeeping/notification steps
- * if we actually copied something out of the queue pair
- * instead of just peeking ahead.
- */
-
- if (!(flags & MSG_PEEK)) {
- /* If the other side has shutdown for sending and there
- * is nothing more to read, then modify the socket
- * state.
- */
- if (vsk->peer_shutdown & SEND_SHUTDOWN) {
- if (vsock_stream_has_data(vsk) <= 0) {
- sk->sk_state = SS_UNCONNECTED;
- sock_set_flag(sk, SOCK_DONE);
- sk->sk_state_change(sk);
- }
- }
- }
+ if (copied > 0)
err = copied;
- }
out:
release_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 56214736fe88..4120b7a538be 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -2051,7 +2051,7 @@ static u32 vmci_transport_get_local_cid(void)
return vmci_get_context_id();
}
-static struct vsock_transport vmci_transport = {
+static const struct vsock_transport vmci_transport = {
.init = vmci_transport_socket_init,
.destruct = vmci_transport_destruct,
.release = vmci_transport_release,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ff4a91fcab9f..637387bbaaea 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
skb_dst_force(skb);
+ /* Inner headers are invalid now. */
+ skb->encapsulation = 0;
+
err = x->type->output(x, skb);
if (err == -EINPROGRESS)
goto out;